This commit is contained in:
Jun-te Kim 2026-03-24 13:01:56 +00:00
parent fc425b8b66
commit e01b7225bb
4 changed files with 26 additions and 26 deletions

View file

@ -74,24 +74,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
# data_filename = "For Modelling - Final - reviewed.xlsx"
data_filename = "assests.xlsx"
sheet_name = "Sheet1"
postcode_column = "POSTCODE"
address1_column = "ADDRESS"
data_filename = "Calico ARA Upload Review.xlsx"
sheet_name = "Upload to Ara - Needs Sign Off"
postcode_column = "Postcode"
address1_column = "Address 1"
address1_method = None
fulladdress_column = "ADDRESS"
fulladdress_column = "Address 1"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
landlord_os_uprn = "ara_found_uprn"
landlord_property_type = "Property Type"
landlord_built_form = "Property Type"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "UPRN"
landlord_property_id = "Asset Reference"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None

View file

@ -9,7 +9,7 @@ I believe lower and upper case matter:
* Address 1
* Address 2
* Address 3
* Postcode
* postcode
And save it as a .csv file
@ -24,18 +24,19 @@ For this example I'll be using "s3://retrofit-data-dev/ara_raw_inputs/calico/Cal
Go to the Ara DB and create a new task_id, using a randomly generated UUID as the primary key
task_id = 169ea9b0-01b5-48dc-9f90-ae1989491d09
sub_task_id = e5704f9e-29fe-43c8-8913-05be09f2440f
s3 => s3://retrofit-data-dev/ara_raw_inputs/calico/Calico UPRN Matching Rerun After Address Fix.csv
task_id = ea615ac3-ac28-46c4-8bff-2431c5b9c13d
sub_task_id = 85a23b67-8f18-4299-9bf0-69bfb87adbc7
s3 => s3://retrofit-data-dev/ara_raw_inputs/eon/North Tyneside Council.csv
Step 3) Alright, now let's make the input for the postcode-splitter SQS queue to get the ball rolling
postcode-splitter-sqs => https://eu-west-2.console.aws.amazon.com/sqs/v3/home?region=eu-west-2#/queues/https%3A%2F%2Fsqs.eu-west-2.amazonaws.com%2F337213553626%2Fpostcode-splitter-queue-dev
{
"task_id": "169ea9b0-01b5-48dc-9f90-ae1989491d09",
"sub_task_id": "e5704f9e-29fe-43c8-8913-05be09f2440f",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico UPRN Matching (1)(Sheet1).csv"
"task_id": "ea615ac3-ac28-46c4-8bff-2431c5b9c13d",
"sub_task_id": "85a23b67-8f18-4299-9bf0-69bfb87adbc7",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/eon/eon(Sheet1).csv"
}
Each CSV batch should be saved in retrofit-data-dev/ara_postcode_splitter_batches/<task-id>/<sub-task-id>/<timestamp:uuid4>.csv
Outputs of address2uprn (which is automatically triggered by postcode-splitter) will be saved to retrofit-data-dev/ara_raw_outputs/<task-id>/<subtask-id>/<timestamp:uuid4>.csv

View file

@ -55,11 +55,11 @@ def main(task_id, output):
print(f"Total rows: {len(combined)}")
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument("task_id", help="Task ID folder in S3")
parser.add_argument("--output", default="combined.csv")
# if __name__ == "__main__":
# parser = argparse.ArgumentParser()
# parser.add_argument("task_id", help="Task ID folder in S3")
# parser.add_argument("--output", default="combined.csv")
args = parser.parse_args()
# args = parser.parse_args()
main(args.task_id, args.output)
# main(args.task_id, args.output)

View file

@ -28,10 +28,10 @@ from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 581
SCENARIOS = [1124]
PORTFOLIO_ID = 633
SCENARIOS = [1146]
scenario_names = {
1124: "EPC C - Solar Focused",
1146: "Most Economic",
}
project_name = "WCHG EPC D rated properties"