From e5583aac1f693fa58ed1d1f5501751d97b38bd01 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 20 May 2026 17:36:20 +0000 Subject: [PATCH 1/5] some excel files are formatted differently --- .../postcode_splitter/local_handler/invoke_local_lambda.py | 2 +- infrastructure/csv_s3_client.py | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py index 21fa9b9e..17d7e345 100755 --- a/applications/postcode_splitter/local_handler/invoke_local_lambda.py +++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py @@ -14,7 +14,7 @@ payload = { { "task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf", "sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d", - "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv", + "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2.csv", } ) } diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py index 055d1ce3..8af8de73 100644 --- a/infrastructure/csv_s3_client.py +++ b/infrastructure/csv_s3_client.py @@ -13,7 +13,12 @@ class CsvS3Client(S3Client): f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}" ) raw = self.get_object(key) - text = raw.decode("utf-8-sig") + try: + text = raw.decode("utf-8-sig") + except UnicodeDecodeError: + # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8. + text = raw.decode("cp1252") + reader = csv.DictReader(StringIO(text)) return [dict(row) for row in reader] From 714478a99a7a221e26367bb2a762d1a31f938ac0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 20 May 2026 17:51:45 +0000 Subject: [PATCH 2/5] clean up sanitise postcode --- backend/epc_client/epc_client_service.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py index 86caeea3..72dbf142 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -47,8 +47,14 @@ class EpcClientService: latest = max(results, key=lambda r: r.registration_date) return self.get_by_certificate_number(latest.certificate_number) + @staticmethod + def _normalise_postcode(postcode: str) -> str: + """Return the postcode with all spaces removed and uppercased.""" + return postcode.replace(" ", "").upper() + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: - return call_with_retry(lambda: self._search(postcode=postcode)) + normalised = self._normalise_postcode(postcode) + return call_with_retry(lambda: self._search(postcode=normalised)) # ------------------------------------------------------------------ # Private helperEpcRateLimpolarss From c5ab795f851402145bc7ed65e3b17a10cd8cd494 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 21 May 2026 09:46:47 +0000 Subject: [PATCH 3/5] redeploy old postcode splitter --- .github/workflows/deploy_terraform.yml | 4 +++- asset_list/app.py | 13 ++++++----- .../terraform/lambda/postcodeSplitter/main.tf | 22 +++++++++++++++++-- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 8ba473ca..1af90291 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -169,7 +169,9 @@ jobs: uses: ./.github/workflows/_build_image.yml with: ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} - dockerfile_path: applications/postcode_splitter/Dockerfile + # dockerfile_path: applications/postcode_splitter/Dockerfile + # Switch back to the old postcode_splitter due to hyde priority - interface for the new one isn't working atm + dockerfile_path: backend/postcode_splitter/handler/Dockerfile build_context: . build_args: | DEV_DB_HOST=$DEV_DB_HOST diff --git a/asset_list/app.py b/asset_list/app.py index 9b10d7f3..424f4df6 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -79,23 +79,23 @@ def app(): """ data_folder = "/workspaces/model/asset_list" - data_filename = "lincs_address_list.xlsx" - sheet_name = "Sheet1" + data_filename = "hyde.xlsx" + sheet_name = "AddressProfilingResults" postcode_column = "Postcode" - address1_column = "Deal Name" + address1_column = "Address" address1_method = None - fulladdress_column = "Deal Name" + fulladdress_column = "Postcode" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = None # Good to include if landlord gave + landlord_property_type = "Property Type" # Good to include if landlord gave landlord_built_form = None # Good to include if landlord gave landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "landlord_id" + landlord_property_id = "Organisation Reference" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -468,3 +468,4 @@ def app(): asset_list.duplicated_addresses.to_excel( writer, sheet_name="Duplicate Properties", index=False ) + diff --git a/deployment/terraform/lambda/postcodeSplitter/main.tf b/deployment/terraform/lambda/postcodeSplitter/main.tf index 721cb2ea..e04ae00f 100644 --- a/deployment/terraform/lambda/postcodeSplitter/main.tf +++ b/deployment/terraform/lambda/postcodeSplitter/main.tf @@ -38,8 +38,26 @@ module "lambda" { { STAGE = var.stage LOG_LEVEL = "info" - POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username - POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password + # POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username + # POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password + # Switch back to the old postcode_splitter due to hyde priority - interface for the new one isn't working atm + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + # Placeholder values so backend/app/config.py Settings doesn't fall back to "changeme" + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + EPC_AUTH_TOKEN = "test" + ENGINE_SQS_URL = "test" + ENERGY_ASSESSMENTS_BUCKET = "test" ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name }, From 856ea6eb9358f10e89e6b574a3a4367b0e92a874 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 21 May 2026 10:12:08 +0000 Subject: [PATCH 4/5] undo postcodesplitter changes --- .github/workflows/deploy_terraform.yml | 4 +--- .../terraform/lambda/postcodeSplitter/main.tf | 22 ++----------------- 2 files changed, 3 insertions(+), 23 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 1af90291..8ba473ca 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -169,9 +169,7 @@ jobs: uses: ./.github/workflows/_build_image.yml with: ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} - # dockerfile_path: applications/postcode_splitter/Dockerfile - # Switch back to the old postcode_splitter due to hyde priority - interface for the new one isn't working atm - dockerfile_path: backend/postcode_splitter/handler/Dockerfile + dockerfile_path: applications/postcode_splitter/Dockerfile build_context: . build_args: | DEV_DB_HOST=$DEV_DB_HOST diff --git a/deployment/terraform/lambda/postcodeSplitter/main.tf b/deployment/terraform/lambda/postcodeSplitter/main.tf index e04ae00f..721cb2ea 100644 --- a/deployment/terraform/lambda/postcodeSplitter/main.tf +++ b/deployment/terraform/lambda/postcodeSplitter/main.tf @@ -38,26 +38,8 @@ module "lambda" { { STAGE = var.stage LOG_LEVEL = "info" - # POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username - # POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password - # Switch back to the old postcode_splitter due to hyde priority - interface for the new one isn't working atm - DB_USERNAME = local.db_credentials.db_assessment_model_username - DB_PASSWORD = local.db_credentials.db_assessment_model_password - # Placeholder values so backend/app/config.py Settings doesn't fall back to "changeme" - GOOGLE_SOLAR_API_KEY = "test" - SAP_PREDICTIONS_BUCKET = "test" - CARBON_PREDICTIONS_BUCKET = "test" - HEAT_PREDICTIONS_BUCKET = "test" - HEATING_KWH_PREDICTIONS_BUCKET = "test" - HOTWATER_KWH_PREDICTIONS_BUCKET = "test" - API_KEY = "test" - ENVIRONMENT = "test" - SECRET_KEY = "test" - PLAN_TRIGGER_BUCKET = "test" - DATA_BUCKET = "test" - EPC_AUTH_TOKEN = "test" - ENGINE_SQS_URL = "test" - ENERGY_ASSESSMENTS_BUCKET = "test" + POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username + POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name }, From dbd03de842933fa189de077d48e5c13ecf9729f4 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 21 May 2026 10:37:13 +0000 Subject: [PATCH 5/5] local run changes --- .../postcode_splitter/local_handler/invoke_local_lambda.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py index 17d7e345..5f4b1d36 100755 --- a/applications/postcode_splitter/local_handler/invoke_local_lambda.py +++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py @@ -12,9 +12,9 @@ payload = { { "body": json.dumps( { - "task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf", - "sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d", - "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2.csv", + "task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298", + "sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068", + "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv", } ) }