diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py index 21fa9b9e..5f4b1d36 100755 --- a/applications/postcode_splitter/local_handler/invoke_local_lambda.py +++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py @@ -12,9 +12,9 @@ payload = { { "body": json.dumps( { - "task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf", - "sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d", - "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv", + "task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298", + "sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068", + "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv", } ) } diff --git a/asset_list/app.py b/asset_list/app.py index 9b10d7f3..424f4df6 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -79,23 +79,23 @@ def app(): """ data_folder = "/workspaces/model/asset_list" - data_filename = "lincs_address_list.xlsx" - sheet_name = "Sheet1" + data_filename = "hyde.xlsx" + sheet_name = "AddressProfilingResults" postcode_column = "Postcode" - address1_column = "Deal Name" + address1_column = "Address" address1_method = None - fulladdress_column = "Deal Name" + fulladdress_column = "Postcode" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = None # Good to include if landlord gave + landlord_property_type = "Property Type" # Good to include if landlord gave landlord_built_form = None # Good to include if landlord gave landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "landlord_id" + landlord_property_id = "Organisation Reference" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -468,3 +468,4 @@ def app(): 
asset_list.duplicated_addresses.to_excel( writer, sheet_name="Duplicate Properties", index=False ) + diff --git a/backend/epc_client/epc_client_service.py b/backend/epc_client/epc_client_service.py index 86caeea3..72dbf142 100644 --- a/backend/epc_client/epc_client_service.py +++ b/backend/epc_client/epc_client_service.py @@ -47,8 +47,14 @@ class EpcClientService: latest = max(results, key=lambda r: r.registration_date) return self.get_by_certificate_number(latest.certificate_number) + @staticmethod + def _normalise_postcode(postcode: str) -> str: + """Return the postcode with all spaces removed and uppercased.""" + return postcode.replace(" ", "").upper() + def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]: - return call_with_retry(lambda: self._search(postcode=postcode)) + normalised = self._normalise_postcode(postcode) + return call_with_retry(lambda: self._search(postcode=normalised)) # ------------------------------------------------------------------ # Private helpers diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py index 055d1ce3..8af8de73 100644 --- a/infrastructure/csv_s3_client.py +++ b/infrastructure/csv_s3_client.py @@ -13,7 +13,12 @@ class CsvS3Client(S3Client): f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}" ) raw = self.get_object(key) - text = raw.decode("utf-8-sig") + try: + text = raw.decode("utf-8-sig") + except UnicodeDecodeError: + # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8. + text = raw.decode("cp1252") + reader = csv.DictReader(StringIO(text)) return [dict(row) for row in reader]