Merge pull request #1113 from Hestia-Homes/feature/hyde

some excel files are formatted differently
This commit is contained in:
Jun-te Kim 2026-05-21 12:33:57 +01:00 committed by GitHub
commit 617342ef85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 23 additions and 11 deletions

View file

@ -12,9 +12,9 @@ payload = {
{
"body": json.dumps(
{
"task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf",
"sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv",
"task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298",
"sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068",
"s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
}
)
}

View file

@ -79,23 +79,23 @@ def app():
"""
data_folder = "/workspaces/model/asset_list"
data_filename = "lincs_address_list.xlsx"
sheet_name = "Sheet1"
data_filename = "hyde.xlsx"
sheet_name = "AddressProfilingResults"
postcode_column = "Postcode"
address1_column = "Deal Name"
address1_column = "Address"
address1_method = None
fulladdress_column = "Deal Name"
fulladdress_column = "Postcode"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None # Good to include if landlord gave
landlord_property_type = "Property Type" # Good to include if landlord gave
landlord_built_form = None # Good to include if landlord gave
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "landlord_id"
landlord_property_id = "Organisation Reference"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
@ -468,3 +468,4 @@ def app():
asset_list.duplicated_addresses.to_excel(
writer, sheet_name="Duplicate Properties", index=False
)

View file

@ -47,8 +47,14 @@ class EpcClientService:
latest = max(results, key=lambda r: r.registration_date)
return self.get_by_certificate_number(latest.certificate_number)
@staticmethod
def _normalise_postcode(postcode: str) -> str:
"""Return the postcode with all spaces removed and uppercased."""
return postcode.replace(" ", "").upper()
def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
    """Search for EPC results by postcode.

    The postcode is passed through ``_normalise_postcode`` first, and the
    normalised value is what gets sent to ``_search``. The search itself is
    executed via ``call_with_retry``.

    Args:
        postcode: The postcode to search for, in any spacing or casing.

    Returns:
        Whatever ``call_with_retry`` returns for the wrapped ``_search`` call.
    """
    # Normalise before building the lambda so every retry attempt uses the
    # same, already-cleaned value.
    normalised = self._normalise_postcode(postcode)
    return call_with_retry(lambda: self._search(postcode=normalised))
# ------------------------------------------------------------------
# Private helpers

View file

@ -13,7 +13,12 @@ class CsvS3Client(S3Client):
f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}"
)
raw = self.get_object(key)
text = raw.decode("utf-8-sig")
try:
text = raw.decode("utf-8-sig")
except UnicodeDecodeError:
# Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
text = raw.decode("cp1252")
reader = csv.DictReader(StringIO(text))
return [dict(row) for row in reader]