Some Excel-exported CSV files are encoded as Windows-1252 rather than UTF-8

This commit is contained in:
Jun-te Kim 2026-05-20 17:36:20 +00:00
parent bd36f203e8
commit e5583aac1f
2 changed files with 7 additions and 2 deletions

View file

@@ -14,7 +14,7 @@ payload = {
{
"task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf",
"sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv",
"s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2.csv",
}
)
}

View file

@@ -13,7 +13,12 @@ class CsvS3Client(S3Client):
f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}"
)
raw = self.get_object(key)
text = raw.decode("utf-8-sig")
try:
text = raw.decode("utf-8-sig")
except UnicodeDecodeError:
# Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8.
text = raw.decode("cp1252")
reader = csv.DictReader(StringIO(text))
return [dict(row) for row in reader]