diff --git a/applications/postcode_splitter/local_handler/invoke_local_lambda.py b/applications/postcode_splitter/local_handler/invoke_local_lambda.py index 21fa9b9e..17d7e345 100755 --- a/applications/postcode_splitter/local_handler/invoke_local_lambda.py +++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py @@ -14,7 +14,7 @@ payload = { { "task_id": "f4b3332f-c0cc-481f-96a5-d39860a647cf", "sub_task_id": "14c042de-40c4-473b-8cd8-72c983a94a8d", - "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/Calico Homes Full list EPC Properties(Sheet2) (1) (1).csv", + "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2.csv", } ) } diff --git a/infrastructure/csv_s3_client.py b/infrastructure/csv_s3_client.py index 055d1ce3..8af8de73 100644 --- a/infrastructure/csv_s3_client.py +++ b/infrastructure/csv_s3_client.py @@ -13,7 +13,12 @@ class CsvS3Client(S3Client): f"s3_uri bucket {bucket!r} does not match client bucket {self.bucket!r}" ) raw = self.get_object(key) - text = raw.decode("utf-8-sig") + try: + text = raw.decode("utf-8-sig") + except UnicodeDecodeError: + # Some uploads are Windows-1252 (e.g. £ as byte 0xA3), not UTF-8. + text = raw.decode("cp1252") + reader = csv.DictReader(StringIO(text)) return [dict(row) for row in reader]