From 98c9a1df74f9330ef88542b76918cbc71915f310 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 13 Apr 2026 15:15:42 +0000 Subject: [PATCH] Decode S3 CSV as utf-8-sig to strip BOM; drop column-name workaround --- backend/address2UPRN/main.py | 3 --- utils/s3.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 647d46be..79c0de69 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -424,9 +424,6 @@ def handler(event, context, local=False): bucket, key = parse_s3_uri(s3_uri) csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) - df.columns = [ - c.lstrip("\ufeff") for c in df.columns - ] # strip BOM from column names logger.info(f"Loaded {len(df)} rows from S3") except Exception as s3_error: logger.error(f"Failed to read data from S3: {s3_error}") diff --git a/utils/s3.py b/utils/s3.py index 242e0db5..930e2e15 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -330,7 +330,7 @@ def read_csv_from_s3(bucket_name: str, filepath: str) -> list[dict[str, str]]: body = s3_object["Body"].read() # Use StringIO to create a file-like object from the string - csv_data = StringIO(body.decode("utf-8")) + csv_data = StringIO(body.decode("utf-8-sig")) # Use csv library to read it into a list of dictionaries reader = csv.DictReader(csv_data)