Fix UTF-8 BOM handling: decode S3 CSV bodies as utf-8-sig instead of stripping the BOM from column names

This commit is contained in:
Jun-te Kim 2026-04-13 15:15:42 +00:00
parent 680edb5c38
commit 98c9a1df74
2 changed files with 1 addition and 4 deletions

View file

@@ -424,9 +424,6 @@ def handler(event, context, local=False):
bucket, key = parse_s3_uri(s3_uri)
csv_data = read_csv_from_s3_dict(bucket, key)
df = pd.DataFrame(csv_data)
df.columns = [
c.lstrip("\ufeff") for c in df.columns
] # strip BOM from column names
logger.info(f"Loaded {len(df)} rows from S3")
except Exception as s3_error:
logger.error(f"Failed to read data from S3: {s3_error}")

View file

@@ -330,7 +330,7 @@ def read_csv_from_s3(bucket_name: str, filepath: str) -> list[dict[str, str]]:
body = s3_object["Body"].read()
# Use StringIO to create a file-like object from the string
csv_data = StringIO(body.decode("utf-8"))
csv_data = StringIO(body.decode("utf-8-sig"))
# Use csv library to read it into a list of dictionaries
reader = csv.DictReader(csv_data)