source of the problem in address2uprn

This commit is contained in:
Jun-te Kim 2026-06-05 19:03:33 +00:00
parent 8b9dcc73f2
commit e60ca6ee5d

View file

@ -510,6 +510,17 @@ def handler(event, context, local=False):
# Create results DataFrame
result_df = pd.DataFrame(results_data)
# UPRNs are integer-valued, but no-match rows contribute None, so the
# mixed column is inferred as float64 and would serialise as
# "100020933699.0". Coerce it to pandas' nullable integer dtype so it
# round-trips as "100020933699" (empty when missing) — the form the
# finaliser and the combined-results UI expect.
# `to_numeric(errors="coerce")` also converts the "invalid postcode"
# sentinel and blank values to NA, which read back as missing.
if "address2uprn_uprn" in result_df.columns:
result_df["address2uprn_uprn"] = pd.to_numeric(
result_df["address2uprn_uprn"], errors="coerce"
).astype("Int64")
# Save results to S3
try:
save_results_to_s3(result_df, str(task_id), str(subtask_id))