Merge pull request #1188 from Hestia-Homes/main
Some checks failed
Fast Api Backend Deploy / deploy (push) Has been cancelled
Deploy infrastructure / determine_stage (push) Has been cancelled
Deploy infrastructure / shared_terraform (push) Has been cancelled
Deploy infrastructure / ara_engine_image (push) Has been cancelled
Deploy infrastructure / ara_engine_lambda (push) Has been cancelled
Deploy infrastructure / address2uprn_image (push) Has been cancelled
Deploy infrastructure / address2uprn_lambda (push) Has been cancelled
Deploy infrastructure / postcodeSplitter_image (push) Has been cancelled
Deploy infrastructure / postcodeSplitter_lambda (push) Has been cancelled
Deploy infrastructure / landlordDescriptionOverrides_image (push) Has been cancelled
Deploy infrastructure / landlordDescriptionOverrides_lambda (push) Has been cancelled
Deploy infrastructure / bulk_address2uprn_combiner_image (push) Has been cancelled
Deploy infrastructure / bulk_address2uprn_combiner_lambda (push) Has been cancelled
Deploy infrastructure / bulkUploadFinaliser_image (push) Has been cancelled
Deploy infrastructure / bulkUploadFinaliser_lambda (push) Has been cancelled
Deploy infrastructure / condition_etl_image (push) Has been cancelled
Deploy infrastructure / condition_etl_lambda (push) Has been cancelled
Deploy infrastructure / categorisation_image (push) Has been cancelled
Deploy infrastructure / categorisation_lambda (push) Has been cancelled
Deploy infrastructure / ordnanceSurvey_image (push) Has been cancelled
Deploy infrastructure / ordnanceSurvey_lambda (push) Has been cancelled
Deploy infrastructure / pashub_to_ara_image (push) Has been cancelled
Deploy infrastructure / pashub_to_ara_lambda (push) Has been cancelled
Deploy infrastructure / fast_api_lambda (push) Has been cancelled
Deploy infrastructure / cloudfront_acm (push) Has been cancelled
Deploy infrastructure / cloudfront_cdn (push) Has been cancelled
Deploy infrastructure / hubspot_etl_image (push) Has been cancelled
Deploy infrastructure / magic_plan_image (push) Has been cancelled
Deploy infrastructure / magic_plan_lambda (push) Has been cancelled
Deploy infrastructure / hubspot_etl_lambda (push) Has been cancelled

address2uprn now does an int
This commit is contained in:
Jun-te Kim 2026-06-05 20:05:01 +01:00 committed by GitHub
commit 956cf5ae02
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 79 additions and 7 deletions

View file

@ -510,6 +510,17 @@ def handler(event, context, local=False):
# Create results DataFrame
result_df = pd.DataFrame(results_data)
# The UPRN is integer-valued, but the no-match rows append None, so the
# mixed column lands as float64 and would serialise as "100020933699.0".
# Coerce to a nullable integer so it round-trips as "100020933699"
# (empty when missing) — the form the finaliser and the combined-results
# UI expect. `to_numeric(errors="coerce")` also folds the
# "invalid postcode" sentinel + blanks to NA (read back as missing).
if "address2uprn_uprn" in result_df.columns:
result_df["address2uprn_uprn"] = pd.to_numeric(
result_df["address2uprn_uprn"], errors="coerce"
).astype("Int64")
# Save results to S3
try:
save_results_to_s3(result_df, str(task_id), str(subtask_id))

View file

@ -107,8 +107,18 @@ def _parse_uprn(raw: Any) -> Optional[int]:
return None
try:
return int(val)
except ValueError:
pass
# The combiner writes the UPRN column via pandas; when any row in the batch is
# unmatched (NaN), pandas coerces the whole column to float64, so a UPRN
# arrives as e.g. "100020933699.0". Recover the integer from that float form
# (otherwise every matched row in a mixed batch parses to None — the cause of
# NULL property.uprn and empty property_overrides for matched rows).
try:
as_float = float(val)
except ValueError:
return None
return int(as_float) if as_float.is_integer() else None
def _parse_lexiscore(raw: Any) -> Optional[float]:
@ -189,15 +199,22 @@ class BulkUploadFinaliserOrchestrator:
multi_entry_ordering: Optional[dict[str, list[int]]],
column_mapping: Optional[dict[str, str]],
) -> None:
# Nothing to do without the classifier inputs or the override
# collaborators (e.g. an upload with no classifier columns mapped).
# Nothing to do without the override collaborators (the v1 property-only
# path / its tests construct without them).
if (
not classifier_rows
or not column_mapping
or self._property_override_repo is None
self._property_override_repo is None
or self._landlord_override_reader is None
):
return
# Log WHY when we write nothing, so an empty property_overrides is never a
# silent mystery (e.g. an upload whose CSVs predate the source_row_id join
# key, or one with no classifier columns mapped).
if not classifier_rows:
logger.info("Finalise: no classifier CSV rows; no property_overrides.")
return
if not column_mapping:
logger.info("Finalise: empty column_mapping; no property_overrides.")
return
override_rows = self._build_overrides(
combiner_rows,
classifier_rows,
@ -205,7 +222,8 @@ class BulkUploadFinaliserOrchestrator:
column_mapping,
portfolio_id,
)
self._property_override_repo.upsert_all(override_rows)
affected = self._property_override_repo.upsert_all(override_rows)
logger.info("Finalise: upserted %d property_overrides.", affected)
def _build_overrides(
self,
@ -224,12 +242,33 @@ class BulkUploadFinaliserOrchestrator:
# UPRN-matched combiner rows: source_row_id -> uprn.
uprn_by_row_id: dict[str, int] = {}
combiner_with_srid = 0
combiner_with_uprn = 0
for row in combiner_rows:
row_id = _normalize(row.get(SOURCE_ROW_ID_COL))
uprn = _parse_uprn(row.get(UPRN_COL))
if row_id:
combiner_with_srid += 1
if uprn is not None:
combiner_with_uprn += 1
if row_id and uprn is not None:
uprn_by_row_id[row_id] = uprn
if not uprn_by_row_id:
classifier_with_srid = sum(
1 for r in classifier_rows if _normalize(r.get(SOURCE_ROW_ID_COL))
)
logger.warning(
"Finalise: 0 override candidates — %d combiner rows (%d with "
"source_row_id, %d with a UPRN), %d/%d classifier rows with "
"source_row_id. If source_row_id counts are 0, the upload's CSVs "
"predate the join-key change (ADR-0006) — re-run from "
"start-address-matching so both CSVs carry it.",
len(combiner_rows),
combiner_with_srid,
combiner_with_uprn,
classifier_with_srid,
len(classifier_rows),
)
return []
ids_by_uprn = self._property_repo.ids_by_uprn(

View file

@ -135,6 +135,27 @@ def test_finalise_falls_back_to_user_input_when_unmatched() -> None:
assert row.lexiscore is None
def test_finalise_parses_pandas_float_uprn() -> None:
# The combiner writes UPRNs as float strings ("100023.0") whenever the batch
# has any unmatched rows; the finaliser must still recover the integer, else
# matched properties get NULL uprn and no overrides.
orchestrator, prop, _status = _orchestrator()
rows = [
{
"Address 1": "1 Some Street",
"postcode": "A0 0AA",
"address2uprn_uprn": "100023.0",
"address2uprn_address": "1 SOME STREET, TOWN, A0 0AA",
"address2uprn_lexiscore": "0.9",
},
]
orchestrator.finalise(rows, portfolio_id=7, task_id=uuid4())
(row,) = prop.inserted
assert row.uprn == 100023
# --- v2: property_overrides (ADR-0006) ------------------------------------
@ -158,7 +179,8 @@ def test_finalise_writes_overrides_for_uprn_rows_splitting_by_part() -> None:
combiner = [
{
"Address 1": "1 Some Street",
"address2uprn_uprn": "100023",
# pandas float form, as the combiner actually writes it in a mixed batch.
"address2uprn_uprn": "100023.0",
"source_row_id": "row-a",
},
# A no-UPRN row: must get NO overrides (deferred to v3).