Merge branch 'main' into feature/handle-new-magicplan-response-structure

2026-07-27 23:35:01 +00:00 · 2026-06-08 09:57:26 +00:00 · 2026-06-08 09:57:26 +00:00 · c22ee3821b
commit c22ee3821b
parent 41a40c9ba0 98297f803a
4 changed files with 83 additions and 10 deletions
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@ -510,6 +510,17 @@ def handler(event, context, local=False):
            # Create results DataFrame
            result_df = pd.DataFrame(results_data)

+            # The UPRN is integer-valued, but the no-match rows append None, so the
+            # mixed column lands as float64 and would serialise as "100020933699.0".
+            # Coerce to a nullable integer so it round-trips as "100020933699"
+            # (empty when missing) — the form the finaliser and the combined-results
+            # UI expect. `to_numeric(errors="coerce")` also folds the
+            # "invalid postcode" sentinel + blanks to NA (read back as missing).
+            if "address2uprn_uprn" in result_df.columns:
+                result_df["address2uprn_uprn"] = pd.to_numeric(
+                    result_df["address2uprn_uprn"], errors="coerce"
+                ).astype("Int64")
+
            # Save results to S3
            try:
                save_results_to_s3(result_df, str(task_id), str(subtask_id))
--- a/deployment/terraform/shared/main.tf
+++ b/deployment/terraform/shared/main.tf
@ -572,8 +572,9 @@ module "bulk_upload_finaliser_registry" {
  stage  = var.stage
 }

-# The finaliser only reads the combiner output (bulk_final_outputs) to insert
-# property rows; it writes to Postgres, not S3.
+# The finaliser reads the combiner output (bulk_final_outputs) to insert property
+# rows, and — for v2 (ADR-0006) — the classifier CSV (bulk_onboarding_inputs) to
+# populate property_overrides. It writes to Postgres, not S3.
 module "bulk_upload_finaliser_s3_read" {
  source = "../modules/s3_iam_policy"

@ -581,7 +582,7 @@ module "bulk_upload_finaliser_s3_read" {
  policy_description = "Allow bulk_upload_finaliser Lambda to read combiner output from retrofit-data bucket"
  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
  actions            = ["s3:GetObject", "s3:ListBucket"]
-  resource_paths     = ["/bulk_final_outputs/*"]
+  resource_paths     = ["/bulk_final_outputs/*", "/bulk_onboarding_inputs/*"]
 }

 output "bulk_upload_finaliser_s3_read_arn" {
--- a/orchestration/bulk_upload_finaliser_orchestrator.py
+++ b/orchestration/bulk_upload_finaliser_orchestrator.py
@ -107,8 +107,18 @@ def _parse_uprn(raw: Any) -> Optional[int]:
        return None
    try:
        return int(val)
+    except ValueError:
+        pass
+    # The combiner writes the UPRN column via pandas; when any row in the batch is
+    # unmatched (NaN), pandas coerces the whole column to float64, so a UPRN
+    # arrives as e.g. "100020933699.0". Recover the integer from that float form
+    # (otherwise every matched row in a mixed batch parses to None — the cause of
+    # NULL property.uprn and empty property_overrides for matched rows).
+    try:
+        as_float = float(val)
    except ValueError:
        return None
+    return int(as_float) if as_float.is_integer() else None


 def _parse_lexiscore(raw: Any) -> Optional[float]:
@ -189,15 +199,22 @@ class BulkUploadFinaliserOrchestrator:
        multi_entry_ordering: Optional[dict[str, list[int]]],
        column_mapping: Optional[dict[str, str]],
    ) -> None:
-        # Nothing to do without the classifier inputs or the override
-        # collaborators (e.g. an upload with no classifier columns mapped).
+        # Nothing to do without the override collaborators (the v1 property-only
+        # path / its tests construct without them).
        if (
-            not classifier_rows
-            or not column_mapping
-            or self._property_override_repo is None
+            self._property_override_repo is None
            or self._landlord_override_reader is None
        ):
            return
+        # Log WHY when we write nothing, so an empty property_overrides is never a
+        # silent mystery (e.g. an upload whose CSVs predate the source_row_id join
+        # key, or one with no classifier columns mapped).
+        if not classifier_rows:
+            logger.info("Finalise: no classifier CSV rows; no property_overrides.")
+            return
+        if not column_mapping:
+            logger.info("Finalise: empty column_mapping; no property_overrides.")
+            return
        override_rows = self._build_overrides(
            combiner_rows,
            classifier_rows,
@ -205,7 +222,8 @@ class BulkUploadFinaliserOrchestrator:
            column_mapping,
            portfolio_id,
        )
-        self._property_override_repo.upsert_all(override_rows)
+        affected = self._property_override_repo.upsert_all(override_rows)
+        logger.info("Finalise: upserted %d property_overrides.", affected)

    def _build_overrides(
        self,
@ -224,12 +242,33 @@ class BulkUploadFinaliserOrchestrator:

        # UPRN-matched combiner rows: source_row_id -> uprn.
        uprn_by_row_id: dict[str, int] = {}
+        combiner_with_srid = 0
+        combiner_with_uprn = 0
        for row in combiner_rows:
            row_id = _normalize(row.get(SOURCE_ROW_ID_COL))
            uprn = _parse_uprn(row.get(UPRN_COL))
+            if row_id:
+                combiner_with_srid += 1
+            if uprn is not None:
+                combiner_with_uprn += 1
            if row_id and uprn is not None:
                uprn_by_row_id[row_id] = uprn
        if not uprn_by_row_id:
+            classifier_with_srid = sum(
+                1 for r in classifier_rows if _normalize(r.get(SOURCE_ROW_ID_COL))
+            )
+            logger.warning(
+                "Finalise: 0 override candidates — %d combiner rows (%d with "
+                "source_row_id, %d with a UPRN), %d/%d classifier rows with "
+                "source_row_id. If source_row_id counts are 0, the upload's CSVs "
+                "predate the join-key change (ADR-0006) — re-run from "
+                "start-address-matching so both CSVs carry it.",
+                len(combiner_rows),
+                combiner_with_srid,
+                combiner_with_uprn,
+                classifier_with_srid,
+                len(classifier_rows),
+            )
            return []

        ids_by_uprn = self._property_repo.ids_by_uprn(
--- a/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py
+++ b/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py
@ -135,6 +135,27 @@ def test_finalise_falls_back_to_user_input_when_unmatched() -> None:
    assert row.lexiscore is None


+def test_finalise_parses_pandas_float_uprn() -> None:
+    # The combiner writes UPRNs as float strings ("100023.0") whenever the batch
+    # has any unmatched rows; the finaliser must still recover the integer, else
+    # matched properties get NULL uprn and no overrides.
+    orchestrator, prop, _status = _orchestrator()
+    rows = [
+        {
+            "Address 1": "1 Some Street",
+            "postcode": "A0 0AA",
+            "address2uprn_uprn": "100023.0",
+            "address2uprn_address": "1 SOME STREET, TOWN, A0 0AA",
+            "address2uprn_lexiscore": "0.9",
+        },
+    ]
+
+    orchestrator.finalise(rows, portfolio_id=7, task_id=uuid4())
+
+    (row,) = prop.inserted
+    assert row.uprn == 100023
+
+
 # --- v2: property_overrides (ADR-0006) ------------------------------------


@ -158,7 +179,8 @@ def test_finalise_writes_overrides_for_uprn_rows_splitting_by_part() -> None:
    combiner = [
        {
            "Address 1": "1 Some Street",
-            "address2uprn_uprn": "100023",
+            # pandas float form, as the combiner actually writes it in a mixed batch.
+            "address2uprn_uprn": "100023.0",
            "source_row_id": "row-a",
        },
        # A no-UPRN row: must get NO overrides (deferred to v3).