From 8b9dcc73f219e3ed8e39e9208bafcffa6ebb4d60 Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 5 Jun 2026 17:24:17 +0000
Subject: [PATCH 1/2] fix

---
 .../bulk_upload_finaliser_orchestrator.py     | 51 ++++++++++++++++---
 ...test_bulk_upload_finaliser_orchestrator.py | 24 ++++++++-
 2 files changed, 68 insertions(+), 7 deletions(-)

diff --git a/orchestration/bulk_upload_finaliser_orchestrator.py b/orchestration/bulk_upload_finaliser_orchestrator.py
index 4d16d7f4..4aa49ab8 100644
--- a/orchestration/bulk_upload_finaliser_orchestrator.py
+++ b/orchestration/bulk_upload_finaliser_orchestrator.py
@@ -107,8 +107,18 @@ def _parse_uprn(raw: Any) -> Optional[int]:
         return None
     try:
         return int(val)
+    except ValueError:
+        pass
+    # The combiner writes the UPRN column via pandas; when any row in the batch is
+    # unmatched (NaN), pandas coerces the whole column to float64, so a UPRN
+    # arrives as e.g. "100020933699.0". Recover the integer from that float form
+    # (otherwise every matched row in a mixed batch parses to None — the cause of
+    # NULL property.uprn and empty property_overrides for matched rows).
+    try:
+        as_float = float(val)
     except ValueError:
         return None
+    return int(as_float) if as_float.is_integer() else None
 
 
 def _parse_lexiscore(raw: Any) -> Optional[float]:
@@ -189,15 +199,22 @@ class BulkUploadFinaliserOrchestrator:
         multi_entry_ordering: Optional[dict[str, list[int]]],
         column_mapping: Optional[dict[str, str]],
     ) -> None:
-        # Nothing to do without the classifier inputs or the override
-        # collaborators (e.g. an upload with no classifier columns mapped).
+        # Nothing to do without the override collaborators (the v1 property-only
+        # path / its tests construct without them).
         if (
-            not classifier_rows
-            or not column_mapping
-            or self._property_override_repo is None
+            self._property_override_repo is None
             or self._landlord_override_reader is None
         ):
             return
+        # Log WHY when we write nothing, so an empty property_overrides is never a
+        # silent mystery (e.g. an upload whose CSVs predate the source_row_id join
+        # key, or one with no classifier columns mapped).
+        if not classifier_rows:
+            logger.info("Finalise: no classifier CSV rows; no property_overrides.")
+            return
+        if not column_mapping:
+            logger.info("Finalise: empty column_mapping; no property_overrides.")
+            return
         override_rows = self._build_overrides(
             combiner_rows,
             classifier_rows,
@@ -205,7 +222,8 @@ class BulkUploadFinaliserOrchestrator:
             column_mapping,
             portfolio_id,
         )
-        self._property_override_repo.upsert_all(override_rows)
+        affected = self._property_override_repo.upsert_all(override_rows)
+        logger.info("Finalise: upserted %d property_overrides.", affected)
 
     def _build_overrides(
         self,
@@ -224,12 +242,33 @@ class BulkUploadFinaliserOrchestrator:
 
         # UPRN-matched combiner rows: source_row_id -> uprn.
         uprn_by_row_id: dict[str, int] = {}
+        combiner_with_srid = 0
+        combiner_with_uprn = 0
         for row in combiner_rows:
             row_id = _normalize(row.get(SOURCE_ROW_ID_COL))
             uprn = _parse_uprn(row.get(UPRN_COL))
+            if row_id:
+                combiner_with_srid += 1
+            if uprn is not None:
+                combiner_with_uprn += 1
             if row_id and uprn is not None:
                 uprn_by_row_id[row_id] = uprn
         if not uprn_by_row_id:
+            classifier_with_srid = sum(
+                1 for r in classifier_rows if _normalize(r.get(SOURCE_ROW_ID_COL))
+            )
+            logger.warning(
+                "Finalise: 0 override candidates — %d combiner rows (%d with "
+                "source_row_id, %d with a UPRN), %d/%d classifier rows with "
+                "source_row_id. If source_row_id counts are 0, the upload's CSVs "
+                "predate the join-key change (ADR-0006) — re-run from "
+                "start-address-matching so both CSVs carry it.",
+                len(combiner_rows),
+                combiner_with_srid,
+                combiner_with_uprn,
+                classifier_with_srid,
+                len(classifier_rows),
+            )
             return []
 
         ids_by_uprn = self._property_repo.ids_by_uprn(
diff --git a/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py b/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py
index ae285fae..94742dca 100644
--- a/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py
+++ b/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py
@@ -135,6 +135,27 @@ def test_finalise_falls_back_to_user_input_when_unmatched() -> None:
     assert row.lexiscore is None
 
 
+def test_finalise_parses_pandas_float_uprn() -> None:
+    # The combiner writes UPRNs as float strings ("100023.0") whenever the batch
+    # has any unmatched rows; the finaliser must still recover the integer, else
+    # matched properties get NULL uprn and no overrides.
+    orchestrator, prop, _status = _orchestrator()
+    rows = [
+        {
+            "Address 1": "1 Some Street",
+            "postcode": "A0 0AA",
+            "address2uprn_uprn": "100023.0",
+            "address2uprn_address": "1 SOME STREET, TOWN, A0 0AA",
+            "address2uprn_lexiscore": "0.9",
+        },
+    ]
+
+    orchestrator.finalise(rows, portfolio_id=7, task_id=uuid4())
+
+    (row,) = prop.inserted
+    assert row.uprn == 100023
+
+
 # --- v2: property_overrides (ADR-0006) ------------------------------------
 
 
@@ -158,7 +179,8 @@ def test_finalise_writes_overrides_for_uprn_rows_splitting_by_part() -> None:
     combiner = [
         {
             "Address 1": "1 Some Street",
-            "address2uprn_uprn": "100023",
+            # pandas float form, as the combiner actually writes it in a mixed batch.
+            "address2uprn_uprn": "100023.0",
             "source_row_id": "row-a",
         },
         # A no-UPRN row: must get NO overrides (deferred to v3).

From e60ca6ee5deec26f4b17982c4a7b86acc8889a4f Mon Sep 17 00:00:00 2001
From: Jun-te Kim <juntekim@googlemail.com>
Date: Fri, 5 Jun 2026 19:03:33 +0000
Subject: [PATCH 2/2] source of the problem in address2uprn

---
 backend/address2UPRN/main.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py
index 02eb27dc..136f4344 100644
--- a/backend/address2UPRN/main.py
+++ b/backend/address2UPRN/main.py
@@ -510,6 +510,17 @@ def handler(event, context, local=False):
             # Create results DataFrame
             result_df = pd.DataFrame(results_data)
 
+            # The UPRN is integer-valued, but the no-match rows append None, so the
+            # mixed column lands as float64 and would serialise as "100020933699.0".
+            # Coerce to a nullable integer so it round-trips as "100020933699"
+            # (empty when missing) — the form the finaliser and the combined-results
+            # UI expect. `to_numeric(errors="coerce")` also folds the
+            # "invalid postcode" sentinel + blanks to NA (read back as missing).
+            if "address2uprn_uprn" in result_df.columns:
+                result_df["address2uprn_uprn"] = pd.to_numeric(
+                    result_df["address2uprn_uprn"], errors="coerce"
+                ).astype("Int64")
+
             # Save results to S3
             try:
                 save_results_to_s3(result_df, str(task_id), str(subtask_id))