mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fix
This commit is contained in:
parent
9427a6d40b
commit
8b9dcc73f2
2 changed files with 68 additions and 7 deletions
|
|
@ -107,8 +107,18 @@ def _parse_uprn(raw: Any) -> Optional[int]:
|
|||
return None
|
||||
try:
|
||||
return int(val)
|
||||
except ValueError:
|
||||
pass
|
||||
# The combiner writes the UPRN column via pandas; when any row in the batch is
|
||||
# unmatched (NaN), pandas coerces the whole column to float64, so a UPRN
|
||||
# arrives as e.g. "100020933699.0". Recover the integer from that float form
|
||||
# (otherwise every matched row in a mixed batch parses to None — the cause of
|
||||
# NULL property.uprn and empty property_overrides for matched rows).
|
||||
try:
|
||||
as_float = float(val)
|
||||
except ValueError:
|
||||
return None
|
||||
return int(as_float) if as_float.is_integer() else None
|
||||
|
||||
|
||||
def _parse_lexiscore(raw: Any) -> Optional[float]:
|
||||
|
|
@ -189,15 +199,22 @@ class BulkUploadFinaliserOrchestrator:
|
|||
multi_entry_ordering: Optional[dict[str, list[int]]],
|
||||
column_mapping: Optional[dict[str, str]],
|
||||
) -> None:
|
||||
# Nothing to do without the classifier inputs or the override
|
||||
# collaborators (e.g. an upload with no classifier columns mapped).
|
||||
# Nothing to do without the override collaborators (the v1 property-only
|
||||
# path / its tests construct without them).
|
||||
if (
|
||||
not classifier_rows
|
||||
or not column_mapping
|
||||
or self._property_override_repo is None
|
||||
self._property_override_repo is None
|
||||
or self._landlord_override_reader is None
|
||||
):
|
||||
return
|
||||
# Log WHY when we write nothing, so an empty property_overrides is never a
|
||||
# silent mystery (e.g. an upload whose CSVs predate the source_row_id join
|
||||
# key, or one with no classifier columns mapped).
|
||||
if not classifier_rows:
|
||||
logger.info("Finalise: no classifier CSV rows; no property_overrides.")
|
||||
return
|
||||
if not column_mapping:
|
||||
logger.info("Finalise: empty column_mapping; no property_overrides.")
|
||||
return
|
||||
override_rows = self._build_overrides(
|
||||
combiner_rows,
|
||||
classifier_rows,
|
||||
|
|
@ -205,7 +222,8 @@ class BulkUploadFinaliserOrchestrator:
|
|||
column_mapping,
|
||||
portfolio_id,
|
||||
)
|
||||
self._property_override_repo.upsert_all(override_rows)
|
||||
affected = self._property_override_repo.upsert_all(override_rows)
|
||||
logger.info("Finalise: upserted %d property_overrides.", affected)
|
||||
|
||||
def _build_overrides(
|
||||
self,
|
||||
|
|
@ -224,12 +242,33 @@ class BulkUploadFinaliserOrchestrator:
|
|||
|
||||
# UPRN-matched combiner rows: source_row_id -> uprn.
|
||||
uprn_by_row_id: dict[str, int] = {}
|
||||
combiner_with_srid = 0
|
||||
combiner_with_uprn = 0
|
||||
for row in combiner_rows:
|
||||
row_id = _normalize(row.get(SOURCE_ROW_ID_COL))
|
||||
uprn = _parse_uprn(row.get(UPRN_COL))
|
||||
if row_id:
|
||||
combiner_with_srid += 1
|
||||
if uprn is not None:
|
||||
combiner_with_uprn += 1
|
||||
if row_id and uprn is not None:
|
||||
uprn_by_row_id[row_id] = uprn
|
||||
if not uprn_by_row_id:
|
||||
classifier_with_srid = sum(
|
||||
1 for r in classifier_rows if _normalize(r.get(SOURCE_ROW_ID_COL))
|
||||
)
|
||||
logger.warning(
|
||||
"Finalise: 0 override candidates — %d combiner rows (%d with "
|
||||
"source_row_id, %d with a UPRN), %d/%d classifier rows with "
|
||||
"source_row_id. If source_row_id counts are 0, the upload's CSVs "
|
||||
"predate the join-key change (ADR-0006) — re-run from "
|
||||
"start-address-matching so both CSVs carry it.",
|
||||
len(combiner_rows),
|
||||
combiner_with_srid,
|
||||
combiner_with_uprn,
|
||||
classifier_with_srid,
|
||||
len(classifier_rows),
|
||||
)
|
||||
return []
|
||||
|
||||
ids_by_uprn = self._property_repo.ids_by_uprn(
|
||||
|
|
|
|||
|
|
@ -135,6 +135,27 @@ def test_finalise_falls_back_to_user_input_when_unmatched() -> None:
|
|||
assert row.lexiscore is None
|
||||
|
||||
|
||||
def test_finalise_parses_pandas_float_uprn() -> None:
|
||||
# The combiner writes UPRNs as float strings ("100023.0") whenever the batch
|
||||
# has any unmatched rows; the finaliser must still recover the integer, else
|
||||
# matched properties get NULL uprn and no overrides.
|
||||
orchestrator, prop, _status = _orchestrator()
|
||||
rows = [
|
||||
{
|
||||
"Address 1": "1 Some Street",
|
||||
"postcode": "A0 0AA",
|
||||
"address2uprn_uprn": "100023.0",
|
||||
"address2uprn_address": "1 SOME STREET, TOWN, A0 0AA",
|
||||
"address2uprn_lexiscore": "0.9",
|
||||
},
|
||||
]
|
||||
|
||||
orchestrator.finalise(rows, portfolio_id=7, task_id=uuid4())
|
||||
|
||||
(row,) = prop.inserted
|
||||
assert row.uprn == 100023
|
||||
|
||||
|
||||
# --- v2: property_overrides (ADR-0006) ------------------------------------
|
||||
|
||||
|
||||
|
|
@ -158,7 +179,8 @@ def test_finalise_writes_overrides_for_uprn_rows_splitting_by_part() -> None:
|
|||
combiner = [
|
||||
{
|
||||
"Address 1": "1 Some Street",
|
||||
"address2uprn_uprn": "100023",
|
||||
# pandas float form, as the combiner actually writes it in a mixed batch.
|
||||
"address2uprn_uprn": "100023.0",
|
||||
"source_row_id": "row-a",
|
||||
},
|
||||
# A no-UPRN row: must get NO overrides (deferred to v3).
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue