Debugging duplicated property UPRNs

This commit is contained in:
Khalim Conn-Kowlessar 2025-12-22 17:10:39 +08:00
parent 2bc80b4d50
commit c503da05a7
4 changed files with 49 additions and 26 deletions

View file

@ -81,6 +81,7 @@ class Property:
postcode,
address,
epc_record,
uprn=None, # Pass as an optional input
property_valuation=None,
already_installed=None,
non_invasive_recommendations=None,
@ -121,7 +122,7 @@ class Property:
self.valuation = property_valuation
self.uprn = epc_record.get("uprn")
self.uprn = uprn if uprn is not None else epc_record.get("uprn")
self.uprn_source = self.data.get("uprn-source")
self.full_sap_epc = epc_record.get("full_sap_epc")

View file

@ -553,22 +553,31 @@ class SearchEpc:
else:
raise ValueError("Multiple UPRNs found - investigate me")
if uprns:
uprn = uprns.pop()
# Convert to int
if not pd.isnull(uprn):
uprn = int(uprn)
else:
newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED
uprn = hash(self.address1 + self.postcode)
# if uprns:
# epc_uprn = uprns.pop()
# # Convert to int
# if not pd.isnull(epc_uprn):
# uprn = int(epc_uprn)
# else:
# newest_epc["uprn-source"] = self.UPRN_SOURCE_SIMULATED
# uprn = hash(self.address1 + self.postcode)
if self.uprn is not None and uprns:
epc_uprn = uprns.pop()
if int(epc_uprn) != self.uprn:
logger.warning(
f"Provided UPRN {self.uprn} does not match EPC UPRN {epc_uprn}, using provided UPRN"
)
# We overwrite the EPC's UPRN with the provided one, but in this instance we have likely matched the wrong EPC data
newest_epc["uprn"] = self.uprn
if self.fast:
return newest_epc, [], {}, "", "", None, ""
return newest_epc, [], {}, "", "", ""
# Retrieve postcode and address
address_epc, postcode_epc, address_postal_town = self.format_address(newest_epc=newest_epc)
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, uprn, address_postal_town
return newest_epc, older_epcs, full_sap_epc, address_epc, postcode_epc, address_postal_town
@staticmethod
def filter_newest_epc(list_of_epcs: List):
@ -923,7 +932,7 @@ class SearchEpc:
@staticmethod
def calculate_weighted_lodgement_datetime(epc_data):
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).view('int64')
numeric_dates = pd.to_datetime(epc_data['lodgement-datetime']).astype('int64')
# Calculate the weighted sum of dates
weighted_sum = (numeric_dates * epc_data['weight']).sum()
@ -991,7 +1000,7 @@ class SearchEpc:
if response["status"] == 200:
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn,
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean,
self.address_postal_town
) = self.extract_epc_data(address=self.full_address)
@ -1085,7 +1094,7 @@ class SearchEpc:
response = self.get_epc()
if response["status"] == 200:
(
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean, self.uprn,
self.newest_epc, self.older_epcs, self.full_sap_epc, self.address_clean, self.postcode_clean,
self.address_postal_town
) = self.extract_epc_data()
return

View file

@ -291,13 +291,13 @@ def bulk_update_properties(session: Session, property_updates: list[dict]):
if not property_updates:
return
now = datetime.now(pytz.utc)
now = datetime.datetime.now(pytz.utc)
stmt = (
update(PropertyModel)
update(PropertyModel.__table__)
.where(
PropertyModel.id == bindparam("property_id"),
PropertyModel.portfolio_id == bindparam("portfolio_id"),
PropertyModel.id == bindparam("b_id"),
PropertyModel.portfolio_id == bindparam("b_portfolio_id"),
)
.values(
**{k: bindparam(k) for k in property_updates[0]["data"].keys()},
@ -305,15 +305,20 @@ def bulk_update_properties(session: Session, property_updates: list[dict]):
)
)
payload = []
for row in property_updates:
payload.append({
"property_id": row["property_id"],
"portfolio_id": row["portfolio_id"],
payload = [
{
"b_id": row["property_id"], # renamed bind param
"b_portfolio_id": row["portfolio_id"],
**row["data"],
})
}
for row in property_updates
]
session.execute(stmt, payload)
session.execute(
stmt,
payload,
execution_options={"synchronize_session": False},
)
def bulk_upsert_property_details_epc(session: Session, rows: list[dict]):

View file

@ -408,6 +408,13 @@ def check_duplicate_property_ids(input_properties):
# de-dupe input_uprns
raise ValueError(f"Duplicate property IDs in the input data: {duplicates}")
# Check for duplicate UPRNs
input_uprns = [x.uprn for x in input_properties if x.uprn is not None]
if input_uprns:
if len(input_uprns) != len(set(input_uprns)):
duplicates = set([x for x in input_uprns if input_uprns.count(x) > 1])
raise ValueError(f"Duplicate UPRNs in the input properties: {duplicates}")
return True
@ -781,7 +788,7 @@ async def model_engine(body: PlanTriggerRequest):
already_installed=already_installed,
non_invasive_recommendations=non_invasive_recommendations,
valuation_data=valuation_data,
uprn=epc_searcher.uprn,
uprn=addr.uprn,
)
# Pull this out as it may get overwritten
property_non_invasive_recommendations, patch = req_data.non_invasive_recommendations, req_data.patch
@ -819,6 +826,7 @@ async def model_engine(body: PlanTriggerRequest):
input_properties.append(
Property(
id=property_id,
uprn=addr.uprn,
is_new=is_new,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,