stringify task id to fix bugs

This commit is contained in:
Khalim Conn-Kowlessar 2025-11-28 12:43:02 +00:00
parent ac4f4b52a1
commit ba230a2ff8
4 changed files with 83 additions and 27 deletions

View file

@ -598,15 +598,7 @@ class Property:
if not self.data:
raise ValueError("Property does not contain data")
components = [
'floor-description', 'hotwater-description', 'main-fuel', 'mainheat-description',
'mainheatcont-description', 'roof-description', 'walls-description', 'windows-description',
'lighting-description'
]
for description in components:
cleaner_cls = all_cleaner_map[description]
for description, attribute in cleaned.items():
if self.data[description] in self.DATA_ANOMALY_MATCHES:
template = cleaned[description][0]
@ -624,22 +616,35 @@ class Property:
)
continue
if description == "lighting-description":
cleaner_cls = cleaner_cls(self.data[description], averages=None)
else:
cleaner_cls = cleaner_cls(self.data[description])
attributes = [
x
for x in cleaned[description]
if x["original_description"] == self.data[description]
]
processed = {
"original_description": self.data[description],
"clean_description": cleaner_cls.description.replace(
"(assumed)", ""
if len(attributes) > 1:
raise ValueError(
"Either No attributes or multiple found for %s" % description
)
.rstrip()
.capitalize(),
**cleaner_cls.process(),
}
attributes = [processed]
if len(attributes) == 0:
# We attempt to perform the clean on the fly
cleaner_cls = all_cleaner_map[description]
if description == "lighting-description":
cleaner_cls = cleaner_cls(self.data[description], averages=None)
else:
cleaner_cls = cleaner_cls(self.data[description])
processed = {
"original_description": self.data[description],
"clean_description": cleaner_cls.description.replace(
"(assumed)", ""
)
.rstrip()
.capitalize(),
**cleaner_cls.process(),
}
attributes = [processed]
setattr(self, self.ATTRIBUTE_MAP[description], attributes[0])
@ -1340,7 +1345,8 @@ class Property:
# If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
remap_fuel_sources = [
"Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
"Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
"Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal",
"Natural Gas (Community Scheme)"
]
heating_energy_source = self.heating_energy_source

View file

@ -88,6 +88,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
"Electric ceiling heating": {"fuel": "Electricity", "cop": 1},
"Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85},
"Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85},
"Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it

View file

@ -135,8 +135,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
inputs=data,
task_only=False
)
data["task_id"] = task_id
data["subtask_id"] = subtask_id
data["task_id"] = str(task_id)
data["subtask_id"] = str(subtask_id)
message_body = json.dumps(data)
response = sqs_client.send_message(
QueueUrl=settings.ENGINE_SQS_URL,

View file

@ -214,14 +214,26 @@ archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80]
archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1
archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str)
# For the sample, look for invalid looking UPRNs and remove them.
sample_from = sustainability_data.copy()
# 1) Check for UPRNs that are not numeric or begin with a Zero
sample_from["uprn_not_numeric"] = ~sample_from["UPRN"].apply(lambda x: str(x).isnumeric())
sample_from = sample_from[~sample_from["uprn_not_numeric"]]
sample_from["uprn_has_leading_zero"] = sample_from["UPRN"].apply(lambda x: str(x).startswith("0"))
sample_from = sample_from[~sample_from["uprn_has_leading_zero"]]
sample_from = sample_from[~pd.isnull(sample_from["UPRN"])]
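# Restrict the candidate properties to those whose archetype is in archetypes_85 (inner merge, no sampling yet).
# NOTE(review): the name says "85" but the cumulative-proportion cut-off applied above is 0.80 - confirm which is intended.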
sustainability_data = sustainability_data.merge(
sample_from = sample_from.merge(
archetypes_85,
on=archetype_variables,
how="inner"
)
# We take 1 random property, by archetype 85 reference
modelling_sample = sustainability_data.groupby("Archetypes_85_reference").apply(
modelling_sample = sample_from.groupby("Archetypes_85_reference").apply(
lambda x: x.sample(1, random_state=42)
).reset_index(drop=True)
@ -238,6 +250,43 @@ for col in archetype_variables:
print(f"--- {col} ---")
print(compare_distributions(sustainability_data, modelling_sample, col))
# prepare
modelling_sample["domna_property_id"] = modelling_sample.index + 1
# Rename
modelling_sample = modelling_sample.rename(
columns={
"Org Ref": "landlord_property_id", "Address 1": "domna_address_1",
"Postcode": "postcode", "Type": "landlord_property_type",
"Attachment": "landlord_built_form",
"Heating": "landlord_heating_system",
"UPRN": "epc_os_uprn"
}
)
modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map(
{
"MidTerrace": "Mid-Terrace",
"EndTerrace": "End-Terrace",
"SemiDetached": "Semi-Detached",
"Detached": "Detached",
"EnclosedEndTerrace": "Enclosed End-Terrace",
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
}
)
if pd.isnull(modelling_sample["domna_built_form"]).sum():
raise ValueError("Some built forms are null after mapping")
# Placeholder copies
def make_full_address(x):
    """Build a single comma-separated address string from a row's address parts.

    Reads ``x['domna_address_1']``, ``x['Address 2']`` and ``x['Address 3']``
    (``x`` is any mapping-like row, e.g. a pandas Series passed by
    ``DataFrame.apply(..., axis=1)``), drops parts that are null or blank, and
    joins the rest with ``", "``.

    Non-string parts (for example a numeric house number) are converted with
    ``str`` so that the join cannot raise ``TypeError``; surrounding whitespace
    is stripped and whitespace-only parts are treated as empty.

    Returns an empty string when every part is missing.
    """
    address_keys = ('domna_address_1', 'Address 2', 'Address 3')
    parts = []
    for key in address_keys:
        value = x[key]
        # Null check comes first: NaN/None must not be stringified to "nan"/"None".
        if pd.isnull(value):
            continue
        text = str(value).strip()
        if text:
            parts.append(text)
    return ", ".join(parts)
modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_full_address(x), axis=1)
# Save this CSV as input
modelling_sample.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx",