From ba230a2ff8f23809f9c86aa20324da7d98cab0d6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 28 Nov 2025 12:43:02 +0000 Subject: [PATCH] stringify task id to fix bugs --- backend/Property.py | 52 ++++++++++-------- backend/app/assumptions.py | 1 + backend/app/plan/router.py | 4 +- .../Nov 2025 Consulting Project/data_prep.py | 53 ++++++++++++++++++- 4 files changed, 83 insertions(+), 27 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 58909c40..6148b40a 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -598,15 +598,7 @@ class Property: if not self.data: raise ValueError("Property does not contain data") - components = [ - 'floor-description', 'hotwater-description', 'main-fuel', 'mainheat-description', - 'mainheatcont-description', 'roof-description', 'walls-description', 'windows-description', - 'lighting-description' - ] - - for description in components: - - cleaner_cls = all_cleaner_map[description] + for description, attribute in cleaned.items(): if self.data[description] in self.DATA_ANOMALY_MATCHES: template = cleaned[description][0] @@ -624,22 +616,35 @@ class Property: ) continue - if description == "lighting-description": - cleaner_cls = cleaner_cls(self.data[description], averages=None) - else: - cleaner_cls = cleaner_cls(self.data[description]) + attributes = [ + x + for x in cleaned[description] + if x["original_description"] == self.data[description] + ] - processed = { - "original_description": self.data[description], - "clean_description": cleaner_cls.description.replace( - "(assumed)", "" + if len(attributes) > 1: + raise ValueError( + "Either No attributes or multiple found for %s" % description ) - .rstrip() - .capitalize(), - **cleaner_cls.process(), - } - attributes = [processed] + if len(attributes) == 0: + # We attempt to perform the clean on the fly + cleaner_cls = all_cleaner_map[description] + if description == "lighting-description": + cleaner_cls = cleaner_cls(self.data[description], averages=None) + else: + cleaner_cls = cleaner_cls(self.data[description]) + processed = { + "original_description": self.data[description], + "clean_description": cleaner_cls.description.replace( + "(assumed)", "" + ) + .rstrip() + .capitalize(), + **cleaner_cls.process(), + } + + attributes = [processed] setattr(self, self.ATTRIBUTE_MAP[description], attributes[0]) @@ -1340,7 +1345,8 @@ class Property: # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain remap_fuel_sources = [ "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel", - "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal" + "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal", + "Natural Gas (Community Scheme)" ] heating_energy_source = self.heating_energy_source diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py index 492b9042..1c46a5c8 100644 --- a/backend/app/assumptions.py +++ b/backend/app/assumptions.py @@ -88,6 +88,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = { "Electric ceiling heating": {"fuel": "Electricity", "cop": 1}, "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85}, "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85}, + "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100}, } # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 5611a53d..b53da6c6 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -135,8 +135,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): inputs=data, task_only=False ) - data["task_id"] = task_id - data["subtask_id"] = subtask_id + data["task_id"] = str(task_id) + data["subtask_id"] = str(subtask_id) message_body = json.dumps(data) response = sqs_client.send_message( QueueUrl=settings.ENGINE_SQS_URL, diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py index 77736aff..e05d82e4 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py @@ -214,14 +214,26 @@ archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80] archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1 archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str) +# For the sample, look for invalid looking UPRNs and remove them. +sample_from = sustainability_data.copy() +# 1) Check for UPRNs that are not numeric or begin with a Zero +sample_from["uprn_not_numeric"] = ~sample_from["UPRN"].apply(lambda x: str(x).isnumeric()) +sample_from = sample_from[~sample_from["uprn_not_numeric"]] + +sample_from["uprn_has_leading_zero"] = sample_from["UPRN"].apply(lambda x: str(x).startswith("0")) +sample_from = sample_from[~sample_from["uprn_has_leading_zero"]] + +sample_from = sample_from[~pd.isnull(sample_from["UPRN"])] + # We now take a sample of the properties that represent 85% of the total properties -sustainability_data = sustainability_data.merge( +sample_from = sample_from.merge( archetypes_85, on=archetype_variables, how="inner" ) + # We take 1 random property, by archetype 85 reference -modelling_sample = sustainability_data.groupby("Archetypes_85_reference").apply( +modelling_sample = sample_from.groupby("Archetypes_85_reference").apply( lambda x: x.sample(1, random_state=42) ).reset_index(drop=True) @@ -238,6 +250,43 @@ for col in archetype_variables: print(f"--- {col} ---") print(compare_distributions(sustainability_data, modelling_sample, col)) +# prepare +modelling_sample["domna_property_id"] = modelling_sample.index + 1 +# Rename +modelling_sample = modelling_sample.rename( + columns={ + "Org Ref": "landlord_property_id", "Address 1": "domna_address_1", + "Postcode": "postcode", "Type": "landlord_property_type", + "Attachment": "landlord_built_form", + "Heating": "landlord_heating_system", + "UPRN": "epc_os_uprn" + } +) + +modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map( + { + "MidTerrace": "Mid-Terrace", + "EndTerrace": "End-Terrace", + "SemiDetached": "Semi-Detached", + "Detached": "Detached", + "EnclosedEndTerrace": "Enclosed End-Terrace", + "EnclosedMidTerrace": "Enclosed Mid-Terrace", + } +) + +if pd.isnull(modelling_sample["domna_built_form"]).sum(): + raise ValueError("Some built forms are null after mapping") + + +# Placeholder copies +def make_full_address(x): + to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']] + to_join = [x for x in to_join if not pd.isnull(x) and x != ''] + return ", ".join(to_join) + + +modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_full_address(x), axis=1) + # Save this CSV as input modelling_sample.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx",