Stringify task and subtask IDs to fix bugs

2026-07-27 23:35:01 +00:00 · 2025-11-28 12:43:02 +00:00 · 2025-11-28 12:43:02 +00:00 · ba230a2ff8
commit ba230a2ff8
parent ac4f4b52a1
4 changed files with 83 additions and 27 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -598,15 +598,7 @@ class Property:
        if not self.data:
            raise ValueError("Property does not contain data")

-        components = [
-            'floor-description', 'hotwater-description', 'main-fuel', 'mainheat-description',
-            'mainheatcont-description', 'roof-description', 'walls-description', 'windows-description',
-            'lighting-description'
-        ]
-
-        for description in components:
-
-            cleaner_cls = all_cleaner_map[description]
+        for description, attribute in cleaned.items():

            if self.data[description] in self.DATA_ANOMALY_MATCHES:
                template = cleaned[description][0]
@ -624,22 +616,35 @@ class Property:
                )
                continue

-            if description == "lighting-description":
-                cleaner_cls = cleaner_cls(self.data[description], averages=None)
-            else:
-                cleaner_cls = cleaner_cls(self.data[description])
+            attributes = [
+                x
+                for x in cleaned[description]
+                if x["original_description"] == self.data[description]
+            ]

-            processed = {
-                "original_description": self.data[description],
-                "clean_description": cleaner_cls.description.replace(
-                    "(assumed)", ""
+            if len(attributes) > 1:
+                raise ValueError(
+                    "Either No attributes or multiple found for %s" % description
                )
-                .rstrip()
-                .capitalize(),
-                **cleaner_cls.process(),
-            }

-            attributes = [processed]
+            if len(attributes) == 0:
+                # No pre-cleaned entry matched, so clean the description on the fly
+                cleaner_cls = all_cleaner_map[description]
+                if description == "lighting-description":
+                    cleaner_cls = cleaner_cls(self.data[description], averages=None)
+                else:
+                    cleaner_cls = cleaner_cls(self.data[description])
+                processed = {
+                    "original_description": self.data[description],
+                    "clean_description": cleaner_cls.description.replace(
+                        "(assumed)", ""
+                    )
+                    .rstrip()
+                    .capitalize(),
+                    **cleaner_cls.process(),
+                }
+
+                attributes = [processed]

            setattr(self, self.ATTRIBUTE_MAP[description], attributes[0])

@ -1340,7 +1345,8 @@ class Property:
        # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
        remap_fuel_sources = [
            "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
-            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
+            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal",
+            "Natural Gas (Community Scheme)"
        ]

        heating_energy_source = self.heating_energy_source
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@ -88,6 +88,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
    "Electric ceiling heating": {"fuel": "Electricity", "cop": 1},
    "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85},
    "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85},
+    "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
 }

 # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -135,8 +135,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
                inputs=data,
                task_only=False
            )
-            data["task_id"] = task_id
-            data["subtask_id"] = subtask_id
+            data["task_id"] = str(task_id)
+            data["subtask_id"] = str(subtask_id)
            message_body = json.dumps(data)
            response = sqs_client.send_message(
                QueueUrl=settings.ENGINE_SQS_URL,
--- a/etl/customers/peabody/Nov
+++ b/etl/customers/peabody/Nov
@ -214,14 +214,26 @@ archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80]
 archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1
 archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str)

+# For the sample, look for invalid-looking UPRNs and remove them.
+sample_from = sustainability_data.copy()
+# Drop UPRNs that are not numeric or that begin with a zero
+sample_from["uprn_not_numeric"] = ~sample_from["UPRN"].apply(lambda x: str(x).isnumeric())
+sample_from = sample_from[~sample_from["uprn_not_numeric"]]
+
+sample_from["uprn_has_leading_zero"] = sample_from["UPRN"].apply(lambda x: str(x).startswith("0"))
+sample_from = sample_from[~sample_from["uprn_has_leading_zero"]]
+
+sample_from = sample_from[~pd.isnull(sample_from["UPRN"])]
+
 # We now take a sample of the properties that represent 85% of the total properties
-sustainability_data = sustainability_data.merge(
+sample_from = sample_from.merge(
    archetypes_85,
    on=archetype_variables,
    how="inner"
 )
+
 # We take 1 random property, by archetype 85 reference
-modelling_sample = sustainability_data.groupby("Archetypes_85_reference").apply(
+modelling_sample = sample_from.groupby("Archetypes_85_reference").apply(
    lambda x: x.sample(1, random_state=42)
 ).reset_index(drop=True)

@ -238,6 +250,43 @@ for col in archetype_variables:
    print(f"--- {col} ---")
    print(compare_distributions(sustainability_data, modelling_sample, col))

+# Prepare the sample for export: assign a 1-based domna_property_id from the row index
+modelling_sample["domna_property_id"] = modelling_sample.index + 1
+# Rename the source columns to the output column names
+modelling_sample = modelling_sample.rename(
+    columns={
+        "Org Ref": "landlord_property_id", "Address 1": "domna_address_1",
+        "Postcode": "postcode", "Type": "landlord_property_type",
+        "Attachment": "landlord_built_form",
+        "Heating": "landlord_heating_system",
+        "UPRN": "epc_os_uprn"
+    }
+)
+
+modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map(
+    {
+        "MidTerrace": "Mid-Terrace",
+        "EndTerrace": "End-Terrace",
+        "SemiDetached": "Semi-Detached",
+        "Detached": "Detached",
+        "EnclosedEndTerrace": "Enclosed End-Terrace",
+        "EnclosedMidTerrace": "Enclosed Mid-Terrace",
+    }
+)
+
+if pd.isnull(modelling_sample["domna_built_form"]).sum():
+    raise ValueError("Some built forms are null after mapping")
+
+
+# Build a full address by joining the non-empty address parts
+def make_full_address(x):
+    to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']]
+    to_join = [x for x in to_join if not pd.isnull(x) and x != '']
+    return ", ".join(to_join)
+
+
+modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_full_address(x), axis=1)
+
 # Save this CSV as input
 modelling_sample.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx",