From ba230a2ff8f23809f9c86aa20324da7d98cab0d6 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 28 Nov 2025 12:43:02 +0000
Subject: [PATCH] stringify task id to fix bugs

---
 backend/Property.py                           | 52 ++++++++++--------
 backend/app/assumptions.py                    |  1 +
 backend/app/plan/router.py                    |  4 +-
 .../Nov 2025 Consulting Project/data_prep.py  | 53 ++++++++++++++++++-
 4 files changed, 83 insertions(+), 27 deletions(-)

diff --git a/backend/Property.py b/backend/Property.py
index 58909c40..6148b40a 100644
--- a/backend/Property.py
+++ b/backend/Property.py
@@ -598,15 +598,7 @@ class Property:
         if not self.data:
             raise ValueError("Property does not contain data")
 
-        components = [
-            'floor-description', 'hotwater-description', 'main-fuel', 'mainheat-description',
-            'mainheatcont-description', 'roof-description', 'walls-description', 'windows-description',
-            'lighting-description'
-        ]
-
-        for description in components:
-
-            cleaner_cls = all_cleaner_map[description]
+        for description, attribute in cleaned.items():
 
             if self.data[description] in self.DATA_ANOMALY_MATCHES:
                 template = cleaned[description][0]
@@ -624,22 +616,35 @@ class Property:
                 )
                 continue
 
-            if description == "lighting-description":
-                cleaner_cls = cleaner_cls(self.data[description], averages=None)
-            else:
-                cleaner_cls = cleaner_cls(self.data[description])
+            attributes = [
+                x
+                for x in cleaned[description]
+                if x["original_description"] == self.data[description]
+            ]
 
-            processed = {
-                "original_description": self.data[description],
-                "clean_description": cleaner_cls.description.replace(
-                    "(assumed)", ""
+            if len(attributes) > 1:
+                raise ValueError(
+                    "Either No attributes or multiple found for %s" % description
                 )
-                .rstrip()
-                .capitalize(),
-                **cleaner_cls.process(),
-            }
 
-            attributes = [processed]
+            if len(attributes) == 0:
+                # No pre-cleaned entry matched this description, so clean it on the fly
+                cleaner_cls = all_cleaner_map[description]
+                if description == "lighting-description":
+                    cleaner_cls = cleaner_cls(self.data[description], averages=None)
+                else:
+                    cleaner_cls = cleaner_cls(self.data[description])
+                processed = {
+                    "original_description": self.data[description],
+                    "clean_description": cleaner_cls.description.replace(
+                        "(assumed)", ""
+                    )
+                    .rstrip()
+                    .capitalize(),
+                    **cleaner_cls.process(),
+                }
+
+                attributes = [processed]
 
             setattr(self, self.ATTRIBUTE_MAP[description], attributes[0])
 
@@ -1340,7 +1345,8 @@ class Property:
         # If the property currently has an electric boiler, it will still benefit from the ASHP efficiency gain
         remap_fuel_sources = [
             "Natural Gas", "LPG", "Wood Logs", "Oil", "Electricity", "Coal", "Smokeless Fuel",
-            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal"
+            "Natural Gas + Solar Thermal", "Anthracite", "Wood Pellets", "LPG + Solar Thermal",
+            "Natural Gas (Community Scheme)"
         ]
 
         heating_energy_source = self.heating_energy_source
diff --git a/backend/app/assumptions.py b/backend/app/assumptions.py
index 492b9042..1c46a5c8 100644
--- a/backend/app/assumptions.py
+++ b/backend/app/assumptions.py
@@ -88,6 +88,7 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
     "Electric ceiling heating": {"fuel": "Electricity", "cop": 1},
     "Boiler and radiators, wood chips": {"fuel": "Wood Logs", "cop": 0.85},
     "Oil range cooker, no cylinder thermostat": {"fuel": "Oil", "cop": 0.85},
+    "Air source heat pump, Warm air, electric": {"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100},
 }
 
 # These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 5611a53d..b53da6c6 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -135,8 +135,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
                 inputs=data,
                 task_only=False
             )
-            data["task_id"] = task_id
-            data["subtask_id"] = subtask_id
+            data["task_id"] = str(task_id)
+            data["subtask_id"] = str(subtask_id)
             message_body = json.dumps(data)
             response = sqs_client.send_message(
                 QueueUrl=settings.ENGINE_SQS_URL,
diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py
index 77736aff..e05d82e4 100644
--- a/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py	
+++ b/etl/customers/peabody/Nov 2025 Consulting Project/data_prep.py	
@@ -214,14 +214,26 @@ archetypes_85 = archetypes[archetypes["Cumulative Proportion"] <= 0.80]
 archetypes_85["Archetypes_85_reference"] = archetypes_85.index + 1
 archetypes_85["Archetypes_85_reference"] = "Archetype_Sample_" + archetypes_85["Archetypes_85_reference"].astype(str)
 
+# For the sample, look for invalid-looking UPRNs and remove them.
+sample_from = sustainability_data.copy()
+# Drop rows whose UPRN is non-numeric, begins with a zero, or is null
+sample_from["uprn_not_numeric"] = ~sample_from["UPRN"].apply(lambda x: str(x).isnumeric())
+sample_from = sample_from[~sample_from["uprn_not_numeric"]]
+
+sample_from["uprn_has_leading_zero"] = sample_from["UPRN"].apply(lambda x: str(x).startswith("0"))
+sample_from = sample_from[~sample_from["uprn_has_leading_zero"]]
+
+sample_from = sample_from[~pd.isnull(sample_from["UPRN"])]
+
 # We now take a sample of the properties that represent 85% of the total properties
-sustainability_data = sustainability_data.merge(
+sample_from = sample_from.merge(
     archetypes_85,
     on=archetype_variables,
     how="inner"
 )
+
 # We take 1 random property, by archetype 85 reference
-modelling_sample = sustainability_data.groupby("Archetypes_85_reference").apply(
+modelling_sample = sample_from.groupby("Archetypes_85_reference").apply(
     lambda x: x.sample(1, random_state=42)
 ).reset_index(drop=True)
 
@@ -238,6 +250,43 @@ for col in archetype_variables:
     print(f"--- {col} ---")
     print(compare_distributions(sustainability_data, modelling_sample, col))
 
+# Prepare the sample for output: assign IDs, rename columns, map built-form labels
+modelling_sample["domna_property_id"] = modelling_sample.index + 1
+# Rename
+modelling_sample = modelling_sample.rename(
+    columns={
+        "Org Ref": "landlord_property_id", "Address 1": "domna_address_1",
+        "Postcode": "postcode", "Type": "landlord_property_type",
+        "Attachment": "landlord_built_form",
+        "Heating": "landlord_heating_system",
+        "UPRN": "epc_os_uprn"
+    }
+)
+
+modelling_sample["domna_built_form"] = modelling_sample["domna_built_form"].map(
+    {
+        "MidTerrace": "Mid-Terrace",
+        "EndTerrace": "End-Terrace",
+        "SemiDetached": "Semi-Detached",
+        "Detached": "Detached",
+        "EnclosedEndTerrace": "Enclosed End-Terrace",
+        "EnclosedMidTerrace": "Enclosed Mid-Terrace",
+    }
+)
+
+if pd.isnull(modelling_sample["domna_built_form"]).sum():
+    raise ValueError("Some built forms are null after mapping")
+
+
+# Build one comma-separated address from the non-empty address parts
+def make_full_address(x):
+    to_join = [x['domna_address_1'], x['Address 2'], x['Address 3']]
+    to_join = [x for x in to_join if not pd.isnull(x) and x != '']
+    return ", ".join(to_join)
+
+
+modelling_sample["domna_full_address"] = modelling_sample.apply(lambda x: make_full_address(x), axis=1)
+
 # Save this CSV as input
 modelling_sample.to_excel(
     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/modelling_sample.xlsx",