fixing ventilation negative kwh

2026-07-27 23:35:01 +00:00 · 2025-03-23 18:48:22 +00:00 · 2025-03-23 18:48:22 +00:00 · 1d48ede60e
commit 1d48ede60e
parent 746c42594c
8 changed files with 141 additions and 68 deletions
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@ -7,7 +7,7 @@
      <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
      <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
    </content>
-    <orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
    <orderEntry type="sourceFolder" forTests="false" />
  </component>
  <component name="PyNamespacePackagesService">
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@ -3,7 +3,7 @@
  <component name="Black">
    <option name="sdkName" value="Python 3.10 (backend)" />
  </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
  <component name="PyCharmProfessionalAdvertiser">
    <option name="shown" value="true" />
  </component>
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@ -1249,20 +1249,19 @@ class AssetList:
                (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) &
                (~self.standardised_asset_list['non-intrusives: Material'].isin(
                    ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"]
-                )
-                 )
+                ))
            )

            self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = (
                extraction_wall_filter & (
                self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW]
-            )
-            )
+            ))

            # Also include work without the SAP filter as optimistic
            self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = (
-                extraction_wall_filter
-            )
+                extraction_wall_filter & (
+                ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW]
+            ))

        elif self.old_format_non_intrusives_present:
            print("Review these categories with Kieran")
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -418,7 +418,7 @@ def app():
    epc_df = pd.concat(epc_data)
    epc_df["estimated"] = epc_df["estimated"].fillna(False)

-    z = epc_df[epc_df["domna_property_id"] == eg["domna_property_id"].values[0]]
+    epc_df["number-habitable-rooms"].mean() + 1

    # We expand out the recommendations
    recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
@ -545,26 +545,19 @@ def app():
        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
    )
    cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified")
-    cavity_fills["cavity_reason"].value_counts()
+    print(cavity_fills["cavity_reason"].value_counts())
    # Didn't identify 3 properties because they're bedsits
    # 4 properties were identified, not based on the non-intrusives but instead because
    # Westward said they were built in 2003/2007. Have adjusted this to use the age from the
    # epc as well, as EPC says 1975 and they look like 1975 properties
-    # 58 properties flagged as already having solar:
-    #
-
-    z = cavity_fills[
-        cavity_fills["cavity_reason"] == "Non-Intrusive Data Showed Empty Cavity - property already has solar"
-        ]
-
-    df = asset_list.standardised_asset_list[
-        asset_list.standardised_asset_list[asset_list.STANDARD_LANDLORD_PROPERTY_ID].isin(
-            z[asset_list.landlord_property_id].values)
-    ]
-    eg = df[df[asset_list.STANDARD_LANDLORD_PROPERTY_ID] == "TOTNEWINA0102300"]
-
-    z[["Address", "WFT EDIT Postcode", asset_list.landlord_property_id]]
-    z[[asset_list.STANDARD_FULL_ADDRESS, asset_list.STANDARD_POSTCODE, asset_list.ATTRIBUTE_HAS_SOLAR]]
+    # 37 properties flagged as already having solar - these are all because the landlord said they have solar
+    # e.g.
+    # https://earth.google.com/web/search/11+Winsland+Avenue+TOTNES+TQ9+5FT/@50.43354465,-3.71318276,46.57468503a,
+    # 59.14004365d,35y,0h,0t,
+    # 0r/data=CpABGmISXAolMHg0ODZkMWQxOGE4NWRiZjdkOjB4YjBhM2E5M2Q3YWVlMWEwYhlZYgp7fzdJQCHFfC9027QNwCohMTEgV2luc2xhbmQgQXZlbnVlIFRPVE5FUyBUUTkgNUZUGAIgASImCiQJbxsQEoo3SUARXQcp_HE3SUAZBmiZGJ6yDcAhCA0fqq63DcBCAggBOgMKATBCAggASg0I____________ARAA
+    # https://earth.google.com/web/search/15+St+Anne%27s+Ct,+Newton+Abbot+TQ12+1TL/@50.53068337,-3.61611128,
+    # 11.74908956a,135.73212429d,35y,0h,0t,
+    # 0r/data=CpUBGmcSYQolMHg0ODZkMDVkMjFhODhjZjgxOjB4MjBmMzE2Zjc3MGI2NGMwYxlCxHLw8UNJQCFZqyzALe4MwComMTUgU3QgQW5uZSdzIEN0LCBOZXd0b24gQWJib3QgVFExMiAxVEwYAiABIiYKJAm-r6U2iDdJQBHS5ICRdDdJQBmYGVpmiLINwCG8wcrtqbYNwEICCAE6AwoBMEICCABKDQj___________8BEAA

    # Check 2)
    cavity_fills_with_solar = pd.read_excel(
@ -580,37 +573,51 @@ def app():
        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
    )
    cavity_fills_with_solar["cavity_reason"] = cavity_fills_with_solar["cavity_reason"].fillna("Not identified")
+    print(cavity_fills_with_solar["cavity_reason"].value_counts())
    # 203 properties total
    # 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity)
+    # 63 property already has solar

-    check = cavity_fills_with_solar[
-        cavity_fills_with_solar["cavity_reason"] == "Non-Intrusive Data Showed Empty Cavity"
-        ]
-    z = asset_list.standardised_asset_list[
-        asset_list.standardised_asset_list[asset_list.STANDARD_LANDLORD_PROPERTY_ID].isin(
-            check[asset_list.landlord_property_id].values)
+    # Check 3) RDF
+    rdf = pd.read_excel(
+        os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
+        sheet_name="RDF CIGA checks"
+    )
+    rdf = rdf.merge(
+        asset_list.standardised_asset_list[
+            [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
+        ],
+        how="left",
+        left_on=asset_list.landlord_property_id,
+        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
+    )
+    rdf["cavity_reason"] = rdf["cavity_reason"].fillna("Not identified")
+    print(rdf["cavity_reason"].value_counts())
+    # 264 properties are not identified, 261 of which are due to the fact they contain materials
+    # The other 3 were determined to be eligible for solar instead
+    # Many of these units that were identified for rdf works could be solar jobs
+
+    rdf_with_solar = pd.read_excel(
+        os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
+        sheet_name="Solar PV - RDF CIGA Checks"
+    )
+    rdf_with_solar = rdf_with_solar.merge(
+        asset_list.standardised_asset_list[
+            [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
+        ],
+        how="left",
+        left_on=asset_list.landlord_property_id,
+        right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
+    )
+    rdf_with_solar["cavity_reason"] = rdf_with_solar["cavity_reason"].fillna("Not identified")
+    rdf_with_solar["cavity_reason"].value_counts()
+
+    # All others identified - some flagged as empties due to EPC or landlord data suggesting as much
+    # 5 not identified due to containing COMPACTED BEAD
+
+    asset_list.standardised_asset_list = asset_list.standardised_asset_list[
+        asset_list.standardised_asset_list[asset_list.landlord_property_id]
    ]
-    z[asset_list.ATTRIBUTE_HAS_SOLAR].value_counts()
-    pd.set_option('display.max_columns', None)
-    z[[asset_list.STANDARD_FULL_ADDRESS, asset_list.STANDARD_POSTCODE, asset_list.ATTRIBUTE_HAS_SOLAR]]
-
-    not_flagged = asset_list.standardised_asset_list[
-        pd.isnull(asset_list.standardised_asset_list["solar_reason"])
-    ]
-    # For everything not flagged for solar, identify why
-    reasons = []
-    for _, x in not_flagged.iterrows():
-        if x[asset_list.STANDARD_PROPERTY_TYPE] == "flat":
-            reason = "property is a flat"
-        else:
-            x[asset_list.EPC_API_DATA_NAMES["mainheat-description"]]
-
-            reasons.append(
-                {
-                    asset_list.DOMNA_PROPERTY_ID: x["asset_list.DOMNA_PROPERTY_ID"],
-                    "solar_exclusion_reason": reason,
-                }
-            )

    asset_list.load_contact_details(
        local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
--- a/backend/Property.py
+++ b/backend/Property.py
@ -462,7 +462,7 @@ class Property:
        if self.simulation_epcs is None:
            raise ValueError("Simulation EPCs have not been created")

-        rec_ids = sorted(list(self.simulation_epcs.keys()))
+        rec_ids = list(self.simulation_epcs.keys())
        updated_simulation_epcs = []
        for rec_id in rec_ids:
            sim_epc = self.simulation_epcs[rec_id].copy()
@ -488,8 +488,6 @@ class Property:
        # Now we havet this data inthe
        self.updated_simulation_epcs = updated_simulation_epcs

-        return updated_simulation_epcs
-
    @staticmethod
    def create_recommendation_scoring_data(
        property_id,
--- a/etl/customers/mod/pilot/2.
+++ b/etl/customers/mod/pilot/2.
@ -78,7 +78,7 @@ def app():

    # Set the inputs:
    portfolio_id = 139
-    scenario_ids = [233, 234]
+    scenario_ids = [237, 238]

    properties_data, plans_data, recommendations_data = get_data(
        portfolio_id=portfolio_id, scenario_ids=scenario_ids
@ -299,6 +299,9 @@ def app():
            [
                "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
                "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
+                "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
+                "heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
+                "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
            ]
        ].merge(
            recommendations_measures_pivot, how="left", on="property_id"
@ -306,6 +309,11 @@ def app():
            aggregated_metrics, how="left", on="property_id"
        )

+        df["bills_total_cost"] = (
+            df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
+            df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
+        )
+
        df = df.drop(columns=["property_id"])
        for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
            df[c] = df[c].fillna(0)
@ -332,6 +340,11 @@ def app():
        df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
        df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))

+        # Calculate the relative savings on carbon, kwh, and bills
+        df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
+        df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
+        df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
+
        # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
        # the bills go up recommending HHRSH, so it doesn't make it to EPC B
        # For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump?
@ -360,13 +373,47 @@ def app():

        scenario_data[scenario] = df

-    measure_counts = {}
-    for scenario in scenario_ids:
-        recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
-        measure_counts[scenario] = scenario_data[scenario][recommendation_cols].sum().to_dict()
+    printing_scenario_id = scenario_ids[0]
+    # EPC breakdown
+    print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
+    # Cost
+    # Total cost
+    print(scenario_data[printing_scenario_id]["total_cost"].sum())
+    # Base cost
+    print(scenario_data[printing_scenario_id]["estimated_cost"].sum())
+    # Contingency
+    print(scenario_data[printing_scenario_id]["contingency"].sum())
+    # Costs averaged per unit
+    print(scenario_data[printing_scenario_id]["total_cost"].mean())
+    print(scenario_data[printing_scenario_id]["estimated_cost"].mean())
+    print(scenario_data[printing_scenario_id]["contingency"].mean())

-    pprint(measure_counts[scenario_ids[0]])
-    pprint(measure_counts[scenario_ids[1]])
+    # Average relative savings
+    print(scenario_data[printing_scenario_id]["relative_carbon_savings"].mean())
+    print(scenario_data[printing_scenario_id]["relative_kwh_savings"].mean())
+    print(scenario_data[printing_scenario_id]["relative_bill_savings"].mean())
+
+    measure_details = {}
+    for scenario in scenario_ids:
+        measure_details[scenario] = {}
+        recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
+        measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
+        # Get average cost per measure
+        measure_columns = [
+            c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
+        ]
+        # Take the mean, drop zero columns
+        measure_costs = {}
+        for m in measure_columns:
+            measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
+        measure_details[scenario]["cost_per_measure"] = measure_costs
+
+    pprint(measure_details[scenario_ids[0]]["count"])
+    pprint(measure_details[scenario_ids[1]]["count"])
+
+    # Cost per measures
+    pprint(measure_details[scenario_ids[0]]["cost_per_measure"])
+    pprint(measure_details[scenario_ids[1]]["cost_per_measure"])

    # Do not get to EPC B:
    # 5 are flats
@ -392,13 +439,20 @@ def app():
    scenario_metrics = {}
    for scenario in scenario_ids:
        df = scenario_data[scenario].copy()
-        df["cost_per_sap_point"] = df["total_cost"] / df["sap_points"]
-        df["cost_per_carbon"] = df["total_cost"] / df["co2_equivalent_savings"]
+
        avg_savings = df[
            ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
-             "cost_per_sap_point", "cost_per_carbon", "total_cost", "contingency"]
+             "total_cost", "contingency"]
        ].mean().to_dict()
+        avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
+        avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
+        scenario_metrics[scenario] = avg_savings
+
+    pprint(scenario_metrics[scenario_ids[0]])
+    pprint(scenario_metrics[scenario_ids[1]])

    # TODO: Add a slide on valuation improvement, on a sample of properties?

    # TODO: Read in costing data and breakdown
+
+    zz = scenario_recommendations_df[scenario_recommendations_df["type"] == "mechanical_ventilation"]
--- a/recommendations/Costs.py
+++ b/recommendations/Costs.py
@ -104,7 +104,7 @@ INSTALLER_ASHP_COSTS = [
 BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500

 INSTALLER_SOLAR_BATTERY_COSTS = [
-    {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 2030.40, 'installer': 'CEG'},
+    {'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'},
    # {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
    # {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
    # {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
@ -193,6 +193,8 @@ class Costs:
    # fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
    IWI_CONTINGENCY = 0.2

+    # For air source heat pumps, we inflate the assumed cost by quite a bit to account for design and installation
+    ASHP_CONTINGENCY = 0.35
    # Where there is more uncertainty, a higher contingency rate is used
    HIGH_RISK_CONTINGENCY = 0.2
    # When there is less uncertainty, a lower contingency rate is used
@ -1168,7 +1170,7 @@ class Costs:
            cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"]

        # We add some contingency since there are additional costs such as resizing radiators, that could be required
-        subtotal = cost * (1 + self.HIGH_RISK_CONTINGENCY)
+        subtotal = cost * (1 + self.ASHP_CONTINGENCY)
        # The costs from installers exclude VAT
        vat = subtotal * self.VAT_RATE
        total_cost = subtotal + vat
--- a/recommendations/Recommendations.py
+++ b/recommendations/Recommendations.py
@ -793,13 +793,26 @@ class Recommendations:
            ]
        ).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)

+        # We need the recommendation type
+        rec_id_to_type = {
+            rec["recommendation_id"]: rec["type"] for recs in property_recommendations for rec in recs
+        }
+        rec_id_to_type[STARTING_DUMMY_ID_VALUE] = "starting_dummy"
+
        for i in range(0, len(kwh_impact_table)):
-            current_phase = kwh_impact_table.loc[i, 'phase']
+            current = kwh_impact_table.loc[i]
+            current_phase = current['phase']
            previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999
            previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id]

            if not previous_phase.empty:
                for col in ["predictions_heating", "predictions_hotwater"]:
+                    # Check if the recommendation type is ventilation
+                    if rec_id_to_type[current["recommendation_id"]] == "mechanical_ventilation":
+                        # We expect the kwh to increase
+                        if kwh_impact_table.loc[i, col] > previous_phase[col].max():
+                            continue
+
                    if kwh_impact_table.loc[i, col] > previous_phase[col].max():
                        kwh_impact_table.loc[i, col] = previous_phase[col].max()