From 746c42594c559791e6f50964724793516e2b9251 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 20 Mar 2025 15:15:36 +0000 Subject: [PATCH] debugging flattening of find my epc data --- asset_list/AssetList.py | 200 ++++++++++++++---- asset_list/app.py | 113 +++++++++- backend/apis/GoogleSolarApi.py | 4 +- .../mod/pilot/2. Create Excel Model.py | 14 +- 4 files changed, 278 insertions(+), 53 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index df16a314..eddeabdc 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -1024,6 +1024,28 @@ class AssetList: def identify_worktypes(self, cleaned): + # Before we begin, we identify if a property has solar already as we use this + # for identifying cavity jobs + if self.non_intrusives_present: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" + ) + elif self.old_format_non_intrusives_present: + existing_solar_non_intrusives_check = ( + self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( + ["solar pv on roof"] + ) + ) + else: + # We don't have an indication + existing_solar_non_intrusives_check = False + + self.standardised_asset_list["property_has_solar"] = ( + (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") | + existing_solar_non_intrusives_check | + (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) + ) + # If we have non-intrusives completed, we can use this to identify work types ###################################################### # Empty cavity: @@ -1047,22 +1069,76 @@ class AssetList: # We set the filter to False, as we have no non-intrusives non_intrusives_wall_filter = False - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - 
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + if self.landlord_year_built is None: + # The landlord won't always give us year built + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + ( + self.standardised_asset_list["epc_year_upper_bound"] <= 2002 + ) & + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) ) - ) - # Let's also flag work that looks eligible without the SAP filter - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = ( + # Let's also flag work that looks eligible without the SAP filter + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) + ) + + else: + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + ( + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | + (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) + ) & + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) & ( + # If the property has solar, there's a chance it won't qualify + ~self.standardised_asset_list["property_has_solar"] + ) + ) + + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + ( + 
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | + (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) + ) & + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) & ( + # If the property has solar, there's a chance it won't qualify + self.standardised_asset_list["property_has_solar"] + ) + ) + + # Let's also flag work that looks eligible without the SAP filter + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) & + # If the property has solar, there's a chance it won't qualify + ~self.standardised_asset_list["property_has_solar"] + ) + + # We also add a filter on anything that was generally identified by the non-intrusives + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] = ( (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) + non_intrusives_wall_filter ) # If non_intrusive_indicates_empty_cavity is True, @@ -1073,7 +1149,15 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] ) + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] = np.where( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] | + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"], + False, + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] + ) + self.standardised_asset_list["epc_indicates_empty_cavity"] = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( 
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS ) & ( @@ -1083,6 +1167,8 @@ class AssetList: ) & ( self.standardised_asset_list[ self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) ) ) @@ -1096,26 +1182,38 @@ class AssetList: ) & ( self.standardised_asset_list[ self.EPC_API_DATA_NAMES["current-energy-efficiency"]] > self.EMPTY_CAVITY_SAP_THRESHOLD + ) & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) ) ) self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = ( self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) & + ( + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | + (self.standardised_asset_list["epc_year_upper_bound"] <= 1995) + ) & ( self.standardised_asset_list[ self.EPC_API_DATA_NAMES["current-energy-efficiency"] ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) ) ) self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] = ( self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) & + ( + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | + (self.standardised_asset_list["epc_year_upper_bound"] <= 1995) + ) & ( self.standardised_asset_list[ self.EPC_API_DATA_NAMES["current-energy-efficiency"] ] > self.EMPTY_CAVITY_SAP_THRESHOLD + ) & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) ) ) @@ -1243,27 +1341,6 @@ class AssetList: ).sum(): raise ValueError("Both heating system checks are true - this should not be possible") - # Check 2: Does the property have solar already - if self.non_intrusives_present: - 
existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" - ) - elif self.old_format_non_intrusives_present: - existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( - ["solar pv on roof"] - ) - ) - else: - # We don't have an indication - existing_solar_non_intrusives_check = False - - self.standardised_asset_list["property_has_solar"] = ( - (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") | - existing_solar_non_intrusives_check | - (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) - ) - # Check 3: Does the property meet the fabric condition # Solar PV installs are subject to the minimum insulation requirements which means: # 1) one of the following insulation measures must be installed as part of the same @@ -1627,6 +1704,26 @@ class AssetList: # SAP below threshold self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] ) + + self.standardised_asset_list["test"] = ( + not_a_flat & + # Landlord data or EPC data indicates the heating system is appropriate + correct_heating_system & + # The property doesn't currently have solar + ~self.standardised_asset_list["property_has_solar"] & + # The walls are insulated + walls_meet_solar_requirements & + # Roof is insulated + self.standardised_asset_list["solar_epc_roof_insulated"] & + # SAP below threshold + self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] + ) + self.standardised_asset_list["test"] = np.where( + self.standardised_asset_list["solar_eligible_other_floor"], + False, + self.standardised_asset_list["test"] + ) + self.standardised_asset_list["solar_eligible_other_floor_sap_above_threshold"] = ( not_a_flat & # Landlord data or EPC data indicates the heating system is appropriate @@ -1773,14 +1870,30 @@ class AssetList: self.standardised_asset_list["cavity_reason"] ) 
self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"], - "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed", + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]), + "Non-Intrusive Data Showed Empty Cavity - property already has solar", self.standardised_asset_list["cavity_reason"] ) + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]), + "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed", + self.standardised_asset_list["cavity_reason"] + ) + + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]), + "Non-Intrusive Data Showed Empty Cavity but all SAP scores and year built allowed", + self.standardised_asset_list["cavity_reason"] + ) + self.standardised_asset_list["cavity_reason"] = np.where( ( self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) ), "EPC Data Showed Empty Cavity", self.standardised_asset_list["cavity_reason"] @@ -1788,7 +1901,8 @@ class AssetList: self.standardised_asset_list["cavity_reason"] = np.where( ( self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) ), "EPC Data 
Showed Empty Cavity but all SAP scores allowed", self.standardised_asset_list["cavity_reason"] @@ -1798,7 +1912,8 @@ class AssetList: ( self.standardised_asset_list["landlord_data_indicates_empty_cavity"] & ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - ~self.standardised_asset_list["epc_indicates_empty_cavity"] + ~self.standardised_asset_list["epc_indicates_empty_cavity"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) ), "Landlord Data Showed Empty Cavity", self.standardised_asset_list["cavity_reason"] @@ -1807,7 +1922,8 @@ class AssetList: ( self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] & ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] & - ~self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] + ~self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) ), "Landlord Data Showed Empty Cavity but all SAP scores allowed", self.standardised_asset_list["cavity_reason"], diff --git a/asset_list/app.py b/asset_list/app.py index d7b1b6cd..13621448 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -100,8 +100,8 @@ def app(): missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "Archetype" - landlord_built_form = "Archetype" + landlord_property_type = "Archetype (PFP)" + landlord_built_form = "Archetype (PFP)" landlord_wall_construction = None landlord_heating_system = None landlord_existing_pv = None @@ -418,6 +418,8 @@ def app(): epc_df = pd.concat(epc_data) epc_df["estimated"] = epc_df["estimated"].fillna(False) + z = epc_df[epc_df["domna_property_id"] == eg["domna_property_id"].values[0]] + # We expand out the recommendations recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]] @@ -457,10 +459,24 @@ def app(): if "find_my_epc_data" not in epc_df.columns: 
epc_df["find_my_epc_data"] = None - find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop( - columns=["find_my_epc_data"]).join( - pd.json_normalize(epc_df["find_my_epc_data"]) - ) + find_my_epc_data = [] + for _, x in epc_df.iterrows(): + if x["find_my_epc_data"]: + find_my_epc_data.append( + { + asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID], + **x["find_my_epc_data"] + } + ) + else: + find_my_epc_data.append( + { + asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID] + } + ) + + find_my_epc_data = pd.DataFrame(find_my_epc_data) + find_my_epc_data = find_my_epc_data.merge( transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]], how="left", on=asset_list.DOMNA_PROPERTY_ID @@ -511,6 +527,91 @@ def app(): asset_list.flat_analysis() + ################################################################ + # WESTWARD - comparison between Kieran's method & automated + ################################################################ + + # Check 1) + cavity_fills = pd.read_excel( + os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"), + sheet_name="Straight Fill" + ) + cavity_fills = cavity_fills.merge( + asset_list.standardised_asset_list[ + [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"] + ], + how="left", + left_on=asset_list.landlord_property_id, + right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID + ) + cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified") + cavity_fills["cavity_reason"].value_counts() + # Didn't identify 3 properties because they're bedsits + # 4 properties were identified, not based on the non-intrusives but instead because + # Westward said they were built in 2003/2007. 
Have adjusted this to use the age from the + # epc as well, as EPC says 1975 and they look like 1975 properties + # 58 properties flagged as already having solar: + # + + z = cavity_fills[ + cavity_fills["cavity_reason"] == "Non-Intrusive Data Showed Empty Cavity - property already has solar" + ] + + df = asset_list.standardised_asset_list[ + asset_list.standardised_asset_list[asset_list.STANDARD_LANDLORD_PROPERTY_ID].isin( + z[asset_list.landlord_property_id].values) + ] + eg = df[df[asset_list.STANDARD_LANDLORD_PROPERTY_ID] == "TOTNEWINA0102300"] + + z[["Address", "WFT EDIT Postcode", asset_list.landlord_property_id]] + z[[asset_list.STANDARD_FULL_ADDRESS, asset_list.STANDARD_POSTCODE, asset_list.ATTRIBUTE_HAS_SOLAR]] + + # Check 2) + cavity_fills_with_solar = pd.read_excel( + os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"), + sheet_name="Solar PV - Straight Fill" + ) + cavity_fills_with_solar = cavity_fills_with_solar.merge( + asset_list.standardised_asset_list[ + [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"] + ], + how="left", + left_on=asset_list.landlord_property_id, + right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID + ) + cavity_fills_with_solar["cavity_reason"] = cavity_fills_with_solar["cavity_reason"].fillna("Not identified") + # 203 properties total + # 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity) + + check = cavity_fills_with_solar[ + cavity_fills_with_solar["cavity_reason"] == "Non-Intrusive Data Showed Empty Cavity" + ] + z = asset_list.standardised_asset_list[ + asset_list.standardised_asset_list[asset_list.STANDARD_LANDLORD_PROPERTY_ID].isin( + check[asset_list.landlord_property_id].values) + ] + z[asset_list.ATTRIBUTE_HAS_SOLAR].value_counts() + pd.set_option('display.max_columns', None) + z[[asset_list.STANDARD_FULL_ADDRESS, asset_list.STANDARD_POSTCODE, asset_list.ATTRIBUTE_HAS_SOLAR]] + + not_flagged = asset_list.standardised_asset_list[ + 
pd.isnull(asset_list.standardised_asset_list["solar_reason"]) + ] + # For everything not flagged for solar, identify why + reasons = [] + for _, x in not_flagged.iterrows(): + if x[asset_list.STANDARD_PROPERTY_TYPE] == "flat": + reason = "property is a flat" + else: + x[asset_list.EPC_API_DATA_NAMES["mainheat-description"]] + + reasons.append( + { + asset_list.DOMNA_PROPERTY_ID: x["asset_list.DOMNA_PROPERTY_ID"], + "solar_exclusion_reason": reason, + } + ) + asset_list.load_contact_details( local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"), sheet_name="Report 1", diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index ea8650b6..cda32faa 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -187,7 +187,9 @@ class GoogleSolarApi: # We constrain the roof area, based on the floor area to be more conservative self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2'] - if self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE: + if ( + self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE + ) | (self.roof_area < (2 - self.ROOF_AREA_TOLERANCE) * property_instance.roof_area): self.roof_area = property_instance.roof_area self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2'] diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py index 0e057a25..e656c96e 100644 --- a/etl/customers/mod/pilot/2. Create Excel Model.py +++ b/etl/customers/mod/pilot/2. 
Create Excel Model.py @@ -368,10 +368,16 @@ def app(): pprint(measure_counts[scenario_ids[0]]) pprint(measure_counts[scenario_ids[1]]) - df = scenario_data[scenario_ids[1]] - z = df[ - (df["Walls"] == "Cavity wall, as built, no insulation") & (~df["Recommendation: cavity_wall_insulation"]) - ] + # Do not get to EPC B: + # 5 are flats + # 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump - + # should be looked at but several surrounding properties have been surveyed in a similar fashion + # 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH. + # we could non-intrusively recommend a heat pump. + # 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing - + # manual review indicates that there are multiple trees surrounding the south facing side of the property + # 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local + # area being surrounded by trees # Scenario adjustments: # Exclude: boiler_upgrade