From 4db9d48e366e121abcfe83e2dfd335d33151bc68 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 21 Feb 2025 12:39:06 +0000 Subject: [PATCH] adding the solar floor eligibility criteria --- asset_list/AssetList.py | 85 ++++++++++++++++++++++++++++---- asset_list/requirements.txt | 3 +- etl/route_march_data_pull/app.py | 28 ++++++++++- 3 files changed, 105 insertions(+), 11 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 81aa525a..4666cf63 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -306,6 +306,17 @@ class AssetList: "cavity wall, as built, no insulation", ] + # List of strings that we look for in the EPC data, where substrings indicate that the wall is insulated + EPC_INSULATED_WALLS_SUBSTRINGS = [ + ", insulated", "with external insulation", "with internal insulation", "filled cavity" + ] + + # List of strings that we look for in the EPC data, where substrings indicate that the roof is insulated + EPC_INSULATED_ROOF_SUBSTRINGS = [ + "(another dwelling above)", ", insulated", ", insulated (assumed) ", + ", ceiling insulated", + ] + def __init__( self, local_filepath, @@ -861,7 +872,10 @@ class AssetList: processed_age_band, how="left" ) - def identify_worktypes(self): + def identify_worktypes(self, cleaned): + + if not self.non_intrusives_present: + raise NotImplementedError("Need to implement the case for non-intrusives") # If we have non-intrusives completed, we can use this to identify work types @@ -892,6 +906,17 @@ class AssetList: ) ) + self.standardised_asset_list["empty_cavity"] = ( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] | + self.standardised_asset_list["epc_indicates_empty_cavity"] + ) + # We add a reason + self.standardised_asset_list["empty_cavity_reason"] = np.where( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"], + "Non-Intrusive Data", + "EPC Data" + ) + ###################################################### # Extraction 
###################################################### @@ -967,13 +992,55 @@ class AssetList: ) ) - EPC_INSULATED_WALLS_SUBSTRINGS = [ - ", insulated", "with external insulation", "with internal insulation", "filled cavity" - ] + # TODO: We don't have information about the roof from this landlord + self.standardised_asset_list["solar_epc_walls_insulated"] = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().str.contains( + "|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS) + ) + ) - self.standardised_asset_list["landlord_wall_construction"].value_counts() + # We merge on the u-value for average thermal transmittance + roof_uvalue_data = pd.DataFrame(cleaned["roof-description"]) + roof_uvalue_data = roof_uvalue_data[ + ~pd.isnull(roof_uvalue_data["thermal_transmittance"]) + ][["original_description", "thermal_transmittance"]].rename( + columns={ + "original_description": self.EPC_API_DATA_NAMES["roof-description"], + "thermal_transmittance": "roof_u_value" + } + ) - EPC_INSULATED_ROOF_SUBSTRINGS = [ - "(another dwelling above)", "limited insulation", "(other premises above)", - ", no insulation", - ] + self.standardised_asset_list = self.standardised_asset_list.merge( + roof_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"] + ) + + # If the u-value of a roof is less than 0.7 we consider it insulated + self.standardised_asset_list["solar_epc_roof_insulated"] = ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains( + "|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), regex=False + ) | ( + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( + lambda x: int(x) >= 270 if str(x).isdigit() else False + ) + ) | ( + self.standardised_asset_list["roof_u_value"].apply( + lambda x: x <= 0.7 if not pd.isnull(x) else False + ) + ) + ) + + self.standardised_asset_list["solar_epc_loft_needs_topup"] = self.standardised_asset_list[ + 
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( + lambda x: int(x) < 270 if str(x).isdigit() else False + ) + + self.standardised_asset_list["solar_epc_floor_is_solid"] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["floor-description"] + ].str.lower().str.contains("solid") + self.standardised_asset_list["solar_epc_floor_is_solid"] = ( + self.standardised_asset_list["solar_epc_floor_is_solid"].fillna(False) + ) + + z = self.standardised_asset_list[ + self.standardised_asset_list["solar_epc_floor_is_solid"] == True + ] diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt index 0c16c43a..fd045d46 100644 --- a/asset_list/requirements.txt +++ b/asset_list/requirements.txt @@ -7,4 +7,5 @@ fuzzywuzzy boto3 openpyxl openai -tiktoken \ No newline at end of file +tiktoken +msgpack \ No newline at end of file diff --git a/etl/route_march_data_pull/app.py b/etl/route_march_data_pull/app.py index fbf7e10d..32c36fe8 100644 --- a/etl/route_march_data_pull/app.py +++ b/etl/route_march_data_pull/app.py @@ -391,13 +391,28 @@ def app(): transformed_df = pd.DataFrame(transformed_data) # At the moment, we're only using a limited set of columns - let's jut keep cavity wall insulation # recommendations - transformed_df = transformed_df[[asset_list.DOMNA_PROPERTY_ID, "Cavity wall insulation"]] + transformed_df = transformed_df[ + [ + asset_list.DOMNA_PROPERTY_ID, "Cavity wall insulation", "Floor insulation (solid floor)", + "Floor insulation", "Floor insulation (suspended floor)" + ] + ] + + transformed_df["epc_has_floor_recommendation"] = ( + transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] | + transformed_df["Floor insulation (suspended floor)"] + ) # Get the find my epc data find_my_epc_data = epc_df[[asset_list.DOMNA_PROPERTY_ID, "find_my_epc_data"]].drop( columns=["find_my_epc_data"]).join( pd.json_normalize(epc_df["find_my_epc_data"]) ) + find_my_epc_data = find_my_epc_data.merge( + 
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]], + how="left", on=asset_list.DOMNA_PROPERTY_ID + ) + # We check if we get the solar pv column: if "Solar photovoltaics" not in find_my_epc_data.columns: find_my_epc_data["Solar photovoltaics"] = False @@ -425,6 +440,17 @@ def app(): asset_list.extract_attributes() + # TODO - Use this! + import msgpack + from utils.s3 import read_from_s3 + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + asset_list.identify_worktypes(cleaned) + if HAS_NON_INTRUSIVES: # Empty cavity: # 1) Has been flagged on the non-intrusives as being empty or partially filled