From 2d71ad25efced2412edb987f80a977da0c291018 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 10 Apr 2025 23:10:52 +0100 Subject: [PATCH] added a patch method to scraping epc data --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 808 +++++++----------------- asset_list/app.py | 74 ++- asset_list/mappings/built_form.py | 29 +- asset_list/mappings/exising_pv.py | 8 + asset_list/mappings/heating_systems.py | 31 +- asset_list/mappings/property_type.py | 17 +- asset_list/mappings/roof.py | 26 + asset_list/mappings/walls.py | 12 +- backend/SearchEpc.py | 4 + etl/customers/remote_assessments/app.py | 36 +- etl/find_my_epc/AssetListEpcData.py | 47 +- etl/find_my_epc/RetrieveFindMyEpc.py | 89 ++- 14 files changed, 564 insertions(+), 621 deletions(-) create mode 100644 asset_list/mappings/roof.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 96ad7a95..df6c4faa 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 5ae3029f..0dedc1fd 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -17,6 +17,7 @@ import asset_list.mappings.walls as walls_mappings import asset_list.mappings.heating_systems as heating_mappings import asset_list.mappings.exising_pv as existing_pv_mappings import asset_list.mappings.built_form as built_form_mappings +import asset_list.mappings.roof as roof_mappings from recommendations.recommendation_utils import ( estimate_perimeter, @@ -271,8 +272,10 @@ class AssetList: STANDARD_PROPERTY_TYPE = "landlord_property_type" STANDARD_BUILT_FORM = "landlord_built_form" STANDARD_WALL_CONSTRUCTION = "landlord_wall_construction" + STANDARD_ROOF_CONSTRUCTION = "landlord_roof_construction" STANDARD_HEATING_SYSTEM = "landlord_heating_system" STANDARD_EXISTING_PV = "landlord_existing_pv" + STANDARD_SAP = "landlord_sap_rating" DOMNA_PROPERTY_ID = "domna_property_id" @@ -286,6 +289,8 @@ class AssetList: "Any further surveyor notes", 'Surveyors Name' ] + NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" + OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] # This SAP threshold is a key search criteria for properties that may be eligible for extraction @@ -295,6 +300,9 @@ class AssetList: # Any EPC deemed to have been conducted prior to this year is deemed to be unreliable EPC_YEAR_THRESHOLD = pd.Timestamp.now().year - 5 + # Properties before this year are more likely to have lower EPC ratings and more likely to qualify + EMPTY_CAVITY_YEAR_THRESHOLD = 2002 + # Attributes - these are columns that we produce, calcualted based on other pieces of data ATTRIBUTE_HAS_SOLAR = "attribute_has_solar" ATTRIBUTE_NUMBER_OF_FLOORS = "attribute_est_number_floors" @@ -347,8 +355,11 @@ class AssetList: landlord_property_type=None, landlord_built_form=None, landlord_wall_construction=None, + landlord_roof_construction=None, landlord_heating_system=None, landlord_existing_pv=None, + landlord_sap=None, + phase=False, header=0 ): self.local_filepath = local_filepath @@ -361,7 +372,6 @@ class AssetList: self.standardised_asset_list = self.raw_asset_list.copy() # Will be used to store aggregated figures against the various work types self.work_type_figures = {} - self.work_type_breakdowns = {} self.flat_data = None self.duplicated_addresses = None self.contact_details = None @@ -371,11 +381,19 @@ class AssetList: self.outcomes_for_output = pd.DataFrame() self.master_surveyed = None + # When this is True, we intend to break the programme into multiple phases. We may need to review + # how this is structured in the future, as depending on how we get future data, we may need to + # remove some existing phases from the reporting, or specifically highlight the phase (1 to n-1) + # properties, assuming the current phase is n. + self.phase = phase + # We detect the presence of the non-intrusive columns self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns # We detect if we have the old format of non-intruvies self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns + self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + # Names of columns self.landlord_property_id = landlord_property_id self.address1_colname = address1_colname @@ -386,8 +404,10 @@ class AssetList: self.landlord_property_type = landlord_property_type self.landlord_built_form = landlord_built_form self.landlord_wall_construction = landlord_wall_construction + self.landlord_roof_construction = landlord_roof_construction self.landlord_heating_system = landlord_heating_system self.landlord_existing_pv = landlord_existing_pv + self.landlord_sap = landlord_sap # parameters for cleaning self.full_address_cols_to_concat = full_address_cols_to_concat @@ -427,6 +447,13 @@ class AssetList: self.standardised_asset_list[self.landlord_property_type].copy() ) + # If landlord built form is None (which it often is) we use the built for from inspections + if (self.landlord_built_form is None) and self.non_intrusives_present: + self.landlord_built_form = self.STANDARD_BUILT_FORM + self.standardised_asset_list[self.landlord_built_form] = ( + self.standardised_asset_list["Archetype"].copy() + ) + def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): if method not in self.ADDRESS_1_CLEANING_METHODS: @@ -604,8 +631,10 @@ class AssetList: self.landlord_built_form, self.landlord_year_built, self.landlord_wall_construction, + self.landlord_roof_construction, self.landlord_heating_system, - self.landlord_existing_pv + self.landlord_existing_pv, + self.landlord_sap, ] # Keep just non-null variables (e.g landlord may not provide uprn self.keep_variables = [v for v in variables if v is not None] @@ -619,8 +648,10 @@ class AssetList: self.landlord_built_form: self.STANDARD_BUILT_FORM, self.landlord_year_built: self.STANDARD_YEAR_BUILT, self.landlord_wall_construction: self.STANDARD_WALL_CONSTRUCTION, + self.landlord_roof_construction: self.STANDARD_ROOF_CONSTRUCTION, self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, - self.landlord_existing_pv: self.STANDARD_EXISTING_PV + self.landlord_existing_pv: self.STANDARD_EXISTING_PV, + self.landlord_sap: self.STANDARD_SAP, } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} @@ -628,6 +659,9 @@ class AssetList: if self.non_intrusives_present: non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES + if self.non_intrusives_eligibility: + non_intrusive_columns.append(self.NON_INTRUSIVES_ELIGIBILITY_COLUMN) + if self.old_format_non_intrusives_present: non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES @@ -657,9 +691,13 @@ class AssetList: ) else: # We want to make sure that we have a column for wall construction - self.landlord_wall_construction = "landlord_wall_construction" + self.landlord_wall_construction = self.STANDARD_WALL_CONSTRUCTION self.standardised_asset_list[self.landlord_wall_construction] = None + if self.landlord_roof_construction is None: + self.landlord_roof_construction = self.STANDARD_ROOF_CONSTRUCTION + self.standardised_asset_list[self.landlord_roof_construction] = None + # Clear our build year column # We attempt to process the year built column if self.landlord_year_built is not None: @@ -750,6 +788,10 @@ class AssetList: self.landlord_existing_pv: { "standard_values": existing_pv_mappings.STANDARD_EXISTING_PV, "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS + }, + self.landlord_roof_construction: { + "standard_values": roof_mappings.STANDARD_ROOF_CONSTRUCTIONS, + "standard_map": roof_mappings.ROOF_CONSTRUCTION_MAPPINGS } } # Keep just entries where the key is not None @@ -757,6 +799,8 @@ class AssetList: for variable, config in to_remap.items(): logger.info("Standardising variable: %s", variable) + # Strip each of these columns + self.standardised_asset_list[variable] = self.standardised_asset_list[variable].str.strip() values_to_remap = self.standardised_asset_list[variable].unique() # We want to map this to our standardised list of property types we're interested in remapper = DataRemapper(standard_values=config["standard_values"], standard_map=config["standard_map"]) @@ -779,6 +823,13 @@ class AssetList: if there are no categories which need remapping which is highly unlikely :return: """ + + if self.phase: + # We filter on just the properties that have had an inspection + self.standardised_asset_list = self.standardised_asset_list[ + ~self.standardised_asset_list['Surveyors Name'].isin(["YET TO BE SURVEYED"]) + ] + if not self.variable_mappings and not override_empty_mappings: raise ValueError("Please run init_standardise first") @@ -854,7 +905,7 @@ class AssetList: df, how="left", on=self.DOMNA_PROPERTY_ID ) - def extract_attributes(self): + def extract_attributes(self, pull_epc=True): # Used to extracty the typical attributes that we use to identify viable work self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR] = ( @@ -1054,6 +1105,40 @@ class AssetList: def identify_worktypes(self, cleaned): + if self.STANDARD_SAP is not None: + # We add a SAP category for all work type identification + self.standardised_asset_list["SAP Category"] = np.where( + ( + (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) | + (self.standardised_asset_list[self.STANDARD_SAP] <= 68) + ), + "SAP Rating 68 or less", + np.where( + ( + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= + self.EMPTY_CAVITY_SAP_THRESHOLD + ) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD) + ), + f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + ) + ) + else: + # We add a SAP category for all work type identification + self.standardised_asset_list["SAP Category"] = np.where( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68, + "SAP Rating 68 or less", + np.where( + ( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= + self.EMPTY_CAVITY_SAP_THRESHOLD + ), + f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + ) + ) + # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs if self.non_intrusives_present: @@ -1107,132 +1192,53 @@ class AssetList: non_intrusives_wall_filter = False if self.landlord_year_built is None: - # The landlord won't always give us year built - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - ( - self.standardised_asset_list["epc_year_upper_bound"] <= 2002 - ) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) - ) - - # Let's also flag work that looks eligible without the SAP filter - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) - ) - - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) & ( - # If the property has solar, there's a chance it won't qualify - self.standardised_asset_list["property_has_solar"] - ) - ) - + year_built_filter = self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD else: - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | - (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) - ) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) & ( - # If the property has solar, there's a chance it won't qualify - ~self.standardised_asset_list["property_has_solar"] - ) + year_built_filter = ( + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | + (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) ) - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | - (self.standardised_asset_list["epc_year_upper_bound"] <= 2002) - ) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) & ( - # If the property has solar, there's a chance it won't qualify - self.standardised_asset_list["property_has_solar"] - ) - ) - - # Let's also flag work that looks eligible without the SAP filter - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) & - # If the property has solar, there's a chance it won't qualify + # Criteria: + # The property isn't a bedsit + # Non-intrusives indicate it needs a fill + # The EPC year is before 2002 + # We also flag where the property has solar on the roof, because this is a signal of a high EPC rating + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + year_built_filter & + ( ~self.standardised_asset_list["property_has_solar"] ) + ) + + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( + pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) & + (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & + non_intrusives_wall_filter & + year_built_filter & + ( + # If the property has solar, there's a chance it won't qualify + self.standardised_asset_list["property_has_solar"] + ) + ) # We also add a filter on anything that was generally identified by the non-intrusives - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] = ( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_year_filter"] = ( + pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity"]) & + pd.isnull(self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"]) & (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & non_intrusives_wall_filter ) - # If non_intrusive_indicates_empty_cavity is True, - # set non_intrusive_indicates_empty_cavity_no_sap_filter to False - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"], - False, - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] - ) - - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] | - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"], - False, - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] - ) - self.standardised_asset_list["epc_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( self.EPC_NO_WALL_INSULATION_DESCRIPTIONS ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= 1995 + self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD ) & ( ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] - ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= self.EMPTY_CAVITY_SAP_THRESHOLD - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) - ) - ) - - self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= 1995 - ) & ( - ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] - ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"]] > self.EMPTY_CAVITY_SAP_THRESHOLD ) & ( ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) ) @@ -1241,44 +1247,13 @@ class AssetList: self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = ( self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | - (self.standardised_asset_list["epc_year_upper_bound"] <= 1995) - ) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | + (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) ) & ( ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) ) ) - self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] = ( - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & - ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) | - (self.standardised_asset_list["epc_year_upper_bound"] <= 1995) - ) & - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["current-energy-efficiency"] - ] > self.EMPTY_CAVITY_SAP_THRESHOLD - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) - ) - ) - - # If the EPC is esimtated, we defer to the non-intrusives - self.standardised_asset_list["epc_indicates_empty_cavity"] = np.where( - ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - self.standardised_asset_list["estimated"] - ), - False, - self.standardised_asset_list["epc_indicates_empty_cavity"] - ) - # Finally, we create a flag to indicate that the cavity is empty, based on the criteria above self.standardised_asset_list["cavity_is_empty"] = ( non_intrusives_wall_filter | @@ -1303,19 +1278,21 @@ class AssetList: )) ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - extraction_wall_filter & ( - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - )) + if self.non_intrusives_eligibility: + # If we have the eligibility column, we check if the wall is eligible + extraction_wall_filter = ( + extraction_wall_filter & + ~self.standardised_asset_list["non-intrusives: Eligibility (Red/Yellow/Green)"].isin( + ["RED"] + ) + ) - # Also include work without the SAP filter as optimistic - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = ( - extraction_wall_filter & ( - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - )) + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( + extraction_wall_filter & year_built_filter + ) elif self.old_format_non_intrusives_present: - print("Review these categories with Kieran") + print("Review these categories!!!!") extraction_wall_filter = ( self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( ["retro drilled", "retro filled", "fibre from build", "polybead", "retro drilled and filled", @@ -1324,12 +1301,6 @@ class AssetList: ) self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - extraction_wall_filter & ( - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - ) - - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = ( extraction_wall_filter ) @@ -1337,13 +1308,6 @@ class AssetList: self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = False self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = False - # Adjust - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"], - False, - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] - ) - ###################################################### # Solar ###################################################### @@ -1351,8 +1315,12 @@ class AssetList: # Check 1: Does the property have a valid heating system? self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] = ( self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["air source heat pump", "ground source heat pump", "high heat retention storage heaters", - "electric boiler"] + [ + "air source heat pump", + "ground source heat pump", + "high heat retention storage heaters", + "electric boiler" + ] ) ) self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = ( @@ -1435,8 +1403,6 @@ class AssetList: else: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False - # TODO: We don't have information about the roof from this landlord - # We merge on the u-value for average thermal transmittance walls_uvalue_data = pd.DataFrame(cleaned["walls-description"]) walls_uvalue_data = walls_uvalue_data[ @@ -1454,22 +1420,16 @@ class AssetList: self.standardised_asset_list["solar_epc_walls_insulated"] = ( ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES[ - "walls-description"]].str.lower().str.contains( - "|".join( - self.EPC_INSULATED_WALLS_SUBSTRINGS) + self.EPC_API_DATA_NAMES["walls-description"]].str.lower().str.contains( + "|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS) ) ) | ( - self.standardised_asset_list[ - "walls_u_value"].apply( - lambda x: x <= 0.7 if not pd.isnull(x) else False - ) + self.standardised_asset_list["walls_u_value"].apply(lambda x: x <= 0.7 if not pd.isnull(x) else False) ) ) # We merge on the u-value for average thermal transmittance - roof_roof_data = pd.DataFrame(cleaned["roof-description"]) - roof_roof_data = roof_roof_data[ + roof_roof_data = pd.DataFrame(cleaned["roof-description"])[ ["original_description", "thermal_transmittance", "is_pitched", "is_loft"] ].rename( columns={ @@ -1516,43 +1476,15 @@ class AssetList: self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False) ) - # We merge on the u-value for average thermal transmittance - floors_uvalue_data = pd.DataFrame(cleaned["floor-description"]) - floors_uvalue_data = floors_uvalue_data[ - ~pd.isnull(floors_uvalue_data["thermal_transmittance"]) - ][["original_description", "thermal_transmittance"]].rename( - columns={ - "original_description": self.EPC_API_DATA_NAMES["floor-description"], - "thermal_transmittance": "floor_u_value" - } - ) - - # Merge on - self.standardised_asset_list = self.standardised_asset_list.merge( - floors_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["floor-description"] - ) - - # We assume that a U-value of 0.5 or below is indicative of an insulated floor - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] = ( + # Check if the boiler is electric + # We check if it contains both the terms boiler & electric + self.standardised_asset_list["has_electric_boiler"] = ( ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str - .lower().str.contains("solid") - ) & ( - ~self.standardised_asset_list["epc_has_floor_recommendation"] - ) & ( - # We do not utilise estimated EPCs for this method because we will always find that - # "epc_has_floor_recommendation" is False - (self.standardised_asset_list["estimated"] == False) - ) + self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] + .str.lower().isin( + ["boiler and radiators, electric"]) ) | ( - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["floor-description"]].str.lower().str.contains("solid") - ) & ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["floor-description"]].str.lower() - .str.contains(", insulated") - ) + self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] == "electric boiler" ) ) @@ -1563,7 +1495,8 @@ class AssetList: # Set up the filters to stop repetition correct_heating_system = ( self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] + self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] | + self.standardised_asset_list["has_electric_boiler"] ) needs_heating_upgrade = ( @@ -1574,11 +1507,17 @@ class AssetList: # The requirements for walls are: # 1) walls are insulated # 2) property is a cavity (can be done insulated or not) + walls_meet_solar_requirements = ( + # The landlord is saying the walls are insulated self.standardised_asset_list["solar_landlord_walls_insulated"] | + # EPC data is saying the walls are insulated self.standardised_asset_list["solar_epc_walls_insulated"] | + # Non-intrusives are saying the walls are insulated self.standardised_asset_list["solar_non_intrusives_walls_insulated"] | + # It's empty cavity self.standardised_asset_list["cavity_is_empty"] | + # It's a cavity wall (self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].str.contains("cavity")) ) @@ -1586,24 +1525,12 @@ class AssetList: self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "flat" ) - self.standardised_asset_list["solar_eligible_solid_floor"] = ( - # Property isn't a flag - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Floor type check - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] + solar_roof_meets_criteria = ( + self.standardised_asset_list["solar_epc_roof_insulated"] | + self.standardised_asset_list["solar_epc_loft_needs_topup"] ) - self.standardised_asset_list["solar_eligible_solid_floor_sap_above_threshold"] = ( + self.standardised_asset_list["solar_eligible"] = ( # Property isn't a flag not_a_flat & # Landlord data or EPC data indicates the heating system is appropriate @@ -1612,16 +1539,12 @@ class AssetList: ~self.standardised_asset_list["property_has_solar"] & # The walls are insulated walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Floor type check - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] + # Roof meets criteria + solar_roof_meets_criteria ) # With heating upgrade - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade"] = ( + self.standardised_asset_list["solar_eligible_needs_heating_upgrade"] = ( not_a_flat & # Needs heating upgrade needs_heating_upgrade & @@ -1629,322 +1552,81 @@ class AssetList: ~self.standardised_asset_list["property_has_solar"] & # The walls are insulated walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Floor type check - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP Below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - # With heating upgrade, above threshold - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade_sap_above_threshold"] = ( - not_a_flat & - # Needs heating upgrade - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Floor type check - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - # Because the EPC data can be contradictrory, we remove any overlap - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade"] = np.where( - self.standardised_asset_list["solar_eligible_solid_floor"], - False, - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade"] - ) - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade_sap_above_threshold"] = np.where( - self.standardised_asset_list["solar_eligible_solid_floor_sap_above_threshold"], - False, - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade_sap_above_threshold"] + # Roof meets criteria + solar_roof_meets_criteria ) # We shouldn't have an overlap if ( - self.standardised_asset_list["solar_eligible_solid_floor"] & - self.standardised_asset_list["solar_eligible_solid_floor_needs_heating_upgrade"] + self.standardised_asset_list["solar_eligible"] & + self.standardised_asset_list["solar_eligible_needs_heating_upgrade"] ).sum(): raise ValueError("Both heating upgrade and no heating upgrade are true - this should not be possible") - # Solid floor but needs a loft top-up - self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Check floor - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - # Solid floor, needs loft, above SAP thresold - self.standardised_asset_list["solar_eligible_solid_floor_needs_loft_sap_above_threshold"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Check floor - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - # Needs loft & heating - self.standardised_asset_list["solar_eligible_solid_floor_needs_loft_needs_heating_upgrade"] = ( - not_a_flat & - # Needs heating upgrade - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Floor type - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - self.standardised_asset_list[ - "solar_eligible_solid_floor_needs_loft_needs_heating_upgrade_sap_above_threshold" - ] = ( - not_a_flat & - # Needs heating upgrade - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Floor type - self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - # Other floor type, fully insulated - self.standardised_asset_list["solar_eligible_other_floor"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Floor type - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - self.standardised_asset_list["solar_eligible_other_floor_sap_above_threshold"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Floor type - other types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - # With heating upgrade - self.standardised_asset_list["solar_eligible_other_floor_needs_heating_upgrade"] = ( - not_a_flat & - # Needs heating upgrade - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Other floor types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - # With heating upgrade, SAP above threshold - self.standardised_asset_list["solar_eligible_other_floor_needs_heating_upgrade_sap_above_threshold"] = ( - not_a_flat & - # Needs heating upgrade - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof is insulated - self.standardised_asset_list["solar_epc_roof_insulated"] & - # Other floor types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - # Check for overlap - if ( - self.standardised_asset_list["solar_eligible_other_floor_needs_heating_upgrade"] & - self.standardised_asset_list["solar_eligible_other_floor_needs_heating_upgrade_sap_above_threshold"] - ).sum(): - raise ValueError("Both heating upgrade and no heating upgrade are true - this should not be possible") - - # Other floor type, needs loft top-up - self.standardised_asset_list["solar_eligible_other_floor_needs_loft"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof need loft top-up - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Other floor types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - # Other floor type, needs loft top-up, SAP above threshold - self.standardised_asset_list["solar_eligible_other_floor_needs_loft_sap_above_threshold"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof need loft top-up - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Other floor types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - # With heating upgrade - self.standardised_asset_list["solar_eligible_other_floor_needs_loft_needs_heating_upgrade"] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof need loft top-up - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Other floor types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP below threshold - self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - self.standardised_asset_list[ - "solar_eligible_other_floor_needs_loft_needs_heating_upgrade_sap_above_threshold" - ] = ( - not_a_flat & - # Landlord data or EPC data indicates the heating system is appropriate - needs_heating_upgrade & - # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & - # The walls are insulated - walls_meet_solar_requirements & - # Roof need loft top-up - self.standardised_asset_list["solar_epc_loft_needs_topup"] & - # Other floor types - ~self.standardised_asset_list["solar_epc_floor_is_solid_no_recommendation"] & - # SAP above threshold - ~self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] - ) - - # Check if the boiler is electric - # We check if it contains both the terms boiler & electric - has_electric_boiler = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] - .str.lower().isin( - ["boiler and radiators, electric", "boiler and underfloor heating, electric"]) - ) | ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] == "electric boiler" - ) - ) - # We check for a specific sub-set of properties which are uninsulated solid wall properties that are EPC E # or below (we'll use 57 as a threshold) - These are for a pilot with Net Zero Renewables self.standardised_asset_list["solar_eligible_solid_wall_uninsulated"] = ( not_a_flat & # Landlord data or EPC data indicates the heating system is appropriate - in this case, we can also take # electric boilers - (correct_heating_system | has_electric_boiler) & + correct_heating_system & # The property doesn't currently have solar ~self.standardised_asset_list["property_has_solar"] & - # The walls are uninsulated solic + # The walls are uninsulated solid ~walls_meet_solar_requirements & (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 57) ) # Drop anything we don't need self.standardised_asset_list = self.standardised_asset_list.drop( - columns=["walls_u_value", "roof_u_value", "floor_u_value"] + columns=["walls_u_value", "roof_u_value"] ) # Adjust flagged extraction jobs to remove anything for solar self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - ~self.standardised_asset_list["solar_eligible_solid_floor"] & - ~self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"] - # ~self.standardised_asset_list["solar_eligible_other_floor"] & - # ~self.standardised_asset_list["solar_eligible_other_floor_needs_loft"] + ~self.standardised_asset_list["solar_eligible"] ) # Finally, we note why each property has been flagged self.standardised_asset_list["cavity_reason"] = None + empty_cavity_map = { + "non_intrusive_indicates_empty_cavity": "Non-Intrusive Data Shows Empty Cavity: ", + "non_intrusive_indicates_empty_cavity_has_solar": "Non-Intrusive Data Shows Empty Cavity - property " + "already has solar: ", + "non_intrusive_indicates_empty_cavity_no_year_filter": f"Non-Intrusive Data Shows Empty Cavity, " + f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", + + } + for variable, description in empty_cavity_map.items(): + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list[variable] & + pd.isnull(self.standardised_asset_list["cavity_reason"]), + description + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"] + ) + + # We break the cavity reason into a few different categories, when the EPC is different from inspections self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity"], - "Non-Intrusive Data Showed Empty Cavity", - self.standardised_asset_list["cavity_reason"] - ) - self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]), - "Non-Intrusive Data Showed Empty Cavity - property already has solar", - self.standardised_asset_list["cavity_reason"] - ) - self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]), - "Non-Intrusive Data Showed Empty Cavity but all SAP scores allowed", + ( + self.standardised_asset_list["epc_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "EPC Shows Empty Cavity, inspections show retro drilled: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter_no_year_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]), - "Non-Intrusive Data Showed Empty Cavity but all SAP scores and year built allowed", + ( + self.standardised_asset_list["epc_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "EPC Shows Empty Cavity, inspections show filled at build: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -1954,19 +1636,12 @@ class AssetList: ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "EPC Data Showed Empty Cavity", + "EPC Shows Empty Cavity, inspections show non-cavity build: " + self.standardised_asset_list[ + "SAP Category"], self.standardised_asset_list["cavity_reason"] ) - self.standardised_asset_list["cavity_reason"] = np.where( - ( - self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) - ), - "EPC Data Showed Empty Cavity but all SAP scores allowed", - self.standardised_asset_list["cavity_reason"] - ) - # Landlord data + # Landlord data: The landlord's data indicates that the wall is an uninsulated cavity wall, but EPC and + # inspections show filled self.standardised_asset_list["cavity_reason"] = np.where( ( self.standardised_asset_list["landlord_data_indicates_empty_cavity"] & @@ -1974,35 +1649,18 @@ class AssetList: ~self.standardised_asset_list["epc_indicates_empty_cavity"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Landlord Data Showed Empty Cavity", + "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled: " + self.standardised_asset_list[ + "SAP Category"], self.standardised_asset_list["cavity_reason"] ) - self.standardised_asset_list["cavity_reason"] = np.where( - ( - self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] & - ~self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) - ), - "Landlord Data Showed Empty Cavity but all SAP scores allowed", - self.standardised_asset_list["cavity_reason"], - ) + # Flag extraction self.standardised_asset_list["cavity_reason"] = np.where( ( self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - "Non-Intrusive Data Showed Cavity Extraction", - self.standardised_asset_list["cavity_reason"] - ) - # extraction no sap filter - self.standardised_asset_list["cavity_reason"] = np.where( - ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_sap_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) - ), - "Non-Intrusive Data Showed Cavity Extraction but all SAP scores allowed", + "Non-Intrusive Data Shows Cavity Extraction: " + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["cavity_reason"] ) @@ -2013,47 +1671,17 @@ class AssetList: # Map of variables and fill values for the solar_reason variable solar_reason_map = { - "solar_eligible_solid_floor": "Solar Eligible, Solid Floor", - "solar_eligible_solid_floor_sap_above_threshold": "Solar Eligible, Solid Floor, SAP Above Threshold", - "solar_eligible_solid_floor_needs_heating_upgrade": ( - "Solar Eligible, Solid Floor, Needs Heating Upgrade" + "solar_eligible": "Solar Eligible: ", + "solar_eligible_needs_heating_upgrade": ( + "Solar Eligible, Solid Floor, Needs Heating Upgrade: " ), - "solar_eligible_solid_floor_needs_heating_upgrade_sap_above_threshold": ( - "Solar Eligible, Solid Floor, Needs Heating Upgrade, SAP Above Threshold" - ), - "solar_eligible_solid_floor_needs_loft": "Solar Eligible, Solid Floor, Needs Loft", - "solar_eligible_solid_floor_needs_loft_sap_above_threshold": ( - "Solar Eligible, Solid Floor, Needs Loft, SAP Above Threshold" - ), - "solar_eligible_solid_floor_needs_loft_needs_heating_upgrade": ( - "Solar Eligible, Solid Floor, Needs Loft, Needs Heating Upgrade" - ), - "solar_eligible_solid_floor_needs_loft_needs_heating_upgrade_sap_above_threshold": ( - "Solar Eligible, Solid Floor, Needs Loft, Needs Heating Upgrade, SAP Above Threshold" - ), - "solar_eligible_other_floor": "Solar Eligible, Other Floor", - "solar_eligible_other_floor_sap_above_threshold": "Solar Eligible, Other Floor, SAP Above Threshold", - "solar_eligible_other_floor_needs_heating_upgrade": "Solar Eligible, Other Floor, Needs Heating Upgrade", - "solar_eligible_other_floor_needs_heating_upgrade_sap_above_threshold": ( - "Solar Eligible, Other Floor, Needs Heating Upgrade, SAP Above Threshold" - ), - "solar_eligible_other_floor_needs_loft": "Solar Eligible, Other Floor, Needs Loft", - "solar_eligible_other_floor_needs_loft_sap_above_threshold": ( - "Solar Eligible, Other Floor, Needs Loft, SAP Above Threshold" - ), - "solar_eligible_other_floor_needs_loft_needs_heating_upgrade": ( - "Solar Eligible, Other Floor, Needs Loft, Needs Heating Upgrade" - ), - "solar_eligible_other_floor_needs_loft_needs_heating_upgrade_sap_above_threshold": ( - "Solar Eligible, Other Floor, Needs Loft, Needs Heating Upgrade, SAP Above Threshold" - ), - "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below", + "solar_eligible_solid_wall_uninsulated": "Solar Eligible, Solid Wall Uninsulated, EPC E or Below: ", } for variable, reason in solar_reason_map.items(): self.standardised_asset_list["solar_reason"] = np.where( self.standardised_asset_list[variable], - reason, + reason + self.standardised_asset_list["SAP Category"], self.standardised_asset_list["solar_reason"] ) diff --git a/asset_list/app.py b/asset_list/app.py index 67e18dac..ae4b3cef 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -10,6 +10,7 @@ from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS from asset_list.mappings.heating_systems import HEATING_MAPPINGS from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS +from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS from asset_list.utils import get_data from dotenv import load_dotenv @@ -88,6 +89,63 @@ def app(): # - We want: fully insulated property (all wall types), EPC D or below (floors should be solid) # - Or the insulation required is loft/cavity (floors should be solid) + # Torus + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1" + data_filename = "Torus Property Asset List - Phase 1.xlsx" + sheet_name = "TORUS" + postcode_column = 'Postcode' + fulladdress_column = None + address1_column = "AddressLine1" + address1_method = None + address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"] + missing_postcodes_method = None + landlord_year_built = "Property Age" + landlord_os_uprn = "NatUPRN" + landlord_property_type = "Property Type" + landlord_built_form = "Built Form" + landlord_wall_construction = "Wall Construction" + landlord_roof_construction = "Roof Construction" + landlord_heating_system = "Space Heating Source" + landlord_existing_pv = "Low Carbon Technology (Solar PV)" + landlord_property_id = "UPRN" + landlord_sap = "SAP Score" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_to_asset_list_filepath = None + phase = True + + # Ealing - houses + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing" + data_filename = "Ealing_rechecked_cleaned_05042025.csv" + sheet_name = None + postcode_column = 'Postcode' + fulladdress_column = "Address" + address1_column = None + address1_method = "house_number_extraction" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = "Year Built" + landlord_os_uprn = None + landlord_property_type = "Property Type Code" + landlord_built_form = None + landlord_wall_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "Property ref" + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_to_asset_list_filepath = None + # Southern Midlands data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025" data_filename = "Southern Housing Midlands Property List - combined.xlsx" @@ -446,8 +504,11 @@ def app(): landlord_property_type=landlord_property_type, landlord_built_form=landlord_built_form, landlord_wall_construction=landlord_wall_construction, + landlord_roof_construction=landlord_roof_construction, landlord_heating_system=landlord_heating_system, - landlord_existing_pv=landlord_existing_pv + landlord_existing_pv=landlord_existing_pv, + landlord_sap=landlord_sap, + phase=phase ) asset_list.init_standardise() @@ -486,6 +547,13 @@ def app(): ).items() if k not in EXISTING_PV_MAPPINGS } + new_roof_construction_map = { + k: v for k, v in ( + asset_list.variable_mappings[asset_list.landlord_roof_construction] if + asset_list.landlord_roof_construction else {} + ).items() + if k not in ROOF_CONSTRUCTION_MAPPINGS + } asset_list.apply_standardiation() @@ -511,7 +579,7 @@ def app(): epc_api_only = False force_retrieve_data = False skip = None # Used to skip already completed chunks - chunk_size = 5000 + chunk_size = 1000 filename = "Chunk {i}.csv" download_folder = os.path.join(data_folder, "Chunks") if not os.path.exists(download_folder): @@ -529,8 +597,6 @@ def app(): if any(x in folder_contents for x in downloaded_files): skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents]) - # folder_contents = [f for f in folder_contents if "nodata" not in f and f.endswith(".csv")] - for i in range(0, len(asset_list.standardised_asset_list), chunk_size): print(f"Processing chunk {i} to {i + chunk_size}") if skip is not None and not force_retrieve_data: diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index aad36fce..cabd970e 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -80,5 +80,32 @@ BUILT_FORM_MAPPINGS = { 'House: MidTerrace': 'mid-terrace', 'House: EndTerrace': 'end-terrace', 'Bungalow: EndTerrace': 'end-terrace', - 'Bungalow: MidTerrace': 'mid-terrace' + 'Bungalow: MidTerrace': 'mid-terrace', + 'Flat: Semi Detached: Mid Floor': 'semi-detached', + 'Maisonette: Mid Terrace: Top Floor': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Mid Floor': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Ground Floor': 'mid-terrace', + 'Flat: Detached: Ground Floor': 'detached', + 'Flat: Detached: Mid Floor': 'detached', + 'Flat: Detached: Top Floor': 'detached', + 'Flat: Enclosed End Terrace: Mid Floor': 'end-terrace', + 'Bungalow: Detached': 'detached', + 'Maisonette: End Terrace: Mid Floor': 'end-terrace', + 'Maisonette: Detached: Top Floor': 'detached', + 'Flat: Enclosed End Terrace: Ground Floor': 'end-terrace', + 'Flat: Enclosed Mid Terrace: Top Floor': 'mid-terrace', + 'House: EnclosedEndTerrace': 'end-terrace', + '3 Ext. Wall Flat': 'semi-detached', + 'Bungalow Detached': 'detached', + 'Bungalow End Terrace': 'end-terrace', + 'Bungalow Mid Terrace': 'mid-terrace', + 'Bungalow Semi Detached': 'detached', + 'Maisonette 2 Ext. Wall': 'mid-terrace', + 'Maisonette 3 Ext. Wall': 'semi-detached', + 'End-terrace': 'end-terrace', + 'Mid-terrace': 'mid-terrace', + 'Semi-detached': 'semi-detached', + 'Detached': 'detached', + 'Flat / maisonette': 'unknown', + '2014 onwards': 'unknown' } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index 06e77bba..51f5f922 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -1,3 +1,5 @@ +import numpy as np + STANDARD_EXISTING_PV = { "already has PV", "no PV", "unknown" } @@ -9,4 +11,10 @@ EXISTING_PV_MAPPINGS = { "yes": "already has PV", True: "already has PV", False: "no PV", + np.nan: 'unknown', + 'PV: 2kWp array': 'already has PV', + 'PV: 25% roof area, PV: 3.6kWp array': 'already has PV', + 'PV: 10% roof area, PV: 2kWp array': 'already has PV', + 'PV: 50% roof area': 'already has PV', + 'Solar PV': 'already has PV' } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 714f5434..42326575 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -21,7 +21,9 @@ STANDARD_HEATING_SYSTEMS = { 'oil fuel', 'solid fuel', 'gas combi boiler', - 'unknown' + 'unknown', + "electric ceiling", + "electric underfloor" } HEATING_MAPPINGS = { @@ -143,5 +145,30 @@ HEATING_MAPPINGS = { 'Boiler: A rated Regular Boiler Electricity: Electricity': 'electric boiler', 'Community Heating Systems: Community boilers only (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler', 'Boiler: A rated Combi Gas: Mains Gas': 'gas condensing combi', - 'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler' + 'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler', + 'Heat Pump: Electric Heat pumps: Ground source heat pump with flow temperature <= 35°C': 'ground source heat pump', + 'Heat Pump: Electric Heat pumps: Ground source heat pump in other cases': 'ground source heat pump', + 'Electric Storage Systems: High heat retention storage heaters': 'high heat retention storage heaters', + 'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump', + 'Electric (direct acting) room heaters: Panel, convector or radiant heaters': 'room heaters', + 'Boiler: C rated Combi': 'gas combi boiler', + 'Boiler: B rated Regular Boiler': 'gas condensing boiler', + 'Boiler: E rated Combi': 'gas combi boiler', + 'Boiler: A rated Combi': 'gas combi boiler', + 'Boiler: E rated Regular Boiler': 'gas condensing boiler', + 'Community Heating Systems: Community boilers only (RdSAP)': 'district heating', + 'Boiler: C rated Regular Boiler': 'gas condensing boiler', + 'Boiler: A rated Regular Boiler': 'gas condensing boiler', + 'Electric Storage Systems: Fan storage heaters': 'electric storage heaters', + 'Boiler: F rated Combi': 'gas combi boiler', + + 'Room heaters': 'room heaters', + 'Room Heaters': 'room heaters', + 'Boiler': 'gas condensing boiler', + 'Heat Pump (Wet)': 'air source heat pump', + 'Community Heating': 'district heating', + 'Heat pump (wet)': 'air source heat pump', + 'Electric ceiling heating': 'electric ceiling', + 'Electric under floor heating': 'electric underfloor', + 'Community heating': 'district heating' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 139b1622..f208081a 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -136,5 +136,20 @@ PROPERTY_MAPPING = { 'Flat: Semi Detached: Top Floor': 'flat', 'Flat: Mid Terrace: Ground Floor': 'flat', 'Bungalow: MidTerrace': 'bungalow', - 'Flat: Enclosed End Terrace: Top Floor': 'flat' + 'Flat: Enclosed End Terrace: Top Floor': 'flat', + 'Flat: Semi Detached: Mid Floor': 'flat', + 'Maisonette: Mid Terrace: Top Floor': 'maisonette', + 'House: EnclosedEndTerrace': 'house', + 'Flat: Detached: Ground Floor': 'flat', + 'Flat: Detached: Mid Floor': 'flat', + 'Flat: Detached: Top Floor': 'flat', + 'Bungalow: Detached': 'bungalow', + 'Maisonette: End Terrace: Mid Floor': 'maisonette', + 'Maisonette: Detached: Top Floor': 'maisonette', + 'Flat: Enclosed Mid Terrace: Mid Floor': 'flat', + 'Flat: Enclosed Mid Terrace: Ground Floor': 'flat', + 'Flat: Enclosed End Terrace: Mid Floor': 'flat', + 'Flat: Enclosed End Terrace: Ground Floor': 'flat', + 'Flat: Enclosed Mid Terrace: Top Floor': 'flat', + '2013 onwards': 'unknown' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py new file mode 100644 index 00000000..b98a773c --- /dev/null +++ b/asset_list/mappings/roof.py @@ -0,0 +1,26 @@ +import numpy as np + +STANDARD_ROOF_CONSTRUCTIONS = { + "pitched access to loft", + "pitched no access to loft", + "pitched unknown access to loft", + "piched unknown insulation", + "pitched insulated", + "another dwelling above", + "flat unknown insulation", + "unknown insulated", + "unknown", +} + +ROOF_CONSTRUCTION_MAPPINGS = { + 'Flat': 'flat unknown insulation', + 'Pitched (access to loft)': 'pitched access to loft', + 'Pitched (no access to loft)': 'pitched no access to loft', + 'Another dwelling above': 'another dwelling above', + 'Same dwelling above': 'another dwelling above', + 'As-built': 'unknown', + 'ND (inferred)': 'unknown', + '2018 onwards': 'unknown', + 'Pitched (vaulted ceiling)': 'pitched insulated', + np.nan: "unknown" +} diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index e5f22f13..128e84af 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -147,5 +147,15 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity', 'SolidBrick: AsBuilt': 'solid brick unknown insulation', 'Cavity: FilledCavity': 'filled cavity', - 'SolidBrick: Internal': 'insulated solid brick' + 'SolidBrick: Internal': 'insulated solid brick', + 'Cavity: External': 'filled cavity', + 'Sandstone: Internal': 'sandstone or limestone', + 'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation', + 'System build': 'system built', + 'Solid brick': 'solid brick unknown insulation', + 'Stone': 'sandstone or limestone', + 'Timber frame': 'timber frame unknown insulation', + '2017 onwards': 'new build - average thermal transmittance', + 'ND (inferred)': 'unknown', + 'Flat / maisonette': 'other' } diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 2b3f0c02..96b7c5de 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -755,6 +755,10 @@ class SearchEpc: "photo-supply"] ) + estimated_epc["co2-emiss-curr-per-floor-area"] = ( + estimated_epc["co2-emissions-current"] / estimated_epc["total-floor-area"] + ) + estimated_epc["postcode"] = self.postcode if not self.uprn: # Update self.uprn too diff --git a/etl/customers/remote_assessments/app.py b/etl/customers/remote_assessments/app.py index a4d60d85..7e15c1f4 100644 --- a/etl/customers/remote_assessments/app.py +++ b/etl/customers/remote_assessments/app.py @@ -4,7 +4,7 @@ from dotenv import load_dotenv from utils.s3 import save_csv_to_s3 from etl.find_my_epc.AssetListEpcData import AssetListEpcData -PORTFOLIO_ID = 138 +PORTFOLIO_ID = 140 USER_ID = 8 load_dotenv(dotenv_path="backend/.env") @@ -19,14 +19,17 @@ def app(): asset_list = [ { - "address": "42 Rippolson Road", - "postcode": "SE18 1NS", - "uprn": 100020999275, + "address": "Brow Cottage", + "postcode": "YO18 7PZ", + "uprn": 10007630752, + "property_type": "House", + "built_form": "Semi-Detached", + "patch": True }, { - "address": "66 Riverdale Road", - "postcode": "DA8 1PX", - "uprn": 100020235516 + "address": "Wyburn", + "postcode": "DT1 2LL", + "uprn": 100040630290 }, ] asset_list = pd.DataFrame(asset_list) @@ -46,6 +49,7 @@ def app(): ) asset_list_epc_client.get_data() asset_list_epc_client.get_non_invasive_recommendations() + asset_list_epc_client.get_patch() # Store non-invasive recommendations in S3 non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv" @@ -55,14 +59,24 @@ def app(): file_name=non_invasive_recommendations_filename ) + # Store patches in S3 + patches_filename = "" + if asset_list_epc_client.patches: + patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv" + save_csv_to_s3( + dataframe=pd.DataFrame(asset_list_epc_client.patches), + bucket_name="retrofit-plan-inputs-dev", + file_name=patches_filename + ) + valuation_data = [ { "valuation": 469_000, - "uprn": 100020999275, + "uprn": 10007630752, }, { - "valuation": 382_000, - "uprn": 100020235516 + "valuation": 373_000, + "uprn": 100040630290 }, ] # Store valuation data to s3 @@ -80,7 +94,7 @@ def app(): "goal_value": "C", "trigger_file_path": filename, "already_installed_file_path": "", - "patches_file_path": "", + "patches_file_path": patches_filename, "non_invasive_recommendations_file_path": non_invasive_recommendations_filename, "valuation_file_path": valuation_filename, "scenario_name": "Full package remote assessment", diff --git a/etl/find_my_epc/AssetListEpcData.py b/etl/find_my_epc/AssetListEpcData.py index 1d2e1472..f085c8fb 100644 --- a/etl/find_my_epc/AssetListEpcData.py +++ b/etl/find_my_epc/AssetListEpcData.py @@ -26,6 +26,7 @@ class AssetListEpcData: self.extracted_data = None self.non_invasive_recommendations = None + self.patches = None @staticmethod def check_asset_list(asset_list): @@ -52,6 +53,21 @@ class AssetListEpcData: } for r in self.extracted_data ] + def get_patch(self): + """ + + :return: + """ + if self.extracted_data is None: + raise ValueError("extracted data is missing - run get_data first") + + self.patches = [ + { + "uprn": r.get("uprn"), + **r.get("patch") + } for r in self.extracted_data if r.get("patch") + ] + def get_data(self): logger.info("Retrieving data for given asset list") @@ -67,11 +83,18 @@ class AssetListEpcData: postcode=pc, uprn=home.get("uprn"), auth_token=self.epc_auth_token, - os_api_key="" + os_api_key="", ) + epc_searcher.ordnance_survey_client.property_type = home.get("property_type") + epc_searcher.ordnance_survey_client.built_form = home.get("built_form") epc_searcher.find_property(skip_os=True) + if epc_searcher.newest_epc is None: continue + + if not pd.isnull(home.get("patch")): + epc_searcher.newest_epc["address1"] = add1 + # Attempt both methods: try: find_epc_searcher = RetrieveFindMyEpc( @@ -89,14 +112,22 @@ class AssetListEpcData: time.sleep(0.5) # We need uprn - extracted_data.append( - { - "uprn": home.get("uprn"), - "address": home["address"], - "postcode": home["postcode"], - **find_epc_data, + to_append = { + "uprn": home.get("uprn"), + "address": home["address"], + "postcode": home["postcode"], + **find_epc_data, + } + if not pd.isnull(home.get("patch")): + to_append["patch"] = { + "current-energy-rating": find_epc_data["current_epc_rating"], + "current-energy-efficiency": find_epc_data["current_epc_efficiency"], + "potential-energy-rating": find_epc_data["potential_epc_rating"], + "potential-energy-efficiency": find_epc_data["potential_epc_efficiency"], + **find_epc_data["epc_data"] } - ) + + extracted_data.append(to_append) self.extracted_data = extracted_data logger.info("Data Extrction complete") diff --git a/etl/find_my_epc/RetrieveFindMyEpc.py b/etl/find_my_epc/RetrieveFindMyEpc.py index 5e05d56f..86c3fda1 100644 --- a/etl/find_my_epc/RetrieveFindMyEpc.py +++ b/etl/find_my_epc/RetrieveFindMyEpc.py @@ -1,3 +1,4 @@ +import re import pandas as pd import requests from bs4 import BeautifulSoup @@ -45,6 +46,85 @@ class RetrieveFindMyEpc: sources = {item.get_text(strip=True): True for item in energy_list.find_all("li")} return sources + @staticmethod + def get_text(elem): + return elem.get_text(strip=True) if elem else None + + def extract_epc_data(self, soup): + + results = {} + + # 1. Total floor area + results['total-floor-area'] = int(self.get_text( + soup.find("dt", string="Total floor area").find_next_sibling("dd") + ).split(" ")[0]) + + # Table with features + rows = soup.select("table.govuk-table tbody tr") + + rating_map = { + "Very poor": "Very Poor", + "Very good": "Very Good" + } + + def get_feature_row_text(feature_name, index=0): + matches = [row for row in rows if row.find("th") and feature_name in row.find("th").text] + if len(matches) > index: + cells = matches[index].find_all("td") + description = self.get_text(cells[0]) + rating = self.get_text(cells[1]) + return description, rating_map.get(rating, rating) + return None, None + + # 2-3. First wall description and rating + results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0) + + # 4-5. First roof description and rating + results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0) + + # 6-7. Windows description and rating + results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window") + + # 8-9. Main heating description and rating + results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating") + + # 10-11. Main heating control description and rating + results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text( + "Main heating control" + ) + + # 12-13. Hot water description and rating + results['hotwater-description'], results['hot-water-energy-ef'] = get_feature_row_text("Hot water") + + # 14-15. Lighting description and rating + results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting") + + # 16. Floor description + results['floor-description'], _ = get_feature_row_text("Floor") + + # 17. Secondary heating description + results['secondheat-description'], _ = get_feature_row_text("Secondary heating") + + # 18. Primary energy use + p_energy = soup.find(string=lambda t: "primary energy use for this property per year" in t.lower()) + # We should always have this + match = re.search(r"(\d+)\s+kilowatt", p_energy) + results['energy-consumption-current'] = int(match.group(1)) if match else None + + # 19. Current CO2 emissions + co2_now = soup.find("dd", id="eir-property-produces") + # We should always have this + match = re.search(r"([\d.]+)", co2_now.text) + results['co2-emissions-current'] = float(match.group(1)) if match else None + # Need co2-emiss-curr-per-floor-area + + # 20. Potential CO2 emissions + co2_pot = soup.find("dd", id="eir-potential-production") + match = re.search(r"([\d.]+)", co2_pot.text) + results['co2-emissions-potential'] = float(match.group(1)) if match else None + + return results + def retrieve_newest_find_my_epc_data(self, sap_2012_date=None): """ For a post code and address, we pull out all the required data from the find my epc website @@ -115,6 +195,9 @@ class RetrieveFindMyEpc: potential_rating = ratings.split(".")[1] current_sap = int(current_rating.split(' ')[-1]) + # Floor area + address_res.find() + # Retrieve the energy consumption bills = address_res.find('div', {'id': 'bills-affected'}) bills_list = bills.find_all('li') @@ -232,6 +315,9 @@ class RetrieveFindMyEpc: # 4) Low and zero carbon energy sources low_carbon_energy_sources = self.extract_low_carbon_sources(address_res) + # 5) Pull out the EPC data + epc_data = self.extract_epc_data(address_res) + resulting_data = { 'epc_certificate': epc_certificate, 'current_epc_rating': current_rating.split(' ')[-6], @@ -241,8 +327,9 @@ class RetrieveFindMyEpc: "heating_text": heating_text, "hot_water_text": hot_water_text, "recommendations": recommendations, + "epc_data": epc_data, **assessment_data, - **low_carbon_energy_sources + **low_carbon_energy_sources, } return resulting_data