diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py
index 74469c63..5f4436b8 100644
--- a/asset_list/AssetList.py
+++ b/asset_list/AssetList.py
@@ -21,6 +21,8 @@ from recommendations.recommendation_utils import (
     estimate_number_of_floors
 )
 
+from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
+
 logger = setup_logger()
 
 # OpenAI API Key (set this in your environment variables for security)
@@ -279,9 +281,21 @@ class AssetList:
         "Any further surveyor notes",
         'Surveyors Name'
     ]
+    # This SAP threshold is a key search criterion for properties that may be eligible for extraction
+    SAP_RATING_THRESHOLD = 75
+    # Any EPC lodged in a calendar year before this one is deemed to be unreliable.
+    # NOTE: evaluated once, when the class body is executed, so a long-running process keeps the start-up year.
+    EPC_YEAR_THRESHOLD = pd.Timestamp.now().year - 5
+
     # Attributes - these are columns that we produce, calcualted based on other pieces of data
     ATTRIBUTE_HAS_SOLAR = "attribute_has_solar"
     ATTRIBUTE_NUMBER_OF_FLOORS = "attribute_est_number_floors"
+    # NOTE(review): "perimter" is misspelt; left unchanged because this string is the emitted column name
+    ATTRIBUTE_ESTIMATED_PERIMETER = "attribute_est_perimter"
+    ATTRIBUTE_HEAT_LOSS_AREA = "attribute_heat_loss_area"
+    ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS = "attribute_epc_roof_insulation_thickness"
+    ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = f"sap_rating_{SAP_RATING_THRESHOLD}_and_below"
+    ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"EPC is pre {EPC_YEAR_THRESHOLD}"
 
     def __init__(
         self,
@@ -672,3 +686,60 @@ class AssetList:
             ),
             axis=1
         )
+
+        # These EPC fields may hold "" for missing values. pd.to_numeric(errors="coerce") maps ""
+        # (and any other non-numeric text) to NaN, avoiding Series.replace("", None), whose
+        # treatment of an explicit None value differs between pandas versions.
+        for epc_field in ("total-floor-area", "number-habitable-rooms"):
+            column = self.EPC_API_DATA_NAMES[epc_field]
+            self.standardised_asset_list[column] = pd.to_numeric(
+                self.standardised_asset_list[column], errors="coerce"
+            ).astype(float)
+
+        # Estimate the perimeter from the per-floor area and the per-floor habitable room count
+        # NOTE(review): assumes ATTRIBUTE_NUMBER_OF_FLOORS is never zero - confirm against estimate_number_of_floors
+        self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = self.standardised_asset_list.apply(
+            lambda x: estimate_perimeter(
+                floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+                num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+            ), axis=1
+        )
+
+        # Estimate the external wall area; the floor height falls back to 2.5 when the EPC value is falsy (e.g. "")
+        self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = self.standardised_asset_list.apply(
+            lambda x: estimate_external_wall_area(
+                num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS],
+                floor_height=(
+                    float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if
+                    x[self.EPC_API_DATA_NAMES["floor-height"]] else 2.5
+                ),
+                perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER],
+                built_form=x[self.EPC_API_DATA_NAMES["built-form"]]
+            ),
+            axis=1
+        )
+
+        # Pull "insulation_thickness" out of the processed EPC roof description; None when there is no description
+        self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply(
+            lambda x: (
+                RoofAttributes(description=x[self.EPC_API_DATA_NAMES["roof-description"]]).process()["insulation_thickness"]
+                if not pd.isnull(x[self.EPC_API_DATA_NAMES["roof-description"]]) else None
+            ),
+            axis=1
+        )
+
+        # We produce some additional fields
+        # 1) Is the SAP rating at or below SAP_RATING_THRESHOLD (inclusive); non-numeric ratings are flagged False
+        self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] = (
+            pd.to_numeric(
+                self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]],
+                errors="coerce"
+            ) <= self.SAP_RATING_THRESHOLD
+        )
+
+        # 2) Flag anything where the EPC was lodged in a calendar year before EPC_YEAR_THRESHOLD
+        self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] = (
+            pd.to_datetime(
+                self.standardised_asset_list[self.EPC_API_DATA_NAMES["lodgement-date"]]
+            ).dt.year < self.EPC_YEAR_THRESHOLD
+        )