diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 2d224daa..54f6cd96 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -296,7 +296,7 @@ class AssetList: ATTRIBUTE_HEAT_LOSS_AREA = "attribute_heat_loss_area" ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS = "attribute_epc_roof_insulation_thickness" ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = f"sap_rating_{FILLED_CAVITY_SAP_THRESHOLD}_and_below" - ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"EPC is pre {EPC_YEAR_THRESHOLD}" + ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"epc_is_pre_{EPC_YEAR_THRESHOLD}" # These are the descriptions that we look for in the EPC data that are indicative of no insulation EPC_NO_WALL_INSULATION_DESCRIPTIONS = [ @@ -775,7 +775,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": None, "epc_year_upper_bound": None, - "Does Age Match EPC Age Band?": "No EPC Age Band" + "does_age_band_match_epc_age_band": "No EPC Age Band" } ) continue @@ -800,7 +800,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": year_lower_bound, "epc_year_upper_bound": None, - "Does Age Match EPC Age Band?": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches } ) continue @@ -820,7 +820,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": None, "epc_year_upper_bound": 1899, - "Does Age Match EPC Age Band?": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches } ) continue @@ -842,7 +842,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]), "epc_year_upper_bound": int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]), - "Does Age Match EPC Age Band?": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches } ) continue @@ -864,7 +864,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": int(lower_date), "epc_year_upper_bound": int(upper_date), - "Does Age Match EPC Age Band?": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches } ) @@ -892,7 +892,12 @@ class AssetList: (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) & - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2000) + (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2000) & + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) ) self.standardised_asset_list["epc_indicates_empty_cavity"] = ( @@ -1206,6 +1211,11 @@ class AssetList: self.standardised_asset_list["solar_epc_floor_is_other_insulated"] ) + # Drop anything we don't need + self.standardised_asset_list = self.standardised_asset_list.drop( + columns=["walls_u_value", "roof_u_value", "floor_u_value"] + ) + # Produce some aggregate figures self.work_type_figures = { # Empty cavity from non-intrusives @@ -1219,7 +1229,11 @@ class AssetList: ).sum() ), "Cavity Extraction": ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"].sum() + ( + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & + ~self.standardised_asset_list["epc_indicates_empty_cavity"] & + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] + ).sum() ), "Solar PV (Solid Floor)": ( self.standardised_asset_list["solar_eligible_solid_floor"].sum() @@ -1234,3 +1248,51 @@ class AssetList: self.standardised_asset_list["solar_eligible_other_floor_needs_loft"].sum() ) } + + # Finally, we note why each property has been flagged + self.standardised_asset_list["cavity_reason"] = None + self.standardised_asset_list["cavity_reason"] = np.where( + self.standardised_asset_list["non_intrusive_indicates_empty_cavity"], + "Non-Intrusive Data Showed Empty Cavity", + self.standardised_asset_list["cavity_reason"] + ) + self.standardised_asset_list["cavity_reason"] = np.where( + ( + self.standardised_asset_list["epc_indicates_empty_cavity"] & + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + ), + "EPC Data Showed Empty Cavity", + self.standardised_asset_list["cavity_reason"] + ) + # Flag extraction + self.standardised_asset_list["cavity_reason"] = np.where( + ( + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & + pd.isnull(self.standardised_asset_list["cavity_reason"]) + ), + "Non-Intrusive Data Showed Cavity Extraction", + self.standardised_asset_list["cavity_reason"] + ) + + # Flag solar + self.standardised_asset_list["solar_reason"] = None + self.standardised_asset_list["solar_reason"] = np.where( + self.standardised_asset_list["solar_eligible_solid_floor"], + "Solid Floor, Insulated, No Solar", + self.standardised_asset_list["solar_reason"] + ) + self.standardised_asset_list["solar_reason"] = np.where( + self.standardised_asset_list["solar_eligible_solid_floor_needs_loft"], + "Solid Floor, Insulated, Needs Loft", + self.standardised_asset_list["solar_reason"] + ) + self.standardised_asset_list["solar_reason"] = np.where( + self.standardised_asset_list["solar_eligible_other_floor"], + "Other Floor, Insulated, No Solar", + self.standardised_asset_list["solar_reason"] + ) + self.standardised_asset_list["solar_reason"] = np.where( + self.standardised_asset_list["solar_eligible_other_floor_needs_loft"], + "Other Floor, Insulated, Needs Loft", + self.standardised_asset_list["solar_reason"] + ) diff --git a/asset_list/app.py b/asset_list/app.py index 3c1ab627..65d4ab87 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -376,6 +376,7 @@ def app(): epc_data.append(csv_data) epc_df = pd.concat(epc_data) + epc_df["estimated"] = epc_df["estimated"].fillna(False) # We expand out the recommendations recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]] @@ -454,36 +455,40 @@ def app(): asset_list.identify_worktypes(cleaned) + from pprint import pprint + pprint(asset_list.work_type_figures) + # TODO: We should do this breakdown for flats def flat_analysis(asset_list): # We need to deduce the building name - we strip out the house number - def extract_building_name(x): - # TODO: This doesn't really work - if pd.isnull(x): - return None - house_no = SearchEpc.get_house_number(address=x, postcode=None) - if house_no: - return x.replace(house_no, "").strip() - return x.split(",")[0].strip() # We want to deduce if flats have 50% of the properties below C75 # We group by postcode and property type - grouped = asset_list.groupby([POSTCODE_COLUMN, "Property Type"]) + grouped = asset_list.standardised_asset_list.groupby( + [asset_list.STANDARD_POSTCODE, asset_list.STANDARD_PROPERTY_TYPE] + ) flat_data = [] for _, group in grouped: - if "flat" in group["Property Type"].str.lower().values: - num_flats = group["Property Type"].str.lower().value_counts().get("flat", 0) - num_below_c75 = group["SAP score on register"].lt(75).sum() + if "flat" in group[asset_list.STANDARD_PROPERTY_TYPE].values: + num_flats = group[asset_list.STANDARD_PROPERTY_TYPE].shape[0] + num_below_c75 = group[ + asset_list.EPC_API_DATA_NAMES["current-energy-efficiency"] + ].lt(asset_list.FILLED_CAVITY_SAP_THRESHOLD).sum() + # Check if any flats are below C69 + num_flats_below_c69 = group[ + asset_list.EPC_API_DATA_NAMES["current-energy-efficiency"] + ].lt(69).sum() flat_data.append( { - "Postcode": group[POSTCODE_COLUMN].iloc[0], + "Postcode": group[asset_list.STANDARD_POSTCODE].iloc[0], "Property Type": "Flat", "Number of Flats with EPC": num_flats, "Number of Flats below C75": num_below_c75, - "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats) + "Proportion of Flat EPCs below C75": round(100 * num_below_c75 / num_flats), + "num_flats_below_c69": num_flats_below_c69, } ) @@ -494,11 +499,11 @@ def app(): flat_data = flat_analysis(asset_list) # Store as an excel - filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " EPC Data Pull.xlsx" + filename = os.path.join(DATA_FOLDER, ".".join(DATA_FILENAME.split(".")[:-1])) + " - Standardised.xlsx" # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data with pd.ExcelWriter(filename) as writer: - asset_list.to_excel(writer, sheet_name="EPC Data", index=False) + asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) flat_data.to_excel(writer, sheet_name="Flat Data", index=False) matches_review = asset_list[