diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 767e13c8..9cadaf9f 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -92,6 +92,27 @@ PROPERTY_TYPE_LOOKUP = { 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"}, }, + "HA25": { + 'Flat': 'Flat', + 'Mid Terrace House': 'House', + 'Semi Detached House': 'House', + 'End Terrace House': 'House', + 'House': 'House', + 'Semi Detached Bung': 'Bungalow', + 'Bungalow': 'Bungalow', + 'End Terrace Bungalow': 'Bungalow', + 'Maisonnette': 'Maisonette', + 'Mid Terrace Bungalow': 'Bungalow', + 'Bedspace': None, + 'Detached House': 'House', + 'Bedsit': 'Flat', + 'Coach House': 'House', + 'Detached Bungalow': 'Bungalow', + 'Office Buildings': None, + 'Guest Room': None, + 'Mid Terrace Housekeeping ': 'House', + 'End Terrace Housex': 'House' + }, "HA39": { "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, "1st floor flat": {"property_type": "Flat", "built_form": None}, @@ -2877,6 +2898,9 @@ def get_property_type_and_built_form(property_meta, ha_name): property_meta["Asset Type Description"] ] + built_form = None + elif ha_name == "HA25": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]] built_form = None elif ha_name == "HA16": config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] @@ -5092,7 +5116,8 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): - has_bruh = ["HA7"] + has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] + # DO from backend.SearchEpc import SearchEpc epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" @@ -5104,7 +5129,7 @@ def fml_data_pull(loader): # For each property, search for the latest EPC epc_data = [] for _, row in tqdm(fml.iterrows(), 
def classify_loft(x):
    """Grade how plausible a loft-insulation install is for one property.

    Args:
        x: A mapping-like row (for example a DataFrame row or a dict).
            ``x["roof_insulation_thickness"]`` is the existing insulation
            thickness and may be a number, a numeric string or a missing
            value. ``x["epc_age"]`` is only read when the thickness is
            above 200 and at most 270; it is compared against ``5 * 365``.

    Returns:
        ``"high"`` when the thickness is at most 100.
        ``"medium"`` when the thickness is at most 200, or at most 270 with
        an ``epc_age`` of at least ``5 * 365``.
        ``"unlikely"`` in every other case, including a NaN or missing
        thickness.

    Raises:
        ValueError: If the thickness is a string that ``float()`` cannot
            parse. This is left to propagate so that malformed data is
            noticed instead of being silently classified.
    """
    high_max_thickness = 100
    medium_max_thickness = 200
    aged_medium_max_thickness = 270
    min_epc_age = 5 * 365

    raw_thickness = x["roof_insulation_thickness"]
    try:
        # Parse once; every comparison below uses the same value.
        thickness = float(raw_thickness)
    except TypeError:
        # A missing value (None / pd.NA) cannot be converted. A NaN thickness
        # already falls through every comparison to "unlikely", so a missing
        # one is treated the same way instead of aborting the whole apply().
        return "unlikely"

    if thickness <= high_max_thickness:
        return "high"

    if thickness <= medium_max_thickness:
        return "medium"

    # Thicker lofts only stay plausible when the EPC is old enough.
    if thickness <= aged_medium_max_thickness and x["epc_age"] >= min_epc_age:
        return "medium"

    return "unlikely"
how="left", on="asset_list_row_id" ) @@ -5178,12 +5230,27 @@ def fml_analysis(loader): # Take just remaining if not loader.data[ha_name]["survey_list"].empty: - raise NotImplementedError("TAKE JUST REMAINING IDIOT") + survey_list = ( + loader.data[ha_name]["survey_list"][ + ~pd.isnull(loader.data[ha_name]["survey_list"]["asset_list_row_id"]) + ] + ) + fuck_this = fuck_this.merge( + survey_list[["asset_list_row_id", "installation_status"]], + how="left", + on="asset_list_row_id" + ) + # Anything that has an installation has gone to installation, and therefore is not remaining + fuck_this = fuck_this[pd.isnull(fuck_this["installation_status"])] + fuck_this = fuck_this.drop(columns=["installation_status"]) insulation_thicknesses = [] for _, x in fuck_this.iterrows(): if pd.isnull(x["roof-description"]): continue + if x["roof-description"] == "SAP05:Roof": + continue + thickness = RoofAttributes(x["roof-description"]).process()["insulation_thickness"] # If there is a + in the thickness, strip it out thickness = str(thickness).replace("+", "") @@ -5208,11 +5275,13 @@ def fml_analysis(loader): "roof_insulation_thickness" ].str.replace("average", "150") - fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply( - lambda x: EPCDataProcessor.clean_construction_age_band(x) - ) + fuck_this["roof_classiciation"] = fuck_this.apply(lambda x: classify_loft(x), axis=1) - fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) + # fuck_this["construction-age-band"] = fuck_this["construction-age-band"].apply( + # lambda x: EPCDataProcessor.clean_construction_age_band(x) + # ) + # + # fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound) had_survey = fuck_this[pd.isnull(fuck_this["estimated"])] @@ -5225,9 +5294,23 @@ def fml_analysis(loader): had_survey["ECO Eligibility"] == "eco4" ] + # Walls: + # Cavity wall, as built, insulated (assumed) + # Cavity wall, as built, no insulation 
(assumed) + # Cavity wall, as built, partial insulation (assumed) + + # Roof: + # Less than 100mm = high confidence + # Less than 270mm & EPC at least 5 years old = medium confidence + # Otherwise, low confidence + + # SAP criteria is EPC C or below + + # Pre is 54 or below + no_ciga_check_needed_with_archetype = no_ciga_check_needed[ - (no_ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & - (no_ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) & + (no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) & + (no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] if not no_ciga_check_needed_with_archetype.empty: @@ -5239,7 +5322,14 @@ def fml_analysis(loader): ciga_check_needed = had_survey[ had_survey["ECO Eligibility"].str.contains("subject to ciga") - ] + ].copy() + + ciga_check_passed = had_survey[ + had_survey["ECO Eligibility"] == "eco4 - passed ciga" + ] + + if not ciga_check_passed.empty: + raise Exception("SORT ME BRUV") # We take just the cavity walls # UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/ @@ -5248,17 +5338,12 @@ def fml_analysis(loader): # differ between variables; floor and wall type errors occur in ~10-15% of EPCs, # compared with ~5% for wall insulation and glazing performance - ciga_check_needed_with_archetype = ciga_check_needed[ + ciga_check_needed_plausible = ciga_check_needed[ (ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) & - (ciga_check_needed["roof-description"].str.lower().str.contains("pitched") == True) & + (ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) & (ciga_check_needed["current-energy-efficiency"].astype(float) <= 80) ] - # We take properties that could feasibly be within install regions - ciga_check_needed_plausible = ciga_check_needed_with_archetype[ - 
ciga_check_needed_with_archetype["roof_insulation_thickness"].astype(float) < 270 - ] - if not loader.data[ha_name]["ciga_list"].empty: raise NotImplementedError("SORT OUT THE CIGA BRUV") else: