From 29599c515434260fd81aabaff7349bc046cf9897 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 15:45:51 +0000 Subject: [PATCH 1/6] ha4 wip --- backend/SearchEpc.py | 24 ++++- etl/eligibility/ha_15_32/app.py | 6 +- etl/eligibility/ha_15_32/ha4_app.py | 158 ++++++++++++++++++++++++++++ 3 files changed, 183 insertions(+), 5 deletions(-) create mode 100644 etl/eligibility/ha_15_32/ha4_app.py diff --git a/backend/SearchEpc.py b/backend/SearchEpc.py index 16c2a8c8..d8ea6b78 100644 --- a/backend/SearchEpc.py +++ b/backend/SearchEpc.py @@ -41,7 +41,9 @@ class SearchEpc: address2: str = None, address3: str = None, address4: str = None, - max_retries: int = None + max_retries: int = None, + uprn: [int, None] = None, + size=None, ): """ Address lines 1 and postcode are mandatory fields. The other address lines are optional @@ -51,6 +53,10 @@ class SearchEpc: :param address2: string, optional, propery's address line 2 :param address3: string, optional, propery's address line 3 :param address4: string, optional, propery's address line 4 + :param max_retries: int, optional, number of retries to make when searching the api + :param uprn: int, optional, the uprn of the property + :param size: int, optional, the number of results to return. If not provided, defaults to 25 which is the api's + default """ self.address1 = address1 @@ -58,6 +64,7 @@ class SearchEpc: self.address2 = address2 self.address3 = address3 self.address4 = address4 + self.uprn = uprn self.max_retries = max_retries if max_retries is not None else self.MAX_RETRIES @@ -65,14 +72,23 @@ class SearchEpc: self.data = None + self.size = size if size is not None else 25 + def search(self): # Get the EPC data with retries for retry in range(self.max_retries): try: - response = self.client.domestic.search( - params={"address": self.address1, "postcode": self.postcode} - ) + + if self.uprn: + # We use the direct call method inside, since we need to implement uprn as a valid + # parameter for the search function + url = os.path.join(self.client.domestic.host, "search") + response = self.client.domestic.call(method="get", url=url, params={"uprn": self.uprn}) + else: + response = self.client.domestic.search( + params={"address": self.address1, "postcode": self.postcode}, size=self.size + ) if response: self.data = response diff --git a/etl/eligibility/ha_15_32/app.py b/etl/eligibility/ha_15_32/app.py index 3c7ae901..ccceb05f 100644 --- a/etl/eligibility/ha_15_32/app.py +++ b/etl/eligibility/ha_15_32/app.py @@ -336,7 +336,9 @@ def merge_ha_15(asset_list, identified_addresses): return merged_data, dropped_identified_merge_keys -def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, created_at): +def prepare_model_data_row( + property_id, modelling_epc, cleaned, cleaning_data, created_at, old_data=None, full_sap_epc=None +): """ This function prepares the data for modelling, in the same fashion as the recommendation engine With up-coming refactoring, this will change @@ -350,6 +352,8 @@ def prepare_model_data_row(property_id, modelling_epc, cleaned, cleaning_data, c epc_client=None, data=modelling_epc ) + p.old_data = old_data + p.full_sap_epc = full_sap_epc p.get_components(cleaned) # This is temp - this should happen after scoring diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py new file mode 100644 index 00000000..4e87b5a6 --- /dev/null +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -0,0 +1,158 @@ +import msgpack +from pathlib import Path +from datetime import datetime +import numpy as np +import pandas as pd +from utils.s3 import read_from_s3 +from utils.logger import setup_logger +from dotenv import load_dotenv +from backend.app.utils import read_parquet_from_s3 +from tqdm import tqdm +from backend.SearchEpc import SearchEpc +from etl.eligibility.Eligibility import Eligibility +from etl.eligibility.ha_15_32.app import prepare_model_data_row +from etl.epc.DataProcessor import DataProcessor +from etl.epc.settings import COLUMNS_TO_MERGE_ON +from backend.ml_models.api import ModelApi + +import re + +ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" + +logger = setup_logger() +load_dotenv(ENV_FILE) + + +def load_ha_4(): + pd.set_option('display.max_rows', 500) + pd.set_option('display.max_columns', 500) + pd.set_option('display.width', 1000) + + data = pd.read_csv(f"etl/eligibility/ha_15_32/HA 4 Asset List.csv", low_memory=False) + return data + + +def standardise_ha_4(data): + # Location name contains some strings like {0664} which we remove + data['Location Name'] = data['Location Name'].str.replace('\{.*?\}', '', regex=True) + + # Trim whitespace from either end of location name + data["Location Name"] = data["Location Name"].str.strip() + + # Remove any unusable postcodes + data = data[data["Post Code"] != '\\\\'] + + # Some specific replacements + data["Location Name"] = np.where( + data["Location Name"] == "Calderbrook Pl & Cog La", + "Calderbrook Place", + data["Location Name"] + ) + + return data + + +def get_ha_4_data(data, cleaned, cleaning_data, created_at): + scoring_data = [] + results = [] + nodata = [] + for _, property_meta in tqdm(data.iterrows(), total=len(data)): + # For many of the entries in this dataset, we're actually given an entire building, so we EPCs for every + # building + searcher = SearchEpc( + address1=property_meta["Address Line 1"], + postcode=property_meta["Post Code"], + size=1000 + ) + searcher.search() + + if searcher.data is None: + searcher = SearchEpc( + address1=property_meta["Location Name"], + postcode=property_meta["Post Code"], + size=1000 + ) + searcher.search() + + if searcher.data is None: + vlsh + + epcs = searcher.data["rows"] + epcs = pd.DataFrame(epcs) + + # Take the newest EPC by UPRN + epcs = epcs.sort_values(by=["lodgement-date"], ascending=False) + newest_epcs = epcs.drop_duplicates(subset=["uprn"], keep="first") + + # For each EPC, we now check eligibility + for _, epc in newest_epcs.iterrows(): + eligibility = Eligibility(epc=epc.to_dict(), cleaned=cleaned) + eligibility.check_gbis_warmfront() + eligibility.check_eco4_warmfront() + + # If the house is not identified, we do a full gbis and eco4 check + eligibility.check_gbis() + eligibility.check_eco4() + + if eligibility.eco4_warmfront["eligible"]: + # We get old_eps + old_data = epcs[ + (epcs["uprn"] == epc["uprn"]) & + (epcs["lmk-key"] != epc["lmk-key"]) + ].to_dict("records") + + full_sap_epc = epcs[ + (epcs["uprn"] == epc["uprn"]) & + (epcs["transaction-type"] == "new dwelling") + ].to_dict("records") + + scoring_dictionary = prepare_model_data_row( + property_id=property_meta["row_id"], + modelling_epc=eligibility.epc, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + old_data=old_data, + full_sap_epc=full_sap_epc + ) + scoring_data.extend(scoring_dictionary) + + results.append( + { + "row_id": property_meta["row_id"], + "gbis_eligible": eligibility.gbis_warmfront, + "eco4_eligible": eligibility.eco4_warmfront["eligible"], + "eco4_message": eligibility.eco4_warmfront["message"], + "sap": float(eligibility.epc["current-energy-efficiency"]), + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + # Property components + "roof": eligibility.roof["clean_description"], + "walls": eligibility.walls["clean_description"], + "heating": eligibility.epc["mainheat-description"], + "tenure": eligibility.tenure, + "date_epc": eligibility.epc["lodgement-date"], + } + ) + + +def app(): + data = load_ha_4() + + data = standardise_ha_4(data) + + data["row_id"] = ["h4" + str(i) for i in range(0, len(data))] + + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + + cleaning_data = read_parquet_from_s3( + bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", + ) + + created_at = datetime.now().isoformat() From 612922df6a8a1095dae0c6440f288f2c7572d55e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 16:17:53 +0000 Subject: [PATCH 2/6] handling case of missing built form --- etl/eligibility/ha_15_32/ha4_app.py | 45 +++++++++++++------------ etl/epc/DataProcessor.py | 4 +++ recommendations/recommendation_utils.py | 2 +- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index 4e87b5a6..1d924347 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -40,7 +40,7 @@ def standardise_ha_4(data): data["Location Name"] = data["Location Name"].str.strip() # Remove any unusable postcodes - data = data[data["Post Code"] != '\\\\'] + data = data[data["Post Code"] != '\\\\'].copy() # Some specific replacements data["Location Name"] = np.where( @@ -75,7 +75,8 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): searcher.search() if searcher.data is None: - vlsh + nodata.append(property_meta.to_dict()) + continue epcs = searcher.data["rows"] epcs = pd.DataFrame(epcs) @@ -117,25 +118,27 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): ) scoring_data.extend(scoring_dictionary) - results.append( - { - "row_id": property_meta["row_id"], - "gbis_eligible": eligibility.gbis_warmfront, - "eco4_eligible": eligibility.eco4_warmfront["eligible"], - "eco4_message": eligibility.eco4_warmfront["message"], - "sap": float(eligibility.epc["current-energy-efficiency"]), - "gbis_eligible_future": eligibility.gbis["eligible"], - "gbis_eligible_future_message": eligibility.gbis["message"], - "eco4_eligible_future": eligibility.eco4["eligible"], - "eco4_eligible_future_message": eligibility.eco4["message"], - # Property components - "roof": eligibility.roof["clean_description"], - "walls": eligibility.walls["clean_description"], - "heating": eligibility.epc["mainheat-description"], - "tenure": eligibility.tenure, - "date_epc": eligibility.epc["lodgement-date"], - } - ) + results.append( + { + "row_id": property_meta["row_id"], + "Location Name": property_meta["Location Name"], + "Post Code": property_meta["Post Code"], + "gbis_eligible": eligibility.gbis_warmfront, + "eco4_eligible": eligibility.eco4_warmfront["eligible"], + "eco4_message": eligibility.eco4_warmfront["message"], + "sap": float(eligibility.epc["current-energy-efficiency"]), + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + # Property components + "roof": eligibility.roof["clean_description"], + "walls": eligibility.walls["clean_description"], + "heating": eligibility.epc["mainheat-description"], + "tenure": eligibility.tenure, + "date_epc": eligibility.epc["lodgement-date"], + } + ) def app(): diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 0587fdbe..2bc73765 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -492,12 +492,16 @@ class DataProcessor: how='left' ) + global_averages = cleaning_data[cols_to_clean].mean() + # Fill NaN values with averages for col in cols_to_clean: data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True) data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True) # If we still have missings data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True) + # Final step if we still have missings - use global mean + data_to_clean[col].fillna(global_averages[col], inplace=True) return data_to_clean diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 7cfe023e..175eb641 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -548,7 +548,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form) 'Detached': 4, } - exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4) + exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4) return exposed_wall_area From 9c140dc0553c0357a4d5a5e24fe03cb22fb14096 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 17:14:15 +0000 Subject: [PATCH 3/6] updated wall description to filled cavity --- etl/eligibility/ha_15_32/ha4_app.py | 147 +++++++++++++++++- .../epc_attributes/WallAttributes.py | 3 + .../test_data/test_wall_attributes_cases.py | 4 +- 3 files changed, 150 insertions(+), 4 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index 1d924347..cc64dfad 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -64,6 +64,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): postcode=property_meta["Post Code"], size=1000 ) + searcher.search() if searcher.data is None: @@ -108,7 +109,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): ].to_dict("records") scoring_dictionary = prepare_model_data_row( - property_id=property_meta["row_id"], + property_id=eligibility.epc["uprn"], modelling_epc=eligibility.epc, cleaned=cleaned, cleaning_data=cleaning_data, @@ -120,7 +121,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): results.append( { - "row_id": property_meta["row_id"], + "uprn": epc["uprn"], "Location Name": property_meta["Location Name"], "Post Code": property_meta["Post Code"], "gbis_eligible": eligibility.gbis_warmfront, @@ -140,6 +141,131 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): } ) + scoring_df = pd.DataFrame(scoring_data) + + # Perform the same cleaning as in the model - first clean number of room variables though + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'], + colnames=["NUMBER_HABITABLE_ROOMS", "NUMBER_HEATED_ROOMS"], + ) + + scoring_df = DataProcessor.apply_averages_cleaning( + data_to_clean=scoring_df, + cleaning_data=cleaning_data, + cols_to_merge_on=COLUMNS_TO_MERGE_ON + ["LOCAL_AUTHORITY"], + ).drop(columns=["LOCAL_AUTHORITY"]) + + scoring_df = DataProcessor.clean_missings_after_description_process( + scoring_df, + ignore_cols=[c for c in scoring_df.columns if ("thermal_transmittance" in c) or ( + "insulation_thickness" in c) or ("ENERGY_EFF" in c)] + ) + + scoring_df = DataProcessor.clean_efficiency_variables(scoring_df) + + model_api = ModelApi(portfolio_id="ha33-eligibility", timestamp=created_at) + all_predictions = model_api.predict_all( + df=scoring_df, + bucket="retrofit-data-dev", + prediction_buckets={ + "sap_change_predictions": "retrofit-sap-predictions-dev", + "heat_demand_predictions": "retrofit-heat-predictions-dev", + "carbon_change_predictions": "retrofit-carbon-predictions-dev" + } + ) + + predictions = all_predictions["sap_change_predictions"].copy() + + results_df = pd.DataFrame(results) + + predictions = predictions.rename(columns={"property_id": "uprn"}).merge( + results_df[["uprn", "sap"]], how="left", on="uprn" + ) + predictions["sap_uplift"] = predictions["predictions"] - predictions["sap"] + predictions = predictions.groupby("uprn")["sap_uplift"].sum().reset_index() + + results_df = results_df.merge( + predictions[["sap_uplift", "uprn"]], + how="left", + on="uprn" + ) + results_df["post_install_sap"] = results_df["sap"] + results_df["sap_uplift"] + + results_df = results_df[~pd.isnull(results_df["uprn"])] + + eligibility_assessment = [] + for _, row in results_df[results_df["eco4_eligible"] == True].iterrows(): + # The upgrade requirements are dependent on the current SAP + + # If the property is an F or G, it only needs to upgrade to an % + if row["sap"] <= 38: + if row["post_install_sap"] >= 57: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 55: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 53: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + else: + + if row["post_install_sap"] >= 71: + eligibility_classification = "highest confidence" + elif row["post_install_sap"] >= 69: + eligibility_classification = "high confidence" + elif row["post_install_sap"] >= 67: + eligibility_classification = "medium confidence" + else: + eligibility_classification = "unlikely" + + eligibility_assessment.append( + { + "uprn": row["uprn"], + "eligibility_classification": eligibility_classification + } + ) + + eligibility_assessment = pd.DataFrame(eligibility_assessment) + + results_df = results_df.merge( + eligibility_assessment, how="left", on="uprn" + ) + # We have some properties that are duplicated so we take just one instance + results_df = results_df.drop_duplicates(subset=["uprn"]) + + return results_df, scoring_data, nodata + + +def analyse_ha_4(results_df, data): + results_df_social = results_df[results_df["tenure"] == "Rented (social)"] + + results_df_social["tenure"].value_counts() + + n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum() + n_eco4 = results_df_social["eco4_eligible"].sum() + n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum() + + eco_eligibile = results_df_social[results_df_social["eco4_eligible"]] + eco_eligibile["walls"].value_counts() + eco_eligibile["roof"].value_counts() + + eco_eligibile[eco_eligibile["walls"] == "Cavity wall, as built, insulated"] + + results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts() + + results_df_social["eligibility_classification"].value_counts() + + future_possibilities_eco = results_df[ + (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + + future_possibilities_gbis = results_df[ + (results_df["gbis_eligible_future"] == True) & (results_df["eco4_eligible_future"] == False) & ( + ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) + ].copy() + def app(): data = load_ha_4() @@ -159,3 +285,20 @@ def app(): ) created_at = datetime.now().isoformat() + + results_df, scoring_data, nodata = get_ha_4_data( + data=data, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at + ) + + # Store the data locally as a pickle + # import pickle + # with open("ha_4.pickle", "wb") as f: + # pickle.dump( + # { + # "results_df": results_df, + # "scoring_data": scoring_data, + # "nodata": nodata + # }, f) diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index bfe600d5..09eac215 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -152,4 +152,7 @@ class WallAttributes(Definitions): else: result["insulation_thickness"] = "average" + if result["is_cavity_wall"] & result["is_as_built"] & (result["insulation_thickness"] == "average"): + result["is_filled_cavity"] = True + return result diff --git a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py index 300702a7..96c545c1 100644 --- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py @@ -550,7 +550,7 @@ wall_cases = [ 'is_as_built': False, 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': None, 'external_insulation': False, 'internal_insulation': False}, {'original_description': 'Cavity wall, as built, insulated (assumed)', 'thermal_transmittance': None, - 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'external_insulation': False, 'internal_insulation': False}, @@ -727,7 +727,7 @@ wall_cases = [ 'external_insulation': False, 'internal_insulation': False}, {'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)', 'thermal_transmittance': None, - 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False, + 'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': True, 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average', 'external_insulation': False, 'internal_insulation': False}, From 3bad76fa0e598d1c8be4fa6f10d4ea90c3558f4f Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 18:04:18 +0000 Subject: [PATCH 4/6] handle new heating case --- etl/eligibility/Eligibility.py | 12 ++++++++++++ etl/eligibility/ha_15_32/ha4_app.py | 2 ++ etl/epc_clean/epc_attributes/MainheatAttributes.py | 1 + .../test_data/test_mainheat_attributes_cases.py | 13 +++++++++++++ 4 files changed, 28 insertions(+) diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index f25d06bd..c4dc9de0 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -152,9 +152,14 @@ class Eligibility: is_partial_filled = ( self.walls["is_as_built"] and self.walls["insulation_thickness"] not in ["below average"] ) + # We look for potentially under performing cavities - anything that is assumed, as built and insulated + is_underperforming = ( + self.walls["is_as_built"] and self.walls["insulation_thickness"] in ["average"] and self.walls["is_assumed"] + ) is_unfilled_cavity = is_cavity and is_empty is_partial_filled_cavity = is_cavity and is_partial_filled + is_underperforming_cavity = is_cavity and is_underperforming if is_unfilled_cavity: self.cavity = { @@ -170,6 +175,13 @@ class Eligibility: } return + if is_underperforming_cavity: + self.cavity = { + "suitability": True, + "type": "underperforming" + } + return + self.cavity = { "suitability": False, "type": "full" diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index cc64dfad..8b102f6d 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -124,6 +124,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): "uprn": epc["uprn"], "Location Name": property_meta["Location Name"], "Post Code": property_meta["Post Code"], + "property_type": eligibility.epc["property-type"], "gbis_eligible": eligibility.gbis_warmfront, "eco4_eligible": eligibility.eco4_warmfront["eligible"], "eco4_message": eligibility.eco4_warmfront["message"], @@ -135,6 +136,7 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): # Property components "roof": eligibility.roof["clean_description"], "walls": eligibility.walls["clean_description"], + "cavity_type": eligibility.cavity["type"], "heating": eligibility.epc["mainheat-description"], "tenure": eligibility.tenure, "date_epc": eligibility.epc["lodgement-date"], diff --git a/etl/epc_clean/epc_attributes/MainheatAttributes.py b/etl/epc_clean/epc_attributes/MainheatAttributes.py index e21f0d37..813e15a6 100644 --- a/etl/epc_clean/epc_attributes/MainheatAttributes.py +++ b/etl/epc_clean/epc_attributes/MainheatAttributes.py @@ -16,6 +16,7 @@ class MainHeatAttributes(Definitions): "solar assisted heat pump", "exhaust source heat pump", "community heat pump", + "portable electric heating" ] FUEL_TYPES = ["electric", "mains gas", "wood logs", "coal", "oil", "wood pellets", "anthracite", "dual fuel mineral and wood", "smokeless fuel", "lpg", "b30k"] diff --git a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py index d264ebff..558b176e 100644 --- a/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py +++ b/etl/epc_clean/tests/test_data/test_mainheat_attributes_cases.py @@ -1652,4 +1652,17 @@ mainheat_cases = [ 'has_electricaire': False, 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False, "has_electric_heat_pumps": False, "has_micro-cogeneration": False}, + {'original_description': 'Portable electric heating assumed for most rooms', 'has_radiators': False, + 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': False, + 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': False, + 'has_community_heat_pump': False, 'has_portable_electric_heating': True, 'has_electric': True, + 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, + 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, + 'has_b30k': False, 'has_assumed': True, 'has_electricaire': False, 'has_assumed_for_most_rooms': True, + 'has_underfloor_heating': False} ] From 46ce7bafd442c8b57c79364ff107f710d9b58d05 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 18:32:05 +0000 Subject: [PATCH 5/6] done with ha4 --- etl/eligibility/ha_15_32/ha4_app.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index 8b102f6d..d6bc10b1 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -243,21 +243,13 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): def analyse_ha_4(results_df, data): results_df_social = results_df[results_df["tenure"] == "Rented (social)"] - results_df_social["tenure"].value_counts() + results_df_social["property_type"].value_counts() n_identified = (results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]).sum() n_eco4 = results_df_social["eco4_eligible"].sum() n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum() eco_eligibile = results_df_social[results_df_social["eco4_eligible"]] - eco_eligibile["walls"].value_counts() - eco_eligibile["roof"].value_counts() - - eco_eligibile[eco_eligibile["walls"] == "Cavity wall, as built, insulated"] - - results_df_social[results_df_social["gbis_eligible"] | results_df_social["eco4_eligible"]]["tenure"].value_counts() - - results_df_social["eligibility_classification"].value_counts() future_possibilities_eco = results_df[ (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) @@ -268,6 +260,8 @@ def analyse_ha_4(results_df, data): ~(results_df["gbis_eligible"] | results_df["eco4_eligible"])) ].copy() + total_future_possibilities = future_possibilities_eco.shape[0] + future_possibilities_gbis.shape[0] + def app(): data = load_ha_4() From 997d91aaf0acb89227f941d935ba2240005d381d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 18:33:30 +0000 Subject: [PATCH 6/6] minor --- etl/eligibility/ha_15_32/ha4_app.py | 1 + 1 file changed, 1 insertion(+) diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index d6bc10b1..8a404eec 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -250,6 +250,7 @@ def analyse_ha_4(results_df, data): n_gbis = results_df_social[~results_df_social["eco4_eligible"]]["gbis_eligible"].sum() eco_eligibile = results_df_social[results_df_social["eco4_eligible"]] + eco_eligibile["eligibility_classification"].value_counts() future_possibilities_eco = results_df[ (results_df["eco4_eligible_future"] == True) & (~(results_df["gbis_eligible"] | results_df["eco4_eligible"]))