diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index 6a5c03e1..00c72a8e 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -114,7 +114,8 @@ class Eligibility: self.loft = { "suitability": False, "thickness": None, - "reason": "roof not loft" + "reason": "roof not loft", + "thickness_classification": None } return @@ -125,18 +126,32 @@ class Eligibility: is_flat=self.roof["is_flat"] ) + if insulation_thickness <= 100: + thickness_classification = "0-100mm" + elif insulation_thickness <= 270: + thickness_classification = "100-270mm" + else: + thickness_classification = "270mm+" + if insulation_thickness <= loft_thickness_threshold: + # We produce a thickness classification for the loft + # 0 - 100mm insulation + # 100 - 270mm insulation + # 270mm+ insulation + self.loft = { "suitability": True, "thickness": insulation_thickness, - "reason": None + "reason": None, + "thickness_classification": thickness_classification } if insulation_thickness <= high_loft_thickness_threshold: self.loft = { "suitability": True, "thickness": insulation_thickness, - "reason": "high loft thickness but below regulation" + "reason": "high loft thickness but below regulation", + "thickness_classification": thickness_classification } return @@ -145,7 +160,8 @@ class Eligibility: self.loft = { "suitability": False, "thickness": insulation_thickness, - "reason": "existing insulation" + "reason": "existing insulation", + "thickness_classification": thickness_classification } return @@ -371,20 +387,21 @@ class Eligibility: """ current_sap = int(self.epc["current-energy-efficiency"]) - - if current_sap >= 69: - self.eco4_warmfront = { - "eligible": False, - "message": "sap too high" - } - return - self.cavity_insulation() self.loft_insulation() # make sure conditions 2 and 3 are true is_eligible = self.cavity["suitability"] & self.loft["suitability"] + if current_sap >= 69: + self.eco4_warmfront = { + "eligible": False, + "message": 
"sap too high", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + if post_retrofit_sap is None: if current_sap >= 55: @@ -401,7 +418,9 @@ class Eligibility: self.eco4_warmfront = { "eligible": is_eligible, - "message": message + "message": message, + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] } return @@ -409,7 +428,9 @@ class Eligibility: self.eco4_warmfront = { "eligible": is_eligible, - "message": None + "message": None, + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] } return diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index dfd95100..1212522e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -12,11 +12,9 @@ from tqdm import tqdm from backend.SearchEpc import SearchEpc from etl.eligibility.Eligibility import Eligibility from etl.eligibility.ha_15_32.app import prepare_model_data_row -from etl.epc.settings import COLUMNS_TO_MERGE_ON from backend.ml_models.api import ModelApi from etl.solar.SolarPhotoSupply import SolarPhotoSupply from recommendations.recommendation_utils import calculate_cavity_age -from recommendation_utils import convert_thickness_to_numeric EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN") ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env" @@ -576,14 +574,6 @@ def get_epc_data( } } - # TODO: Sort these - # DwellingType - # UNKNOWN 395 - # SHELTERED FIRST FLR 77 - # 62 - # ROOM 4 - # GRD FLOOR BED SIT 3 - outputs = {} for ha_name, data_assets in loader.data.items(): @@ -596,7 +586,7 @@ def get_epc_data( outputs[ha_name] = { "results_df": processed_ha_results["results_df"], - "scoring_data": processed_ha_results["scoring_df"], + "scoring_df": processed_ha_results["scoring_df"], "nodata": processed_ha_results["nodata"] } continue @@ -680,9 +670,6 
@@ def get_epc_data( if property_meta["Street"] == "School View": property_type = "Bungalow" - if property_type is None: - blah - else: raise NotImplementedError("Implement me") @@ -790,7 +777,9 @@ def get_epc_data( "cavity_age": cavity_age, **eligibility.walls, **eligibility.roof, - "is_estimated": searcher.newest_epc.get("estimated") is not None + "is_estimated": searcher.newest_epc.get("estimated") is not None, + "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"], + "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"] } ) @@ -877,7 +866,7 @@ def get_epc_data( save_pickle_to_s3( data={ "results_df": results_df, - "scoring_data": scoring_df, + "scoring_df": scoring_df, "nodata": nodata }, bucket_name="retrofit-datalake-dev", @@ -886,7 +875,7 @@ def get_epc_data( outputs[ha_name] = { "results_df": results_df, - "scoring_data": scoring_df, + "scoring_df": scoring_df, "nodata": nodata } @@ -914,6 +903,7 @@ def analyse_ha_data(outputs, loader): for ha_name, datasets in outputs.items(): + inputs = [x for k, x in loader.data.items() if k == ha_name][0] # TODO: This is placeholder because we don't have the schemes that the properties have been qualified for # yet # @@ -930,9 +920,6 @@ def analyse_ha_data(outputs, loader): # End placholder results_df = datasets["results_df"].copy() - - inputs = [x for k, x in loader.data.items() if k == ha_name][0] - analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename( columns={"row_meaning": "asset_identification_status"} ).merge( @@ -970,19 +957,20 @@ def analyse_ha_data(outputs, loader): analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])] # We now calculate the number of remaining properties, by scheme - # TODO: We might need to tweak a bit of the knowledge + # TODO: We might need to tweak a bit of the logic remaining_properties = analysis_data[ analysis_data["asset_identification_status"] == "identified potential eco works (CWI)" - 
] + ].copy() + remaining_properties["prospect_type"] = None remaining_properties_by_scheme = ( remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index() ) - remaining_properties_eco4 = remaining_properties_by_scheme[ + n_remaining_properties_eco4 = remaining_properties_by_scheme[ remaining_properties_by_scheme["funding_scheme"] == "ECO4" ]["asset_list_row_id"].values[0] - remaining_properties_gbis = remaining_properties_by_scheme[ + n_remaining_properties_gbis = remaining_properties_by_scheme[ remaining_properties_by_scheme["funding_scheme"] == "GBIS" ]["asset_list_row_id"].values[0] @@ -990,7 +978,8 @@ def analyse_ha_data(outputs, loader): # one of multiple categories # # For properties that have been identified as ECO4 - # 1) Strict ECO4 candidate - Has required fabric and EPC is below a D + # 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because + # Warmfront regularly re-surveys properties which then fall within the SAP requirement # - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties # here and re-surveying is a common practicce by Warmfront. 
Additionally, many of the social homes have # very old EPCs which may score lower when re-done @@ -1008,10 +997,25 @@ def analyse_ha_data(outputs, loader): # 3) Subject to CIGA check - Filled cavity # 4) Does not look like a GBIS candidate + remaining_eco4_df = remaining_properties[ + remaining_properties["funding_scheme"] == "ECO4" + ].copy() # ECO4 # 1) We identify this if: # - remaining_properties["eco4_eligible"] == True - # - remaining_properties[""] + + remaining_eco4_df["prospect_type"] = np.where( + remaining_eco4_df["eco4_eligible"] == True, + "strict ECO4", + remaining_eco4_df["prospect_type"] + ) + + # 2) We identify this if it has a filled cavity but meets the loft conditions + + remaining_eco4_df["prospect_type"] + + z = remaining_eco4_df[remaining_eco4_df["eco4_message"] == "sap too high"] + remaining_properties[remaining_properties["eco4_eligible"] == True]["eco4_message"].value_counts() remaining_properties["eco4_message"].value_counts() z = remaining_properties[ @@ -1026,10 +1030,10 @@ def analyse_ha_data(outputs, loader): "n_properties_in_asset_list": n_properties_in_asset_list, # ECO4 "properties_sold_eco4": properties_sold_eco4, - "remaining_properties_eco4": remaining_properties_eco4, + "n_remaining_properties_eco4": n_remaining_properties_eco4, # GBIS "properties_sold_gbis": properties_sold_gbis, - "remaining_properties_gbis": remaining_properties_gbis + "n_remaining_properties_gbis": n_remaining_properties_gbis } pass @@ -1145,4 +1149,6 @@ def app(): photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") - outputs = get_epc_data(loader) + outputs = get_epc_data( + loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False + )