diff --git a/etl/eligibility/Eligibility.py b/etl/eligibility/Eligibility.py index 906ff594..b09d2df5 100644 --- a/etl/eligibility/Eligibility.py +++ b/etl/eligibility/Eligibility.py @@ -340,7 +340,6 @@ class Eligibility: # Check if the property is suitable for cavity wall self.cavity_insulation() - self.loft_insulation() self.gbis_warmfront = (self.cavity["suitability"]) and ( int(self.epc["current-energy-efficiency"]) <= 68 @@ -384,43 +383,49 @@ class Eligibility: if current_sap >= 69: self.eco4_warmfront = { "eligible": False, - "message": "sap too high", + "message": "SAP too high", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - if post_retrofit_sap is None: - - if current_sap >= 55: - message = "Possibly eligible but property currently EPC D" - else: - message = "subject to post retrofit sap" if is_eligible else "not eligible" - - # Update the message to flag properties that failed just because of a full cavity. - # We need to double check that the wall is a cavity, that the loft is suitable and that the - # sap is within reason - # We can then estimate the age of the cavity fill - if not is_eligible and (current_sap < 69) and self.loft["suitability"] and self.walls["is_cavity_wall"]: - message = "Failed due to full cavity - check cavity age" - + if not is_eligible and current_sap >= 55: self.eco4_warmfront = { - "eligible": is_eligible, - "message": message, + "eligible": False, + "message": "failed fabric and SAP check", "cavity_type": self.cavity["type"], "loft_type": self.loft["thickness_classification"] } return - is_eligible = is_eligible & (post_retrofit_sap >= 69) + if not is_eligible and current_sap < 55: + self.eco4_warmfront = { + "eligible": False, + "message": "failed fabric check", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return - self.eco4_warmfront = { - "eligible": is_eligible, - "message": None, - "cavity_type": self.cavity["type"], - 
"loft_type": self.loft["thickness_classification"] - } - return + if is_eligible and current_sap >= 55: + self.eco4_warmfront = { + "eligible": True, + "message": "Meets fabric, fails SAP check", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + if is_eligible and current_sap < 55: + self.eco4_warmfront = { + "eligible": True, + "message": "Meets fabric and SAP check", + "cavity_type": self.cavity["type"], + "loft_type": self.loft["thickness_classification"] + } + return + + raise ValueError("Implement me") def check_gbis(self): diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index bd4d5128..5dd9b6e1 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -25,6 +25,84 @@ DATA_FOLDER = Path(__file__).parent / "local_data" / "ha_data" logger = setup_logger() load_dotenv(ENV_FILE) +PROPERTY_TYPE_LOOKUP = { + "HA1": { + "built_form": { + 'Mid Terrace': 'Mid-Terrace', + 'Semi-Detached': 'Semi-Detached', + 'End Terrace': 'End-Terrace', + 'Detached': 'Detached', + 'Enclosed Mid': 'Mid-Terrace', + 'Detached Local Connect': 'Detached', + } + }, + "HA6": { + "property_type": { + 'HOUSE': "House", + 'GROUND FLOOR FLAT': "Flat", + 'UPPER FLOOR FLAT': "Flat", + 'MAISONETTE': "Maisonette", + 'BUNGALOW': "Bungalow", + 'WARDEN BUNGALOW': "Bungalow", + 'WARDEN FLAT': "Flat", + 'EXTRACARE SCHEME': "Flat", + } + }, + "HA14": { + "property_type": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + } + }, + "HA39": { + "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, + "1st floor flat": {"property_type": "Flat", "built_form": None}, + "Mid terrace house": {"property_type": "House", "built_form": "Mid-Terrace"}, + "Ground floor flat": {"property_type": "Flat", "built_form": None}, + "End terrace house": {"property_type": "House", 
"built_form": "End-Terrace"}, + "Semi bungalow": {"property_type": "Bungalow", "built_form": "Semi-Detached"}, + "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"}, + "2nd floor flat": {"property_type": "Flat", "built_form": None}, + "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"}, + "3rd floor flat": {"property_type": "Flat", "built_form": None}, + "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"}, + "Maisonette": {"property_type": "Maisonette", "built_form": None}, + "Detached house": {"property_type": "House", "built_form": "Detached"}, + "Lower ground floor flat": {"property_type": "Flat", "built_form": None}, + "Dormer bungalow": {"property_type": "Bungalow", "built_form": None}, + "Basement flat": {"property_type": "Flat", "built_form": None}, + "Cluster House": {"property_type": "House", "built_form": "Detached"}, + "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None}, + "Ground floor flat with study": {"property_type": "Flat", "built_form": None}, + "4th floor flat": {"property_type": "Flat", "built_form": None}, + "1st floor flat with study room": {"property_type": "Flat", "built_form": None}, + "2nd floor flat with study": {"property_type": "Flat", "built_form": None}, + }, + "HA107": { + "property_type": { + "HOUSE": "House", + "BUNGALOW": "Bungalow", + "GRD FLOOR FLAT": "Flat", + "FIRST FLOOR FLAT": "Flat", + "SHELTERED BUNGALOW": "Bungalow", + "MAISONETTE": "Maisonette", + "SECOND FLOOR FLAT": "Flat", + "SHELTERED FIRST FLR": "Flat", + "SHELTERED GROUND FLR": "Flat", + "GRD FLOOR BED SIT": "House" + }, + "built_form": { + "Semi Detached": "Semi-Detached", + "Mid Terrace": "Mid-Terrace", + "End Terrace": "End-Terrace", + "Detached": "Detached", + "Detatched": "Detached", + } + } +} + class DataLoader: COLUMN_CONFIG = { @@ -54,7 +132,7 @@ class DataLoader: self.data = {} self.december_figures = None - self.ha_facts_and_figures = None + 
def _mentions(value, needle):
    """Return True when *value* is a string that contains *needle*, ignoring case.

    Non-string values (e.g. a numeric placeholder for a missing cell) never
    match, instead of raising on ``.lower()``.
    """
    return isinstance(value, str) and needle in value.lower()


def get_property_type_and_built_form(property_meta, ha_name, *, lookup=None):
    """Derive the property type and built form for one asset-list row.

    Each housing association (HA) stores this information under different
    column names and vocabularies, so every supported HA has its own branch.

    :param property_meta: Row of the HA asset list, indexable by column name.
        Columns read per HA:
        HA1 - "Asset Type", "Property Type";
        HA6 - "Dwelling type", "built_form";
        HA14 - "Asset Type Description", "Address 1";
        HA39 - "ConstructionStyle", "matching_address";
        HA107 - "Dwelling Style", "DwellingType", "Wall Construction", "Street".
    :param ha_name: One of "HA1", "HA6", "HA14", "HA39", "HA107".
    :param lookup: Optional replacement for the module-level
        ``PROPERTY_TYPE_LOOKUP`` table (same nesting). Defaults to that table.
    :return: Tuple ``(property_type, built_form)``; either element may be None
        when it cannot be determined.
    :raises NotImplementedError: If ``ha_name`` is not a supported HA.
    :raises KeyError: For HA6 / HA14 when the source value has no entry in the
        lookup table (those branches index the table directly).
    """
    if lookup is None:
        lookup = PROPERTY_TYPE_LOOKUP

    if ha_name == "HA1":
        property_type = property_meta["Asset Type"]
        if property_type == "a":
            # We correct a small error in the source data
            property_type = "House"
        elif property_type in ("Bedsit", "Room"):
            # Remap bedsits to flats
            property_type = "Flat"

        built_form = lookup[ha_name]["built_form"].get(property_meta["Property Type"])

    elif ha_name == "HA6":
        property_type = lookup[ha_name]["property_type"][property_meta["Dwelling type"]]
        built_form = property_meta["built_form"]

    elif ha_name == "HA14":
        asset_type = property_meta["Asset Type Description"]
        if asset_type == "Block - Repair":
            # We try and deduce if it's a flat or house, depending on whether
            # "room" appears in the address; anything else is treated as a flat
            property_type = "House" if _mentions(property_meta["Address 1"], "room") else "Flat"
        else:
            property_type = lookup[ha_name]["property_type"][asset_type]

        built_form = None

    elif ha_name == "HA39":
        # HA39 maps one source value straight to both outputs
        config = lookup[ha_name].get(property_meta["ConstructionStyle"], {})
        property_type = config.get("property_type")
        built_form = config.get("built_form")

        if property_type is None:
            # Fall back to the address. The check is case-insensitive so that
            # "FLAT 2 ..." and "flat 2 ..." are classified the same way, in line
            # with the HA14 and HA107 branches.
            property_type = "Flat" if _mentions(property_meta["matching_address"], "flat") else "House"

    elif ha_name == "HA107":
        dwelling_style = property_meta["Dwelling Style"]
        if isinstance(dwelling_style, str):
            dwelling_style = dwelling_style.strip()

        property_type = lookup[ha_name]["property_type"].get(property_meta["DwellingType"])
        built_form = lookup[ha_name]["built_form"].get(dwelling_style)

        if property_type is None:
            # NOTE(review): the nesting of the four overrides below was
            # reconstructed from a whitespace-collapsed diff; confirm that all of
            # them are meant to apply only when the lookup found no property type.
            # Later overrides deliberately win over earlier ones.
            if built_form in ("Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"):
                property_type = "House"

            # Guarded: this column can hold non-string values for missing cells
            if _mentions(property_meta["Wall Construction"], "flat"):
                property_type = "Flat"

            # Handle a few specific cases
            if property_meta["DwellingType"] == "UNKNOWN" and property_meta["Dwelling Style"] == 0:
                property_type = "Bungalow"

            if property_meta["Street"] == "School View":
                property_type = "Bungalow"

    else:
        raise NotImplementedError("Implement me")

    return property_type, built_form
"Bungalow", "built_form": "Semi-Detached"}, - "End terrace bungalow": {"property_type": "Bungalow", "built_form": "End-Terrace"}, - "2nd floor flat": {"property_type": "Flat", "built_form": None}, - "Mid terrace bungalow": {"property_type": "Bungalow", "built_form": "Mid-Terrace"}, - "3rd floor flat": {"property_type": "Flat", "built_form": None}, - "Detached bungalow": {"property_type": "Bungalow", "built_form": "Detached"}, - "Maisonette": {"property_type": "Maisonette", "built_form": None}, - "Detached house": {"property_type": "House", "built_form": "Detached"}, - "Lower ground floor flat": {"property_type": "Flat", "built_form": None}, - "Dormer bungalow": {"property_type": "Bungalow", "built_form": None}, - "Basement flat": {"property_type": "Flat", "built_form": None}, - "Cluster House": {"property_type": "House", "built_form": "Detached"}, - "2nd/3rd floor duplex flat": {"property_type": "Flat", "built_form": None}, - "Ground floor flat with study": {"property_type": "Flat", "built_form": None}, - "4th floor flat": {"property_type": "Flat", "built_form": None}, - "1st floor flat with study room": {"property_type": "Flat", "built_form": None}, - "2nd floor flat with study": {"property_type": "Flat", "built_form": None}, - }, - "ha_107": { - "property_type": { - "HOUSE": "House", - "BUNGALOW": "Bungalow", - "GRD FLOOR FLAT": "Flat", - "FIRST FLOOR FLAT": "Flat", - "SHELTERED BUNGALOW": "Bungalow", - "MAISONETTE": "Maisonette", - "SECOND FLOOR FLAT": "Flat", - "SHELTERED FIRST FLR": "Flat", - "SHELTERED GROUND FLR": "Flat", - "GRD FLOOR BED SIT": "House" - }, - "built_form": { - "Semi Detached": "Semi-Detached", - "Mid Terrace": "Mid-Terrace", - "End Terrace": "End-Terrace", - "Detached": "Detached", - "Detatched": "Detached", - } - } - } - outputs = {} for ha_name, data_assets in loader.data.items(): @@ -1049,77 +1119,15 @@ def get_epc_data( if property_meta["matching_postcode"] is None: continue - if ha_name == "ha_1": - property_type = property_meta["Asset 
Type"] - # We correct a small error - if property_type == "a": - property_type = "House" - - # Remap bedsits to flats - if property_type in ["Bedsit", "Room"]: - property_type = "Flat" - - built_form = property_type_lookup[ha_name]["built_form"].get(property_meta["Property Type"], None) - elif ha_name == "ha_6": - property_type = property_type_lookup[ha_name]["property_type"][property_meta["Dwelling type"]] - built_form = property_meta["built_form"] - elif ha_name == "ha_14": - if property_meta["Asset Type Description"] == "Block - Repair": - # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address - if "room" in property_meta["Address 1"].lower(): - property_type = "House" - else: - property_type = "Flat" - - else: - property_type = property_type_lookup[ha_name]["property_type"][ - property_meta["Asset Type Description"] - ] - - built_form = None - elif ha_name == "ha_39": - - property_type_config = property_type_lookup[ha_name].get(property_meta["ConstructionStyle"], {}) - property_type = property_type_config.get("property_type", None) - built_form = property_type_config.get("built_form", None) - - if property_type is None: - # We check for the presence of room or flat - if "flat" in property_meta["matching_address"]: - property_type = "Flat" - else: - property_type = "House" - elif ha_name == "ha_107": - - dwelling_style = property_meta["Dwelling Style"] - if isinstance(dwelling_style, str): - dwelling_style = dwelling_style.strip() - - property_type = property_type_lookup[ha_name]["property_type"].get(property_meta["DwellingType"]) - built_form = property_type_lookup[ha_name]["built_form"].get(dwelling_style, None) - - if property_type is None: - if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]: - property_type = "House" - - if "flat" in property_meta["Wall Construction"].lower(): - property_type = "Flat" - - if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] 
== 0): - # Hand a few specific cases - property_type = "Bungalow" - - if property_meta["Street"] == "School View": - property_type = "Bungalow" - - else: - raise NotImplementedError("Implement me") + property_type, built_form = get_property_type_and_built_form( + property_meta=property_meta, ha_name=ha_name + ) searcher = SearchEpc( address1=str(property_meta["HouseNo"]), postcode=property_meta["matching_postcode"], auth_token=EPC_AUTH_TOKEN, - os_api_key=None, + os_api_key="", full_address=property_meta["matching_address"] ) searcher.ordnance_survey_client.property_type = property_type @@ -1150,9 +1158,21 @@ def get_epc_data( eligibility.check_gbis_warmfront() eligibility.check_eco4_warmfront() - if (not eligibility.eco4_warmfront["eligible"]) and ( - not eligibility.gbis_warmfront - ) and consider_penultimate_epc: + # We check the conditions for checking the penultimate epc + identified_for_gbis = property_meta["ECO Eligibility"] == "gbis" + identified_for_eco4 = property_meta["ECO Eligibility"] in ["eco4"] + + # condition 1 - identified for gbis and not eligible + condition_1 = ( + identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"] + ) & consider_penultimate_epc + + # condition 2 - identified for eco4 and not eligible + condition_2 = ( + identified_for_eco4 and not eligibility.eco4_warmfront["eligible"] + ) & consider_penultimate_epc + + if identified_for_gbis and not eligibility.gbis_warmfront and not eligibility.eco4_warmfront["eligible"]: # We check the penultimate epc eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned) eligibility.check_gbis_warmfront() @@ -1161,6 +1181,10 @@ def get_epc_data( # We don't update just to make data cleaning easier if penultimate_epc.get("estimated") is None: older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]] + elif identified_for_eco4 and not eligibility.eco4_warmfront["eligible"]: + + else: + blah # If the property is a 
cavity wall and it's filled, we produce an estimate for the age of the cavity # Loft MUST be suitable @@ -1199,6 +1223,7 @@ def get_epc_data( { "row_id": property_meta["asset_list_row_id"], "uprn": eligibility.epc["uprn"], + "is_estimated": searcher.newest_epc.get("estimated") is not None, "property_type": eligibility.epc["property-type"], "gbis_eligible": eligibility.gbis_warmfront, "eco4_eligible": eligibility.eco4_warmfront["eligible"], @@ -1219,7 +1244,6 @@ def get_epc_data( "cavity_age": cavity_age, **eligibility.walls, **eligibility.roof, - "is_estimated": searcher.newest_epc.get("estimated") is not None, "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"], "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"] } @@ -1687,38 +1711,7 @@ def analyse_ha_data(outputs, loader): writer.sheets[sheet].set_column(i, i, width) -def app(): - """ - This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. - Only HA 6 has surveys - :return: - """ - - use_cache = True - - # List all of the data in the folder - directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] - # Grab the December HA figures filepath - december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" - - priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] - # Filter down the directories to only the priority HAs - directories = [d for d in directories if d.split("/")[2] in priority_has] - - loader = DataLoader(directories, december_figures_filepath, use_cache) - loader.load() - loader.ha_facts_and_figures() - - # TODO: We probably need to make sure that we have all of the columns that we need - - # We load in the additional data required to perform the analysis - - cleaned = read_from_s3( - s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" - ) - cleaned = msgpack.unpackb(cleaned, raw=False) - +def patch_cleaned(cleaned): # Patch to handle the a missing description 
cleaned["floor-description"].extend( [ @@ -1762,16 +1755,57 @@ def app(): x["another_property_below"] = True x["thermal_transmittance"] = 0 + return cleaned + + +def app(): + """ + This app contains the housin association analysis for HAs 1, 6, 14, 39 and 107. + Only HA 6 has surveys + :return: + """ + + # Determines if we want to use the cached data in s3 + use_cache = True + # Determines if we want to perform the data pull + pull_data = True + + # List all of the data in the folder + directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()] + # Grab the December HA figures filepath + december_figures_filepath = "local_data/ha_data/HA_December_figures.csv" + + priority_has = ["HA1", "HA6", "HA14", "HA39", "HA107"] + # Filter down the directories to only the priority HAs + directories = [d for d in directories if d.split("/")[2] in priority_has] + + loader = DataLoader(directories, december_figures_filepath, use_cache) + loader.load() + loader.ha_facts_and_figures() + + # We load in the additional data required to perform the analysis + cleaned = read_from_s3( + s3_file_name="cleaned_epc_data/cleaned.bson", + bucket_name="retrofit-data-dev" + ) + cleaned = msgpack.unpackb(cleaned, raw=False) + cleaned = patch_cleaned(cleaned) + cleaning_data = read_dataframe_from_s3_parquet( bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet", ) - created_at = datetime.now().isoformat() photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev") outputs = get_epc_data( - loader, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds, pull_data=False + loader=loader, + cleaned=cleaned, + cleaning_data=cleaning_data, + created_at=created_at, + photo_supply_lookup=photo_supply_lookup, + floor_area_decile_thresholds=floor_area_decile_thresholds, + pull_data=pull_data ) # for ha_name, datasets in outputs.items():