From e9bfd63c3588206cd9e7c79b25c6067b617bf436 Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Tue, 27 Feb 2024 21:00:23 +0000
Subject: [PATCH] Fixed getting property type and built form for ha107

---
Review amendments to extract_property_info_ha107:
 * "Semi Detached" is now tested before "Detached" (substring matching made
   every semi-detached property come out as "Detached").
 * Non-string descriptions (NaN from .unique()) map to None instead of raising.
 * The returned frame always carries its three columns, so the merge also
   works for an empty asset list.
The blob id on the index line predates these amendments.

 .../ha_15_32/ha_analysis_batch_3.py           | 71 +++++++++++++-----
 1 file changed, 51 insertions(+), 20 deletions(-)

diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
index 94df8ceb..5cbfb90c 100644
--- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
+++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py
@@ -223,12 +223,61 @@ class DataLoader:
 
         return asset_list
 
+    @staticmethod
+    def extract_property_info_ha107(properties):
+        """
+        Map raw HA107 "Property type" descriptions to a property type and a built form.
+
+        :param properties: iterable of raw descriptions; non-string entries (e.g. NaN) are mapped to None
+        :return: DataFrame with the columns "Property type" (raw value), "property_type" and "built_form"
+        """
+        property_types = {
+            "House": "House",
+            "Flat": "Flat",
+            "Bungalow": "Bungalow",
+            "Maisonette": "Maisonette",
+            "Bedsit": None,
+        }
+
+        # Descriptions are matched by substring and the first hit wins, so "Semi Detached" must be
+        # listed before "Detached"; otherwise every semi-detached property is labelled "Detached"
+        built_forms = {
+            "Semi Detached": "Semi-Detached",
+            "End Terrace": "End-Terrace",
+            "Mid Terrace": "Mid-Terrace",
+            "Detached": "Detached",
+        }
+
+        def first_match(description, lookup):
+            # .unique() yields NaN (a float) for missing values, and `key in NaN` raises TypeError
+            if not isinstance(description, str):
+                return None
+            return next((value for key, value in lookup.items() if key in description), None)
+
+        results = [
+            {
+                "Property type": description,
+                "property_type": first_match(description, property_types),
+                "built_form": first_match(description, built_forms),
+            }
+            for description in properties
+        ]
+
+        # Explicit columns keep the frame mergeable on "Property type" even when there are no rows
+        return pd.DataFrame(results, columns=["Property type", "property_type", "built_form"])
+
     def append_asset_list_built_form(self, ha_name, asset_list):
 
         # Finally, we process property_type or built form, where needed
         if ha_name == "HA6":
             asset_list["built_form"] = asset_list["Property Type"].apply(self.identify_built_form_ha6)
 
+        if ha_name == "HA107":
+            mapped_df = self.extract_property_info_ha107(asset_list["Property type"].unique())
+            asset_list = asset_list.merge(
+                mapped_df, how="left", on="Property type"
+            )
+
         return asset_list
 
     @staticmethod
@@ -1280,26 +1329,8 @@ def get_property_type_and_built_form(property_meta, ha_name):
         property_type = "House"
 
     elif ha_name == "HA107":
-        dwelling_style = property_meta["Dwelling Style"]
-        if isinstance(dwelling_style, str):
-            dwelling_style = dwelling_style.strip()
-
-        property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["DwellingType"])
-        built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(dwelling_style, None)
-
-        if property_type is None:
-            if built_form in ["Semi-Detached", "Mid-Terrace", "End-Terrace", "Detached"]:
-                property_type = "House"
-
-        if "flat" in property_meta["Wall Construction"].lower():
-            property_type = "Flat"
-
-        if (property_meta["DwellingType"] == "UNKNOWN") & (property_meta["Dwelling Style"] == 0):
-            # Hand a few specific cases
-            property_type = "Bungalow"
-
-        if property_meta["Street"] == "School View":
-            property_type = "Bungalow"
+        property_type = property_meta.get("property_type", None)
+        built_form = property_meta.get("built_form", None)
 
     else:
         raise NotImplementedError("Implement me")