From 12f780a08989e896235adf96e175d39240c3adbb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 15 Mar 2024 16:54:48 +0000 Subject: [PATCH] setting up complete data pull --- .../ha_15_32/ha_analysis_batch_3.py | 380 +++++++++++++++++- 1 file changed, 369 insertions(+), 11 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index a0b7e0bb..902d48fd 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -42,6 +42,15 @@ PROPERTY_TYPE_LOOKUP = { 'Detached Local Connect': 'Detached', } }, + "HA2": { + 'HOUSE': 'House', + 'FLAT': 'Flat', + 'SHELTERED': None, + 'BUNGALOW': 'Bungalow', + 'BED-SIT': None, + 'MAISONETTE': "Maisonette", + 'HOSTEL': None + }, "HA6": { "property_type": { 'HOUSE': "House", @@ -69,6 +78,23 @@ PROPERTY_TYPE_LOOKUP = { "End Terraced": "End-Terrace", } }, + "HA12": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Bedsit": None, + }, + "HA13": { + 'House': "House", + 'Flat': "Flat", + 'House MT': "House", + 'House SD': "House", + 'House ET': "House", + 'Bungalow MT': "Bungalow", + 'Bungalow ET': "Bungalow", + 'ii': None, + }, "HA14": { "property_type": { "House": "House", @@ -77,6 +103,13 @@ PROPERTY_TYPE_LOOKUP = { "Maisonette": "Maisonette", } }, + "HA15": { + 'House': 'House', + 'Flat': 'Flat', + 'Bungalow': 'Bungalow', + 'Maisonette': 'Maisonette', + 'Flat over garage': 'Flat', + }, "HA16": { 'Semi Detached Bungalow': {"property-type": "Bungalow", "built-form": "Semi-Detached"}, 'Mid Terraced House': {"property-type": "House", "built-form": "Mid-Terrace"}, @@ -95,6 +128,30 @@ PROPERTY_TYPE_LOOKUP = { 'Flat Over Shop': {"property-type": "Flat", "built-form": "Mid-Terrace"}, 'Mid Terraced Town House': {"property-type": "House", "built-form": "Mid-Terrace"}, }, + "HA18": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Bedsit": None, + "Shop": None, + "Hostel": None, + "Block": None, + }, + "HA24": { + '01 HOUSE': 'House', + '02 FLAT': 'Flat', + '03 BUNGALOW': 'Bungalow', + '10 PBUNGALOW': 'Bungalow', + '01 HOUSE MID': 'House', + '13 SBUNGALOW': 'Bungalow', + '12 SBEDSIT': None, # BEDSIT does not match the specified property types + '14 SFLAT': 'Flat', + '05 BEDSIT': None, + '04 MAISONETTE': 'Maisonette', + '11 PFLAT': 'Flat', + '09 PBEDSIT': None + }, "HA25": { 'Flat': 'Flat', 'Mid Terrace House': 'House', @@ -116,6 +173,77 @@ PROPERTY_TYPE_LOOKUP = { 'Mid Terrace Housekeeping ': 'House', 'End Terrace Housex': 'House' }, + "HA28": { + 'Flat': 'Flat', + 'Semi detached house': 'House', + 'Terraced house': 'House', + 'Maisonette flat': 'Maisonette', + 'Sheltered bedsit': None, + 'APD flat': 'Flat', + 'Bungalow terraced': 'Bungalow', + 'Flat with partition': 'Flat', + 'Bungalow semi detached': 'Bungalow', + 'APD Bungalow': 'Bungalow', + 'Sheltered flat': 'Flat', + 'Bedsit Flat': 'Flat', + 'Bedsit bungalow semi detached': 'Bungalow', + 'Sheltered bungalow terraced': 'Bungalow', + 'Sheltered bedsit disabled': None, + 'Bedsit bungalow terraced': 'Bungalow', + 'Sheltered bungalow semi detached': 'Bungalow', + 'Sheltered warden flat': 'Flat', + 'Bungalow detached': 'Bungalow', + 'Block': None, # Does not match the specified property types + 'End Terraced House': 'House', + 'Mid Terraced House': 'House', + '#N/A': None, # Assuming this is an invalid or missing entry + 0: None # Assuming 0 is also an invalid or missing entry + }, + "HA30": { + 'House': 'House', + 'Flat': 'Flat', + 'Bungalow': 'Bungalow', + 'House with Attached Garage': 'House', + 'Bed Space': None, # Assuming this does not fit the specified property types + 'House with Garage': 'House', + 'Bungalow with Wheelchair Access': 'Bungalow', + 'Maisonette': 'Maisonette', + 'Flat with Wheelchair Access': 'Flat', + 'Bedsit': None, # Assuming this does not fit the specified property types + 'Flat w Wheelchair Access & Car Park': 'Flat', + 'House with Wheelchair Access': 'House', + 'Bungalow w Wheelchair Access & Car ': 'Bungalow' + }, + "HA32": { + 'Bungalow': 'Bungalow', + 'Flat': 'Flat', + 'Bungalow Disabled': 'Bungalow', # "Disabled" properties categorized with their base type + 'House': 'House', + 'Dormer Bungalow': 'Bungalow', + 'Pop-In': None, # Does not fit the specified property types + 'Flat Disabled': 'Flat', + 'Laundry': None, # Does not fit the specified property types + 'Bedsit': None, # Excluded from the given categories + 'Shed': None, # Does not fit the specified property types + 'Store Room': None # Does not fit the specified property types + }, + "HA34": { + 'Flat': 'Flat', + 'House': 'House', + 'Bungalow': 'Bungalow', + 'Maisonette': 'Maisonette', + 'ND': None, + }, + "HA35": { + "Flat": "Flat", + "Maisonette": "Maisonette", + "House": "House", + "Bedsit": None, + "2 Bedroom Unknown": None, + "1 Bedroom Unknown": None, + "3 Bedroom Unknown": None, + "4 Bedroom Unknown": None, + }, "HA39": { "Semi house": {"property_type": "House", "built_form": "Semi-Detached"}, "1st floor flat": {"property_type": "Flat", "built_form": None}, @@ -140,6 +268,105 @@ PROPERTY_TYPE_LOOKUP = { "1st floor flat with study room": {"property_type": "Flat", "built_form": None}, "2nd floor flat with study": {"property_type": "Flat", "built_form": None}, }, + "HA41": { + 'Garage': None, + 'House 1919-1945': 'House', + 'House 1946-1964': 'House', + 'Flats & Maisonettes post 1974': 'Flat', + 'Non traditional houses': 'House', + 'Sheltered': None, + 'Flats & Maisonettes 1965-1974': 'Flat', + 'House post 1974': 'House', + 'Block': None, + 'Flats & Maisonettes 1946-1964': 'Flat', + 'House 1965-1974': 'House', + 'Non traditional flats': 'Flat', + 'Bungalow 1965-1974': 'Bungalow', + 'PIMSS EMPTY': None, + 'Bungalow post 1974': 'Bungalow', + 'Bungalow 1946-1964': 'Bungalow', + 'Flats & Maisonettes 1919-1945': 'Flat', + 'House pre 1919': 'House', + 'Flats & Maisonettes pre 1919': 'Flat', + 'Bungalow 1919-1945': 'Bungalow', + 'Office': None + }, + "HA48": { + "House": "House", + "Flat": "Flat", + "Bungalow": "Bungalow", + "Maisonette": "Maisonette", + "Unit": None + }, + "HA50": { + 'House': 'House', + 'Bungalow': 'Bungalow', + 'Flat': 'Flat', + 'House SD': 'House', + 'House MT': 'House', + 'House ET': 'House', + 'Bungalow ET': 'Bungalow', + 'House SD ': 'House', + 'House. SD': 'House', + 'Bungalow SD': 'Bungalow', + 'Bungalow MT': 'Bungalow', + 'Bungalow D': 'Bungalow', + 'House D': 'House', + 'House. MT': 'House', + 'House ': 'House', + 'House ET ': 'House', + ' ': None, + 'Flat?': 'Flat', + 'Bungalow ': 'Bungalow' + }, + "HA56": { + 'House Non Specific': 'House', + 'HOUSE TERRACED': 'House', + 'HOUSE - SEMI DETACHD': 'House', + 'Bungalow': 'Bungalow', + 'House - End Terraced': 'House', + 'Block': None, + 'Block with Communal': None, + 'Bungalow - Terraced': 'Bungalow', + 'Bungalow - Semi Dtch': 'Bungalow', + 'Block House with rooms': None, + 'Bungalow - End Terr': 'Bungalow', + 'House - Mid Terraced': 'House', + 'Bungalow - Detached': 'Bungalow', + 'House - Detached': 'House', + 'HOUSE THREE STOREY': 'House', + 'Maisonette': 'Maisonette', + 'Communal Block': None, + 'Scheme': None + }, + "HA63": { + 'Flat': 'Flat', + 'House - Semi detached': 'House', + 'House - Detached': 'House', + 'House - End Terrace': 'House', + 'House - Mid Terrace': 'House', + 'Bungalow - Semi detached': 'Bungalow', + 'Bungalow': 'Bungalow', + 'Bedsit': None, # Considering as a non-specific residential category here + 'Maisonette': 'Maisonette', + 'Bungalow - End Terrace': 'Bungalow', + 'Bungalow - Detached': 'Bungalow', + 'Maisonette - Mid Terrace': 'Maisonette', + 'Maisonette - End Terrace': 'Maisonette', + 'Studio Flat': 'Flat', + 'Maisonette - Detached': 'Maisonette', + 'Bungalow - Mid Terrace': 'Bungalow', + 'Bedsit - Mid Terrace': None, + 'Bedsit - End Terrace': None, + 'Amenity Block - Semi detached': None, # Assuming non-residential + 'Maisonette - Semi Detached': 'Maisonette', + 'Amenity Block - Detached': None, # Assuming non-residential + 'Hostel': None, # Typically not considered a standard residential property for this context + 'Bungalow - Attached': 'Bungalow', + 'Unknown': None, # Not enough information to categorize + 'Studio Flat - Mid Terrace': 'Flat', + 'Chalet - Wheelchair': None # Specialized type, not categorized here + }, "HA107": { "property_type": { "HOUSE": "House", @@ -160,6 +387,27 @@ PROPERTY_TYPE_LOOKUP = { "Detached": "Detached", "Detatched": "Detached", } + }, + "HA117": { + "Flat": "Flat", + "House": "House", + "Bungalow": "Bungalow", + "Flat over garage/underpass": "Flat", + }, + "HAXXX": { + 'mid terraced house': 'House', + 'semi detached house': 'House', + '1st fl 4 in a block': 'Flat', + 'G/F 4 in a block': 'Flat', + 'end terraced house': 'House', + '1st floor flat': 'Flat', + 'G/F floor flat': 'Flat', + 'semi detached bungalow': 'Bungalow', + '2nd floor flat': 'Flat', + 'mid terrace bungalow': 'Bungalow', + 'detached bungalow': 'Bungalow', + 'end terrace bungalow': 'Bungalow', + 'Staff accommodation': None # Marked as None due to its special nature } } @@ -2882,12 +3130,36 @@ def get_property_type_and_built_form(property_meta, ha_name): property_type = "Flat" built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"], None) + elif ha_name == "HA2": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type"].strip()) + built_form = None elif ha_name == "HA6": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"][property_meta["Dwelling type"]] built_form = property_meta["built_form"] elif ha_name == "HA7": property_type = PROPERTY_TYPE_LOOKUP[ha_name]["property_type"].get(property_meta["Archetype"]) built_form = PROPERTY_TYPE_LOOKUP[ha_name]["built_form"].get(property_meta["Property Type"]) + elif ha_name == "HA9": + property_description = property_meta["Asset Type"].strip().lower() + if "house" in property_description: + return "House", None + + if "flat" in property_description: + return "Flat", None + + if "bungalow" in property_description: + return "Bungalow", None + + if "maisonette" in property_description: + return "Maisonette", None + + return None, None + elif ha_name == "HA12": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset_Type1"].strip()) + built_form = None + elif ha_name == "HA13": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Type Cd"].strip()) + built_form = None elif ha_name == "HA14": if property_meta["Asset Type Description"] == "Block - Repair": # We try and deduce if it's a flat or house, depending on if it has "room" or "flats" in the address @@ -2902,15 +3174,60 @@ def get_property_type_and_built_form(property_meta, ha_name): ] built_form = None - elif ha_name == "HA25": - property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]] + elif ha_name == "HA15": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) built_form = None elif ha_name == "HA16": config = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Type"]] property_type = config.get("property-type") built_form = config.get("built-form") - elif ha_name == "HA39": + elif ha_name == "HA18": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Asset Type"].strip()) + built_form = None + elif ha_name == "HA19": + property_type = property_meta["Dwelling Type"] + built_form = None + elif ha_name == "HA24": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HA25": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["T1_AssetType"]] + built_form = None + elif ha_name == "HA27": + property_type = property_meta["Property Type"] + built_form = None + elif ha_name == "HA28": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["Property Type - Academy"]] + built_form = None + elif ha_name == "HA30": + property_type = PROPERTY_TYPE_LOOKUP[ha_name][property_meta["A_AssetType"]] + built_form = None + elif ha_name == "HA31": + property_description = property_meta["A_AssetType"].strip().lower() + if "house" in property_description: + return "House", None + if "flat" in property_description: + return "Flat", None + + if "bungalow" in property_description: + return "Bungalow", None + + if "maisonette" in property_description: + return "Maisonette", None + + return None, None + + elif ha_name == "HA32": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling type"].strip()) + built_form = None + elif ha_name == "HA34": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HA35": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type Grouping"].strip()) + built_form = None + elif ha_name == "HA39": property_type_config = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["ConstructionStyle"], {}) property_type = property_type_config.get("property_type", None) built_form = property_type_config.get("built_form", None) @@ -2921,11 +3238,35 @@ def get_property_type_and_built_form(property_meta, ha_name): property_type = "Flat" else: property_type = "House" + elif ha_name == "HA41": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Archetype"].strip()) + built_form = None + elif ha_name == "HA48": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HA49": + property_type = property_meta["Property Class"].strip() + built_form = None + elif ha_name == "HA54": + property_type = property_meta["Property Type"] + built_form = None + elif ha_name == "HA56": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Dwelling Type Description"].strip()) + built_form = None + elif ha_name == "HA63": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["PropertyType"].strip()) + built_form = None elif ha_name == "HA107": - property_type = property_meta.get("property_type", None) built_form = property_meta.get("built_form", None) - + elif ha_name == "HA117": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Property Type"].strip()) + built_form = None + elif ha_name == "HAXX": + return property_meta["Property Type"].split(":")[0].strip(), None + elif ha_name == "HAXXX": + property_type = PROPERTY_TYPE_LOOKUP[ha_name].get(property_meta["Unit Description"].strip()) + built_form = None else: raise NotImplementedError("Implement me") @@ -5119,9 +5460,16 @@ def forecast_remaining_sales(loader): def fml_data_pull(loader): - has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", - # Do these - "HA1", "HA13", "HA50", "HA24"] + has_bruh = [ + # "HA7", "HA14", "HA25", "HA39", "HA16", "HA28", + # Updated get_property_type_and_built_form, still needs running + "HA13", "HA50", "HA24", "HA15", "HA32", "HA28", "HA6", "HA1", "HA107", "HA41", "HA48", "HA2", "HA63", "HA12", + "HA117", "HA35", "HA34", "HA56", "HA19", "HA18", "HA9", "HA27", "HA30", "HA31", "HA54", "HA49", + # todo + ] + + # Can't pull from EPC database because it's based in Scotland + # "HAXXX", "HAXX" # DO from backend.SearchEpc import SearchEpc epc_api_key = "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=" @@ -5134,14 +5482,24 @@ def fml_data_pull(loader): # For each property, search for the latest EPC epc_data = [] for _, row in tqdm(fml.iterrows(), total=fml.shape[0]): + property_type, _ = get_property_type_and_built_form(property_meta=row, ha_name=ha) + + if ha == "HAXXX": + to_join = [str(x) for x in + [row["Door Number"], row["Address Line 1"], row["Address Line 2"], row["Address Line 3"], + row["Postcode"]] if x is not None] + full_address = ", ".join(to_join) + else: + full_address = row["matching_address"] + searcher = SearchEpc( - address1=row["HouseNo"], + address1=str(row["HouseNo"]), postcode=row["matching_postcode"], auth_token=epc_api_key, os_api_key="", property_type=property_type, - full_address=row["matching_address"], + full_address=full_address, ) # Force the skipping of estimating the EPC searcher.ordnance_survey_client.property_type = None @@ -5194,7 +5552,7 @@ def classify_loft(x): def fml_analysis(loader): assumed_ciga_pass_rate = 0.731 - has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"] + has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16", "HA1"] no_ciga_cavity_descriptions = [ "Cavity wall, as built, insulated (assumed)",