diff --git a/etl/customers/waltham_forest/decent_homes_pilot.py b/etl/customers/waltham_forest/decent_homes_pilot.py index b59168fb..ba9bb3b7 100644 --- a/etl/customers/waltham_forest/decent_homes_pilot.py +++ b/etl/customers/waltham_forest/decent_homes_pilot.py @@ -5,6 +5,8 @@ import pandas as pd from datetime import datetime +from docutils.nodes import table + def years_between(d1, d2): # precise year difference (accounts for months/days) @@ -16,13 +18,14 @@ def get_element(elements, label): return elements.get(label) -def append_result(decent_homes_meta, variable, result, install_date=None): +def append_result(decent_homes_meta, variable, result, install_date=None, expiry_date=None): decent_homes_meta.append({ "variable": variable, "result": result, "hhsrs_rank": None, "hhsrs_score": None, - "install_date": install_date + "install_date": install_date, + "expiry_date": expiry_date, }) @@ -165,11 +168,13 @@ B_COMPONENT_LABELS = { "Roof Structure 1 in External Area", "Roof Structure 2 in External Area", "Roof Structure 3 in External Area", - # If you later decide to include ancillary items, add: - # "Fascia / Soffit / Bargeboard in External Area", - # "Gutters in External Area", "Downpipes in External Area", - # "Internal Downpipes in External Area", - # and give them a clear condition rule. + "Garage Roof in External Area", + "Garage and Store Roofs in External Area", + "Store Roof in External Area", + "Fascia / Soffit / Bargeboard in External Area", + "Gutters in External Area", + "Downpipes in External Area", + "Internal Downpipes in External Area" ], "roof_finish": [ "Roof Covering 1 in External Area", @@ -198,20 +203,15 @@ B_COMPONENT_LABELS = { "Store Door in External Area", ], "central_heating_boiler": [ - # If the dataset exposes a specific boiler element, put it here. - # For now we only have "Heating Improvement Required in Property" elsewhere (Criterion D), - # which isn't reliable for age. 
If your JSON later includes a boiler line with INSTALL DATE, - # add its label here. + # TODO ], "heating_other": [ - # e.g., gas fires/storage heaters if present as discrete elements later. + # TODO ], "electrical_systems": [ - # If you have an installation line with dates (e.g. "Electrics Required in Property") - # add it here; we will rely on INSTALL DATE + REMAINING LIFE. + # TODO "Electrics Required in Property", ], - # Other components "kitchen": [ "Adequacy of Kitchen and Type in Property", @@ -287,9 +287,10 @@ COMPONENT_LIFESPANS = { } # Database design -# creation_date, uprn, variable, result, hhsrs_score (optional, numeric), hhsrs_rank (A-J), install_date (for -# components which expire, e.g. kitchen) +# creation_date, uprn, variable, result (pass/fail/nodata), hhsrs_score (optional, numeric), hhsrs_rank (A-J), +# install_date (for components which expire, e.g. kitchen), remaining_life (for components which expire, e.g. kitchen), +# TODO: Add the criterion decent_homes_meta = [] # Use to capture criterion A, B, C and D. Should be: # {"uprn": int, "creation_date": datetime, "criterion_a": bool, "criterion_b": bool, "criterion_c": bool, @@ -303,12 +304,15 @@ for fn in filenames: if property_info["PROP TYPE"] in ["HOU"]: property_type = "house" elif property_info["PROP TYPE"] == "FLA": - raise Exception("Implement distrinction between below and above 6 storeys") + raise NotImplementedError("Implement distrinction between below and above 6 storeys") # property_type = "flat" else: raise NotImplementedError("Unknown property type") # ---------------- Criterion A ---------------- + # TODO: Map out the sub-information + # Criterion A: pass/fail + # If fail, why? 
for hhsrs_variable, mapping in HHSRS_MAPPING.items(): element_code = list(mapping.keys())[0] @@ -347,19 +351,36 @@ for fn in filenames: component_pass_or_fail = [] # TODO: Delete me - component, labels = list(B_COMPONENT_LABELS.items())[1] + component, labels = list(B_COMPONENT_LABELS.items())[9] + label = labels[0] + # TODO: need to handle the case where there is no survey data at all for a component for component, labels in B_COMPONENT_LABELS.items(): # TODO: labels may not need to be multiple variables for label in labels: # Grab the label label_data = get_element(data["elements"], label) + if label_data["ATTRIBUTE CODE"] in ["UNKNOWN", "NONE", "UNKNOWNG", "UNKNOWNS"]: + # This isn't applicable + component_pass_or_fail.append( + { + "component": component, + "label": label, + "install_date": None, + "remaining_life": None, + "is_old": False, + "has_failed": False, + "result": "pass", + "appliable": False + } + ) + continue # 1) We check if the component is old install_date = pd.to_datetime(label_data["INSTALL DATE"]) if pd.isnull(install_date): raise ValueError("Missing install date - pls check") component_lifetime = COMPONENT_LIFESPANS[component][property_type] # This should be populated, and for the pilot it's okay if this errors if missing - we'll handle accordingly - is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) >= component_lifetime + is_old = years_between(today.to_pydatetime(), install_date.to_pydatetime()) > component_lifetime # 2) We check if the component is in poor condition if pd.isnull(label_data["REMAINING LIFE"]): raise ValueError("Missing remaining life - pls check") @@ -369,15 +390,54 @@ for fn in filenames: component_pass_or_fail.append( { "component": component, + "component_type": "key" if component in KEY_COMPONENTS else "other", + "component_sub_description": label_data["ATTRIBUTE CODE DESCRIPTION"], "label": label, "install_date": str(install_date), "remaining_life": label_data["REMAINING LIFE"], "is_old": 
is_old, "has_failed": has_failed, - "result": component_result + "result": component_result, + "appliable": True } ) + # TODO: We need to check by component + # Example of a pass for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + + # Example of a fail for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "fail"}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + + # Example of no data for a component + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass"}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "nodata", "appliable": True}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass"}, + # ] + # OR + # Everything is unknown + # [ + # {"component": "external_walls", "component_type": "key", "descr": "A", "result": "pass", "appliable": False}, + # {"component": "external_walls", "component_type": "key", "descr": "B", "result": "pass", "appliable": False}, + # {"component": "external_walls", "component_type": "key", "descr": "C", "result": "pass", "appliable": False}, + # ] + + # Component 1: pass/fail, key: True/False + # Component 2: pass/fail, key: True/False + # Component 3: pass/fail, key: True/False + # Component 4: pass/fail, key: True/False + # Component 5: pass/fail, key: True/False + # -> Decide on outcome. 
If failure of 1 key component -> fail criterion B, or 2 other components -> fail criterion B + # ---------------- Criterion C ---------------- today = pd.Timestamp.today().normalize() @@ -396,7 +456,8 @@ for fn in filenames: else: raise NotImplementedError("Kitchen data missing - pls check") append_result( - decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, install_date=str(kit_install) + decent_homes_meta, "kitchen_less_than_20_years_old", kitchen_age_result, + install_date=str(kit_install), expiry_date=str(kit_next_due) ) # 2) Kitchen adequate space/layout @@ -533,6 +594,8 @@ for fn in filenames: else: criterion_a_result = "no_data" + # ---------------- Criterion B overall ---------------- + # ---------------- Criterion C overall ---------------- criterion_c_vars = [ "kitchen_less_than_20_years_old",