From 612922df6a8a1095dae0c6440f288f2c7572d55e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 22 Dec 2023 16:17:53 +0000 Subject: [PATCH] handling case of missing built form --- etl/eligibility/ha_15_32/ha4_app.py | 45 +++++++++++++------------ etl/epc/DataProcessor.py | 4 +++ recommendations/recommendation_utils.py | 2 +- 3 files changed, 29 insertions(+), 22 deletions(-) diff --git a/etl/eligibility/ha_15_32/ha4_app.py b/etl/eligibility/ha_15_32/ha4_app.py index 4e87b5a6..1d924347 100644 --- a/etl/eligibility/ha_15_32/ha4_app.py +++ b/etl/eligibility/ha_15_32/ha4_app.py @@ -40,7 +40,7 @@ def standardise_ha_4(data): data["Location Name"] = data["Location Name"].str.strip() # Remove any unusable postcodes - data = data[data["Post Code"] != '\\\\'] + data = data[data["Post Code"] != '\\\\'].copy() # Some specific replacements data["Location Name"] = np.where( @@ -75,7 +75,8 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): searcher.search() if searcher.data is None: - vlsh + nodata.append(property_meta.to_dict()) + continue epcs = searcher.data["rows"] epcs = pd.DataFrame(epcs) @@ -117,25 +118,27 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at): ) scoring_data.extend(scoring_dictionary) - results.append( - { - "row_id": property_meta["row_id"], - "gbis_eligible": eligibility.gbis_warmfront, - "eco4_eligible": eligibility.eco4_warmfront["eligible"], - "eco4_message": eligibility.eco4_warmfront["message"], - "sap": float(eligibility.epc["current-energy-efficiency"]), - "gbis_eligible_future": eligibility.gbis["eligible"], - "gbis_eligible_future_message": eligibility.gbis["message"], - "eco4_eligible_future": eligibility.eco4["eligible"], - "eco4_eligible_future_message": eligibility.eco4["message"], - # Property components - "roof": eligibility.roof["clean_description"], - "walls": eligibility.walls["clean_description"], - "heating": eligibility.epc["mainheat-description"], - "tenure": eligibility.tenure, - "date_epc": eligibility.epc["lodgement-date"], - } - ) + results.append( + { + "row_id": property_meta["row_id"], + "Location Name": property_meta["Location Name"], + "Post Code": property_meta["Post Code"], + "gbis_eligible": eligibility.gbis_warmfront, + "eco4_eligible": eligibility.eco4_warmfront["eligible"], + "eco4_message": eligibility.eco4_warmfront["message"], + "sap": float(eligibility.epc["current-energy-efficiency"]), + "gbis_eligible_future": eligibility.gbis["eligible"], + "gbis_eligible_future_message": eligibility.gbis["message"], + "eco4_eligible_future": eligibility.eco4["eligible"], + "eco4_eligible_future_message": eligibility.eco4["message"], + # Property components + "roof": eligibility.roof["clean_description"], + "walls": eligibility.walls["clean_description"], + "heating": eligibility.epc["mainheat-description"], + "tenure": eligibility.tenure, + "date_epc": eligibility.epc["lodgement-date"], + } + ) def app(): diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py index 0587fdbe..2bc73765 100644 --- a/etl/epc/DataProcessor.py +++ b/etl/epc/DataProcessor.py @@ -492,12 +492,16 @@ class DataProcessor: how='left' ) + global_averages = cleaning_data[cols_to_clean].mean() + # Fill NaN values with averages for col in cols_to_clean: data_to_clean[col].fillna(data_to_clean[f"{col}_AVERAGE"], inplace=True) data_to_clean.drop(columns=[f"{col}_AVERAGE"], inplace=True) # If we still have missings data_to_clean[col].fillna(data_to_clean[col].mean(), inplace=True) + # Final step if we still have missings - use global mean + data_to_clean[col].fillna(global_averages[col], inplace=True) return data_to_clean diff --git a/recommendations/recommendation_utils.py b/recommendations/recommendation_utils.py index 7cfe023e..175eb641 100644 --- a/recommendations/recommendation_utils.py +++ b/recommendations/recommendation_utils.py @@ -548,7 +548,7 @@ def estimate_external_wall_area(num_floors, floor_height, perimeter, built_form) 'Detached': 4, } - exposed_wall_area = total_wall_area * (number_exposed_walls[built_form] / 4) + exposed_wall_area = total_wall_area * (number_exposed_walls.get(built_form, 3) / 4) return exposed_wall_area