From 3ef346b248ed89e04a08d07a0231db987809521b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 27 Feb 2024 13:12:54 +0000 Subject: [PATCH] patching roof description in cleaned further --- .../ha_15_32/ha_analysis_batch_3.py | 60 ++++++++++++++++++- etl/epc/Dataset.py | 28 +++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 3dc4d45f..e261710e 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -1755,7 +1755,16 @@ def patch_cleaned(cleaned): ] ) - # We treat unknown loft insulation as no insulation + cleaned["roof-description"].extend( + [ + {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation', + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True, + 'is_roof_room': False, + 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'} + ] + ) + cleaned["roof-description"].extend( [ {'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation', @@ -1777,6 +1786,55 @@ def patch_cleaned(cleaned): ] ) + thermal_transmittance_values = list(np.arange(0, 2, 0.01)) + for ttv in thermal_transmittance_values: + ttv_roundeded = round(ttv, 2) + # We look for an instance of that thermal transmittance value + rec = [ + x for x in cleaned["roof-description"] if + (x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"] + ] + + if rec: + continue + else: + # We patch the record + cleaned["roof-description"].extend( + [{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K', + 'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k', + 'thermal_transmittance': ttv_roundeded, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, + 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}] + ) + + # We also patch a funny unit value we found + for ttv in thermal_transmittance_values: + ttv_rounded = round(ttv, 2) + # We look for an instance of that thermal transmittance value + rec = [ + x for x in cleaned["roof-description"] if + (x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"] + and x["thermal_transmittance_unit"] == "w/m?K" + ] + + if rec: + continue + else: + # We patch the record + ttv_string = str(ttv_rounded) + if len(ttv_string) == 3: + ttv_string = f"{ttv_string}0" + + cleaned["roof-description"].extend( + [{'original_description': f'Average thermal transmittance {ttv_string} W/m?K', + 'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k', + 'thermal_transmittance': ttv_rounded, + 'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, + 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, + 'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}] + ) + # Patch mainheatcont-description cleaned["mainheatcont-description"].extend( [ diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index 7040d66c..cf241747 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -658,6 +658,34 @@ class TrainingDataset(BaseDataset): components_to_expand = cols_to_drop.keys() + for comp in list(components_to_expand): + if comp == "main-fuel": + cleaned_key = "main-fuel" + left_on_starting = "main_fuel_starting" + left_on_ending = "main_fuel_ending" + original_cols = ["main_fuel_starting", "main_fuel_ending"] + else: + cleaned_key = f"{comp}-description" + left_on_starting = f"{comp}_description_starting" + left_on_ending = f"{comp}_description_ending" + original_cols = [ + f"{comp}_description_starting", + f"{comp}_description_ending", + ] + df = pd.DataFrame(cleaned_lookup[cleaned_key]) + # Check for the existence + filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]] + filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]] + if filtered_1.empty: + print(comp) + print(self.df[left_on_starting].values[0]) + + if filtered_2.empty: + print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup") + + z = pd.DataFrame(cleaned_lookup["roof-description"]) + z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"] + for component in components_to_expand: # TODO: change cleaned dataframe to have underscores instead of dashes if component == "main-fuel":