From 552047f85c6d58b5dfb973e5efe9c7281f2d76ce Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 22 Jan 2026 22:56:58 +0000 Subject: [PATCH] Fixing errored case for calico run with missing data in floors --- asset_list/app.py | 23 ++++++------- backend/Property.py | 32 +++++++------------ backend/apis/GoogleSolarApi.py | 12 ++++--- .../epc_attributes/FloorAttributes.py | 4 +-- .../epc_attributes/WallAttributes.py | 3 +- etl/epc_clean/tests/test_floor_attributes.py | 18 ++++------- .../tests/test_hotwater_attributes.py | 7 ++++ .../tests/test_lighting_attributes.py | 4 +++ .../tests/test_mainfuel_attributes.py | 6 ++++ .../tests/test_mainheat_attributes.py | 19 +++++++++++ .../test_mainheat_controls_attributes.py | 9 ++++++ etl/epc_clean/tests/test_roof_attributes.py | 15 ++++++--- etl/epc_clean/tests/test_wall_attributes.py | 9 ++++++ etl/epc_clean/tests/test_window_attributes.py | 11 ++++--- recommendations/FloorRecommendations.py | 3 ++ 15 files changed, 116 insertions(+), 59 deletions(-) diff --git a/asset_list/app.py b/asset_list/app.py index c58eccd7..21a06a07 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -59,19 +59,19 @@ def app(): Property UPRN """ - data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Warmfront/SCIS") - data_filename = "SCIS_Historic_Deemed_Combined_Workings.xlsx" - sheet_name = "SCIS" - postcode_column = 'POSTCODE' - address1_column = "NO" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney" + data_filename = "Domna SHF Wave 3.xlsx" + sheet_name = "Domna Wave 3" + postcode_column = 'Postcode' + address1_column = "Address 1" address1_method = None fulladdress_column = None - address_cols_to_concat = ["NO", "Street / Block Name", "Town/Area"] + address_cols_to_concat = ["Address 1"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE As per table emailed" - landlord_built_form = "PROPERTY TYPE As per table emailed" + landlord_os_uprn = "UPRN" + landlord_property_type = None + landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None @@ -492,5 +492,6 @@ def app(): asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) # Store dupes - if not asset_list.duplicated_addresses.empty: - asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False) + if asset_list.duplicated_addresses is not None: + if not asset_list.duplicated_addresses.empty: + asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False) diff --git a/backend/Property.py b/backend/Property.py index 88bee6dc..fa607cfd 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -607,26 +607,19 @@ class Property: for description, attribute in cleaned.items(): - if self.data[description] in self.DATA_ANOMALY_MATCHES: - template = cleaned[description][0] - # Handling edge case for walls - fill_with = False if description == "walls-description" else None - fill_dict = dict(zip(template.keys(), [fill_with] * len(template))) - if description == "walls-description": - fill_dict["thermal_transmittance_unit"] = None - fill_dict["insulation_thickness"] = "none" + cleaner_cls = all_cleaner_map[description] - fill_dict.update( - { - "original_description": self.data[description], - "clean_description": self.data[description], - } - ) - setattr( - self, - self.ATTRIBUTE_MAP[description], - fill_dict, - ) + if self.data[description] in self.DATA_ANOMALY_MATCHES: + if description == "lighting-description": + cleaner_cls = cleaner_cls("", averages=None) + else: + cleaner_cls = cleaner_cls("") + fill_dict = { + "original_description": self.data[description], + "clean_description": self.data[description], + **cleaner_cls.process() + } + setattr(self, self.ATTRIBUTE_MAP[description], fill_dict) continue attributes = [ @@ -642,7 +635,6 @@ class Property: if len(attributes) == 0: # We attempt to perform the clean on the fly - cleaner_cls = all_cleaner_map[description] if description == "lighting-description": cleaner_cls = cleaner_cls(self.data[description], averages=None) else: diff --git a/backend/apis/GoogleSolarApi.py b/backend/apis/GoogleSolarApi.py index f7aa311f..bf07b5e5 100644 --- a/backend/apis/GoogleSolarApi.py +++ b/backend/apis/GoogleSolarApi.py @@ -404,9 +404,10 @@ class GoogleSolarApi: panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate # Remove anything where the total ac energy is less than half of the array wattage - panel_performance = panel_performance[ - (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5 - ] + # But - only where this is possible + wattage_filter = (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5 + if any(wattage_filter): + panel_performance = panel_performance[wattage_filter] # 2) Calculate the liftime solar energy production panel_performance['lifetime_ac_kwh'] = panel_performance.apply( @@ -477,7 +478,10 @@ class GoogleSolarApi: } ) - roi_results = pd.DataFrame(roi_results) + roi_results = pd.DataFrame( + roi_results, + columns=["n_panels", "roi", "generation_value", "generation_deficit", "expected_payback_years", "surplus"] + ) panel_performance = panel_performance.merge(roi_results, how="left", on="n_panels") diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py index cd1499c2..b0d2b361 100644 --- a/etl/epc_clean/epc_attributes/FloorAttributes.py +++ b/etl/epc_clean/epc_attributes/FloorAttributes.py @@ -113,8 +113,8 @@ class FloorAttributes(Definitions): if self.nodata: return { - 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True, - 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': True, 'is_solid': False, + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'another_property_below': False, 'insulation_thickness': 'none', 'no_data': True } diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py index 075dee96..3d92e7b3 100644 --- a/etl/epc_clean/epc_attributes/WallAttributes.py +++ b/etl/epc_clean/epc_attributes/WallAttributes.py @@ -147,9 +147,10 @@ class WallAttributes(Definitions): if self.nodata: for key in self.DEFAULT_KEYS: result[key] = False - + result["thermal_transmittance"] = None result["thermal_transmittance_unit"] = None result["insulation_thickness"] = "none" + result["is_park_home"] = False return result diff --git a/etl/epc_clean/tests/test_floor_attributes.py b/etl/epc_clean/tests/test_floor_attributes.py index 887cb689..a1f021e3 100644 --- a/etl/epc_clean/tests/test_floor_attributes.py +++ b/etl/epc_clean/tests/test_floor_attributes.py @@ -11,17 +11,6 @@ class TestCleanFloor: floor_attr = FloorAttributes(valid_description) assert floor_attr.description == valid_description.lower() - # Test initialization with an empty description - empty = FloorAttributes('') - assert empty.nodata - output = empty.process() - assert output == { - 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': True, - 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': True, - 'is_solid': False, 'another_property_below': False, 'insulation_thickness': 'none', - 'no_data': True - } - # Test initialization with a description that contains none of the keywords with pytest.raises(ValueError): FloorAttributes('description without keywords') @@ -37,6 +26,13 @@ class TestCleanFloor: # Ensure the output ordering is correct assert sorted(result.items()) == sorted(expected_result.items()) + def test_empty_str_description(self): + assert FloorAttributes("").process() == { + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_assumed': False, + 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, + 'another_property_below': False, 'insulation_thickness': 'none', 'no_data': True + } + def test_invalid_description(self): # Test that invalid descriptions raise a ValueError invalid_descriptions = [ diff --git a/etl/epc_clean/tests/test_hotwater_attributes.py b/etl/epc_clean/tests/test_hotwater_attributes.py index 2809b805..ab0f6409 100644 --- a/etl/epc_clean/tests/test_hotwater_attributes.py +++ b/etl/epc_clean/tests/test_hotwater_attributes.py @@ -15,6 +15,13 @@ class TestHotWaterAttributes: with pytest.raises(ValueError): HotWaterAttributes('description without keywords') + def test_empty_str_input(self): + assert HotWaterAttributes("").process() == { + 'heater_type': None, 'system_type': None, 'thermostat_characteristics': None, 'heating_scope': None, + 'energy_recovery': None, 'tariff_type': None, 'extra_features': None, 'chp_systems': None, + 'distribution_system': None, 'no_system_present': None, 'assumed': None, 'appliance': None + } + @pytest.mark.parametrize( "test_case", hotwater_cases diff --git a/etl/epc_clean/tests/test_lighting_attributes.py b/etl/epc_clean/tests/test_lighting_attributes.py index f3c23e8f..e6171268 100644 --- a/etl/epc_clean/tests/test_lighting_attributes.py +++ b/etl/epc_clean/tests/test_lighting_attributes.py @@ -13,6 +13,10 @@ averages = [ class TestLightingAttributes: + + def test_empty_str(self): + assert LightingAttributes("", averages).process() == {'low_energy_proportion': None} + def test_no_lighting(self): lighting = LightingAttributes("no low energy lighting", averages) result = lighting.process() diff --git a/etl/epc_clean/tests/test_mainfuel_attributes.py b/etl/epc_clean/tests/test_mainfuel_attributes.py index bface6e2..ed60b24d 100644 --- a/etl/epc_clean/tests/test_mainfuel_attributes.py +++ b/etl/epc_clean/tests/test_mainfuel_attributes.py @@ -15,6 +15,12 @@ class TestMainHeatControlAttributes: with pytest.raises(ValueError): MainFuelAttributes('description without keywords') + def test_empty_str(self): + assert MainFuelAttributes("").process() == { + 'fuel_type': 'unknown', 'tariff_type': None, 'is_community': False, + 'no_individual_heating_or_community_network': False, 'complex_fuel_type': None + } + @pytest.mark.parametrize( "test_case", mainfuel_cases diff --git a/etl/epc_clean/tests/test_mainheat_attributes.py b/etl/epc_clean/tests/test_mainheat_attributes.py index d79c271a..5813c1cf 100644 --- a/etl/epc_clean/tests/test_mainheat_attributes.py +++ b/etl/epc_clean/tests/test_mainheat_attributes.py @@ -15,6 +15,25 @@ class TestMainHeatAttributes: with pytest.raises(ValueError): MainHeatAttributes('description without keywords') + def test_empty_str(self): + assert MainHeatAttributes("").process() == { + 'has_radiators': False, 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, + 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': False, + 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, + 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, + 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False, + 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, + 'has_electric_heat_pump': False, 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, + 'has_exhaust_source_heat_pump': False, 'has_community_heat_pump': False, 'has_hot-water-only': False, + 'has_electric': False, 'has_mains_gas': False, 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, + 'has_wood_pellets': False, 'has_anthracite': False, 'has_dual_fuel_mineral_and_wood': False, + 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, 'has_mineral_and_wood': False, + 'has_dual_fuel_appliance': False, 'has_wood_chips': False, 'has_assumed': False, 'has_electricaire': False, + 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False + } + + assert set(list(MainHeatAttributes("").process().values())) == {False} + @pytest.mark.parametrize( "test_case", mainheat_cases diff --git a/etl/epc_clean/tests/test_mainheat_controls_attributes.py b/etl/epc_clean/tests/test_mainheat_controls_attributes.py index 7b114107..8826546b 100644 --- a/etl/epc_clean/tests/test_mainheat_controls_attributes.py +++ b/etl/epc_clean/tests/test_mainheat_controls_attributes.py @@ -15,6 +15,15 @@ class TestMainHeatControlAttributes: with pytest.raises(ValueError): MainheatControlAttributes('description without keywords') + def test_empty_str(self): + assert MainheatControlAttributes("").process() == { + 'thermostatic_control': False, 'charging_system': False, 'switch_system': False, 'no_control': False, + 'dhw_control': False, 'community_heating': False, 'multiple_room_thermostats': False, + 'auxiliary_systems': False, 'trvs': False, 'rate_control': False + } + + assert set(list(MainheatControlAttributes("").process().values())) == {False} + @pytest.mark.parametrize( "test_case", mainheat_control_cases diff --git a/etl/epc_clean/tests/test_roof_attributes.py b/etl/epc_clean/tests/test_roof_attributes.py index 481beedc..33c6a829 100644 --- a/etl/epc_clean/tests/test_roof_attributes.py +++ b/etl/epc_clean/tests/test_roof_attributes.py @@ -1,10 +1,10 @@ import pytest -from pathlib import Path from etl.epc_clean.tests.test_data.test_roof_attributes_cases import clean_roof_test_cases from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes # For local testing +# from pathlib import Path # if __file__ == "": # input_data_path = Path("./model_data/tests/test_data/EpcClean_inputs.obj") # else: @@ -20,13 +20,18 @@ class TestRoofAttributes: floor_attr = RoofAttributes(valid_description) assert floor_attr.description == valid_description.lower() - # Test initialization with an empty description - ra = RoofAttributes('') - assert ra.nodata - with pytest.raises(ValueError): RoofAttributes('description without keywords') + def test_empty_str(self): + # Test initialization with an empty description + assert RoofAttributes('').process() == { + 'thermal_transmittance': False, 'thermal_transmittance_unit': False, 'is_pitched': False, + 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, + 'is_assumed': False, 'has_dwelling_above': False, 'is_valid': False, 'insulation_thickness': False + } + assert set(list(RoofAttributes('').process().values())) == {False} + def test_clean_roof(self): result = RoofAttributes('Pitched, 270 mm loft insulation').process() diff --git a/etl/epc_clean/tests/test_wall_attributes.py b/etl/epc_clean/tests/test_wall_attributes.py index 970dbd98..67e87bf5 100644 --- a/etl/epc_clean/tests/test_wall_attributes.py +++ b/etl/epc_clean/tests/test_wall_attributes.py @@ -56,3 +56,12 @@ class TestWallAttributes: raise Exception("Something went wong") # Ensure the output ordering is correct assert sorted(result.items()) == sorted(expected_result.items()) + + def test_empty_str(self): + assert WallAttributes("").process() == { + 'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_cavity_wall': False, + 'is_filled_cavity': False, 'is_solid_brick': False, 'is_system_built': False, 'is_timber_frame': False, + 'is_granite_or_whinstone': False, 'is_as_built': False, 'is_cob': False, 'is_assumed': False, + 'is_sandstone_or_limestone': False, 'insulation_thickness': 'none', 'external_insulation': False, + 'internal_insulation': False, "is_park_home": False + } diff --git a/etl/epc_clean/tests/test_window_attributes.py b/etl/epc_clean/tests/test_window_attributes.py index 46ebde45..baa421d1 100644 --- a/etl/epc_clean/tests/test_window_attributes.py +++ b/etl/epc_clean/tests/test_window_attributes.py @@ -11,15 +11,16 @@ class TestWindowAttributes: window_attr = WindowAttributes(valid_description) assert window_attr.description == valid_description.lower() - # Test initialization with an empty description - empty_description = '' - window_attr_empty = WindowAttributes(empty_description) - assert window_attr_empty.nodata - # Test initialization with a description that contains none of the keywords with pytest.raises(ValueError): WindowAttributes('description without keywords') + def test_empty_str(self): + # Test initialization with an empty description + assert WindowAttributes("").process() == { + 'has_glazing': False, 'glazing_coverage': None, 'glazing_type': None, 'no_data': True + } + @pytest.mark.parametrize( "case", windows_cases diff --git a/recommendations/FloorRecommendations.py b/recommendations/FloorRecommendations.py index 2610c842..7469031c 100644 --- a/recommendations/FloorRecommendations.py +++ b/recommendations/FloorRecommendations.py @@ -72,6 +72,9 @@ class FloorRecommendations(Definitions): if not measures or not any(x in measures for x in MEASURE_MAP["floor_insulation"]): return + if self.property.floor.get("no_data", False): + return + u_value = self.property.floor["thermal_transmittance"] property_type = self.property.data["property-type"] floor_area = self.property.insulation_floor_area