From 7c109ebf5d2ad3004e5c1615edb4337e99b45d51 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 12 Feb 2024 16:03:51 +0000 Subject: [PATCH] handling empty data cases in testing pipeline --- etl/testing_data/sap_model_simulation.py | 81 +++++++++++++----------- 1 file changed, 45 insertions(+), 36 deletions(-) diff --git a/etl/testing_data/sap_model_simulation.py b/etl/testing_data/sap_model_simulation.py index e8b97464..6ff89691 100644 --- a/etl/testing_data/sap_model_simulation.py +++ b/etl/testing_data/sap_model_simulation.py @@ -232,10 +232,13 @@ def app(): population["is_solid"] & (population["floor_insulation_thickness"] == "none") ] + solid_floor_uvalues = solid_floor_sample["floor_thermal_transmittance"].quantile([0.25, 0.5, 0.75]).values + solid_floor_uvalues = {v for v in solid_floor_uvalues if not pd.isnull(v)} + # We have many different values of u-value for solid floors, we we'll take a sample at the 25%, 50% and 75% # values # We must take a value that is in one of the unique values for floor_thermal_transmittance - for uvalue in solid_floor_sample["floor_thermal_transmittance"].quantile([0.25, 0.5, 0.75]).values: + for uvalue in solid_floor_uvalues: nearest_value = solid_floor_sample['floor_thermal_transmittance'].sub(uvalue).abs().idxmin() nearest_row = solid_floor_sample.loc[[nearest_value]].sample(1) @@ -258,8 +261,13 @@ def app(): population["is_suspended"] & (population["floor_insulation_thickness"] == "none") ] + suspended_floor_uvalues = suspended_floor_sample["floor_thermal_transmittance"].quantile( + [0.25, 0.5, 0.75] + ).values + suspended_floor_uvalues = {v for v in suspended_floor_uvalues if not pd.isnull(v)} + # We take the same approach as for solid floors - for uvalue in suspended_floor_sample["floor_thermal_transmittance"].quantile([0.25, 0.5, 0.75]).values: + for uvalue in suspended_floor_uvalues: nearest_value = suspended_floor_sample['floor_thermal_transmittance'].sub(uvalue).abs().idxmin() nearest_row = 
suspended_floor_sample.loc[[nearest_value]].sample(1) @@ -282,14 +290,7 @@ def app(): (population["glazing_type"] == "single") ] - # We take multiple values for multi_glaze_proportion_starting. We definitely need zero, but then we also - # take the 25%, 50% and 75% values - multi_glaze_values = [0] + list( - single_glazing_sample["multi_glaze_proportion_starting"].quantile([0.25, 0.5, 0.75]).values - ) - multi_glaze_values = set(multi_glaze_values) - - for value in multi_glaze_values: + if not single_glazing_sample.empty: row = single_glazing_sample[ single_glazing_sample["multi_glaze_proportion_starting"] == value ].sample(1) @@ -409,40 +410,48 @@ def app(): # We only recommend solar for properties that have flat or pitched roofs, and no existing solar pitched_roof_no_solar = population[ (population["is_pitched"]) & (population["photo_supply_starting"] == 0) - ].sample(1) + ] + + if not pitched_roof_no_solar.empty: + pitched_roof_no_solar = pitched_roof_no_solar.sample(1) flat_roof_no_solar = population[ (population["is_flat"]) & (population["photo_supply_starting"] == 0) - ].sample(1) + ] + + if not flat_roof_no_solar.empty: + flat_roof_no_solar = flat_roof_no_solar.sample(1) # We simulate 30%, 40% and 50% coverage for coverage in [30, 40, 50]: - solar_simulation_pitched = Property.create_recommendation_scoring_data( - property_id=pitched_roof_no_solar["uprn"].values[0], - recommendation_record=pitched_roof_no_solar.copy().to_dict("records")[0], - recommendation={ - "recommendation_id": "solar_pv", - "type": "solar_pv", - "new_u_value": None, # This doesn't matter at the moment - "parts": [], - "photo_supply": coverage - } - ) - solar_simulation_flat = Property.create_recommendation_scoring_data( - property_id=flat_roof_no_solar["uprn"].values[0], - recommendation_record=flat_roof_no_solar.copy().to_dict("records")[0], - recommendation={ - "recommendation_id": "solar_pv", - "type": "solar_pv", - "new_u_value": None, # This doesn't matter at the moment - "parts": 
[], - "photo_supply": coverage - } - ) + if not pitched_roof_no_solar.empty: + solar_simulation_pitched = Property.create_recommendation_scoring_data( + property_id=pitched_roof_no_solar["uprn"].values[0], + recommendation_record=pitched_roof_no_solar.copy().to_dict("records")[0], + recommendation={ + "recommendation_id": "solar_pv", + "type": "solar_pv", + "new_u_value": None, # This doesn't matter at the moment + "parts": [], + "photo_supply": coverage + } + ) + pitched_roof_solar.append(solar_simulation_pitched) - pitched_roof_solar.append(solar_simulation_pitched) - flat_roof_solar.append(solar_simulation_flat) + if not flat_roof_no_solar.empty: + solar_simulation_flat = Property.create_recommendation_scoring_data( + property_id=flat_roof_no_solar["uprn"].values[0], + recommendation_record=flat_roof_no_solar.copy().to_dict("records")[0], + recommendation={ + "recommendation_id": "solar_pv", + "type": "solar_pv", + "new_u_value": None, # This doesn't matter at the moment + "parts": [], + "photo_supply": coverage + } + ) + flat_roof_solar.append(solar_simulation_flat) # We store all of this data in s3, as it is save_data_to_s3(