diff --git a/etl/testing_data/sap_model_simulation.py b/etl/testing_data/sap_model_simulation.py index 31cc21e7..e8b97464 100644 --- a/etl/testing_data/sap_model_simulation.py +++ b/etl/testing_data/sap_model_simulation.py @@ -1,12 +1,15 @@ +import json + import pandas as pd -from utils.s3 import read_dataframe_from_s3_parquet +from tqdm import tqdm +from utils.s3 import read_dataframe_from_s3_parquet, save_data_to_s3 from backend.Property import Property def app(): dataset = read_dataframe_from_s3_parquet( - bucket_name="retrofit-datalake-dev", - file_key="dataset_with0perm_all.parquet" + bucket_name="retrofit-data-dev", + file_key="sap_change_model/dataset.parquet" ) thresholds = dataset["total_floor_area_starting"].quantile( @@ -81,7 +84,9 @@ def app(): single_glazed_testing_data = [] partial_double_glazed_testing_data = [] partial_secondary_glazed_testing_data = [] - for property_config in property_types.itertuples(): + pitched_roof_solar = [] + flat_roof_solar = [] + for property_config in tqdm(property_types.itertuples(), total=property_types.shape[0]): # Take a sample row population = dataset[ (dataset["property_type"] == property_config.property_type) & @@ -101,13 +106,18 @@ def app(): row = population[ (population["roof_insulation_thickness"] == "none") & (population["is_pitched"]) - ].sample(1) + ] else: row = population[ (population["roof_insulation_thickness"] == insulation_thickness) & (population["is_pitched"]) - ].sample(1) + ] + + if row.empty: + continue + + row = row.sample(1) loft_insulation_270mm_simulation = Property.create_recommendation_scoring_data( property_id=row["uprn"].values[0], @@ -164,8 +174,8 @@ def app(): # Simulated IWI internal_wall_insulation_simulation = Property.create_recommendation_scoring_data( - property_id=row["uprn"].values[0].to_dict("records")[0], - recommendation_record=row.copy(), + property_id=row["uprn"].values[0], + recommendation_record=row.copy().to_dict("records")[0], recommendation={ "recommendation_id": "internal_wall_insulation", "type": "internal_wall_insulation", @@ -333,9 +343,14 @@ def app(): ) ] - for value in partial_double_glazing_sample["multi_glaze_proportion_starting"].quantile( + partial_double_glazed_values = partial_double_glazing_sample["multi_glaze_proportion_starting"].quantile( [0.25, 0.5, 0.75] - ).values: + ).values + # Take non-null values + partial_double_glazed_values = [v for v in partial_double_glazed_values if not pd.isnull(v)] + partial_double_glazed_values = set(partial_double_glazed_values) + + for value in partial_double_glazed_values: nearest_value = partial_double_glazing_sample['multi_glaze_proportion_starting'].sub(value).abs().idxmin() nearest_row = partial_double_glazing_sample.loc[[nearest_value]].sample(1) # If we start with partial double glazing, we recommend completing the job @@ -361,9 +376,14 @@ def app(): ) ] - for value in partial_secondary_glazing_sample["multi_glaze_proportion_starting"].quantile( + partial_secondary_glazed_values = partial_secondary_glazing_sample["multi_glaze_proportion_starting"].quantile( [0.25, 0.5, 0.75] - ).values: + ).values + # Take non-null values + partial_secondary_glazed_values = [v for v in partial_secondary_glazed_values if not pd.isnull(v)] + partial_secondary_glazed_values = set(partial_secondary_glazed_values) + + for value in partial_secondary_glazed_values: nearest_value = partial_secondary_glazing_sample['multi_glaze_proportion_starting'].sub( value).abs().idxmin() nearest_row = partial_secondary_glazing_sample.loc[[nearest_value]].sample(1) @@ -383,3 +403,63 @@ def app(): ) partial_secondary_glazed_testing_data.append(secondary_glazing_simulation) + + # 9) Solar PV + + # We only recommend solar for properties that have flat or pitched roofs, and no existing solar + pitched_roof_no_solar = population[ + (population["is_pitched"]) & (population["photo_supply_starting"] == 0) + ].sample(1) + + flat_roof_no_solar = population[ + (population["is_flat"]) & (population["photo_supply_starting"] == 0) + ].sample(1) + + # We simulate 30%, 40% and 50% coverage + for coverage in [30, 40, 50]: + solar_simulation_pitched = Property.create_recommendation_scoring_data( + property_id=pitched_roof_no_solar["uprn"].values[0], + recommendation_record=pitched_roof_no_solar.copy().to_dict("records")[0], + recommendation={ + "recommendation_id": "solar_pv", + "type": "solar_pv", + "new_u_value": None, # This doesn't matter at the moment + "parts": [], + "photo_supply": coverage + } + ) + + solar_simulation_flat = Property.create_recommendation_scoring_data( + property_id=flat_roof_no_solar["uprn"].values[0], + recommendation_record=flat_roof_no_solar.copy().to_dict("records")[0], + recommendation={ + "recommendation_id": "solar_pv", + "type": "solar_pv", + "new_u_value": None, # This doesn't matter at the moment + "parts": [], + "photo_supply": coverage + } + ) + + pitched_roof_solar.append(solar_simulation_pitched) + flat_roof_solar.append(solar_simulation_flat) + + # We store all of this data in s3, as it is + save_data_to_s3( + bucket_name="retrofit-datalake-dev", + s3_file_name="sap_change_model/loft_insulation_testing_data.parquet", + data=json.dumps( + { + "loft_insulation_testing_data": loft_insulation_testing_data, + "solid_wall_testing_data": solid_wall_testing_data, + "cavity_wall_testing_data": cavity_wall_testing_data, + "solid_floor_testing_data": solid_floor_testing_data, + "suspended_floor_testing_data": suspended_floor_testing_data, + "single_glazed_testing_data": single_glazed_testing_data, + "partial_double_glazed_testing_data": partial_double_glazed_testing_data, + "partial_secondary_glazed_testing_data": partial_secondary_glazed_testing_data, + "pitched_roof_solar": pitched_roof_solar, + "flat_roof_solar": flat_roof_solar + } + ) + )