Debug the simulation testing pipeline

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-12 15:53:27 +00:00
parent a0bbdadd1f
commit 61488b1aea

View file

@ -1,12 +1,15 @@
import json
import pandas as pd
from utils.s3 import read_dataframe_from_s3_parquet
from tqdm import tqdm
from utils.s3 import read_dataframe_from_s3_parquet, save_data_to_s3
from backend.Property import Property
def app():
dataset = read_dataframe_from_s3_parquet(
bucket_name="retrofit-datalake-dev",
file_key="dataset_with0perm_all.parquet"
bucket_name="retrofit-data-dev",
file_key="sap_change_model/dataset.parquet"
)
thresholds = dataset["total_floor_area_starting"].quantile(
@ -81,7 +84,9 @@ def app():
single_glazed_testing_data = []
partial_double_glazed_testing_data = []
partial_secondary_glazed_testing_data = []
for property_config in property_types.itertuples():
pitched_roof_solar = []
flat_roof_solar = []
for property_config in tqdm(property_types.itertuples(), total=property_types.shape[0]):
# Take a sample row
population = dataset[
(dataset["property_type"] == property_config.property_type) &
@ -101,13 +106,18 @@ def app():
row = population[
(population["roof_insulation_thickness"] == "none") &
(population["is_pitched"])
].sample(1)
]
else:
row = population[
(population["roof_insulation_thickness"] == insulation_thickness) &
(population["is_pitched"])
].sample(1)
]
if row.empty:
continue
row = row.sample(1)
loft_insulation_270mm_simulation = Property.create_recommendation_scoring_data(
property_id=row["uprn"].values[0],
@ -164,8 +174,8 @@ def app():
# Simulated IWI
internal_wall_insulation_simulation = Property.create_recommendation_scoring_data(
property_id=row["uprn"].values[0].to_dict("records")[0],
recommendation_record=row.copy(),
property_id=row["uprn"].values[0],
recommendation_record=row.copy().to_dict("records")[0],
recommendation={
"recommendation_id": "internal_wall_insulation",
"type": "internal_wall_insulation",
@ -333,9 +343,14 @@ def app():
)
]
for value in partial_double_glazing_sample["multi_glaze_proportion_starting"].quantile(
partial_double_glazed_values = partial_double_glazing_sample["multi_glaze_proportion_starting"].quantile(
[0.25, 0.5, 0.75]
).values:
).values
# Take non-null values
partial_double_glazed_values = [v for v in partial_double_glazed_values if not pd.isnull(v)]
partial_double_glazed_values = set(partial_double_glazed_values)
for value in partial_double_glazed_values:
nearest_value = partial_double_glazing_sample['multi_glaze_proportion_starting'].sub(value).abs().idxmin()
nearest_row = partial_double_glazing_sample.loc[[nearest_value]].sample(1)
# If we start with partial double glazing, we recommend completing the job
@ -361,9 +376,14 @@ def app():
)
]
for value in partial_secondary_glazing_sample["multi_glaze_proportion_starting"].quantile(
partial_secondary_glazed_values = partial_secondary_glazing_sample["multi_glaze_proportion_starting"].quantile(
[0.25, 0.5, 0.75]
).values:
).values
# Take non-null values
partial_secondary_glazed_values = [v for v in partial_secondary_glazed_values if not pd.isnull(v)]
partial_secondary_glazed_values = set(partial_secondary_glazed_values)
for value in partial_secondary_glazed_values:
nearest_value = partial_secondary_glazing_sample['multi_glaze_proportion_starting'].sub(
value).abs().idxmin()
nearest_row = partial_secondary_glazing_sample.loc[[nearest_value]].sample(1)
@ -383,3 +403,63 @@ def app():
)
partial_secondary_glazed_testing_data.append(secondary_glazing_simulation)
# 9) Solar PV
# We only recommend solar for properties that have flat or pitched roofs, and no existing solar
pitched_roof_no_solar = population[
(population["is_pitched"]) & (population["photo_supply_starting"] == 0)
].sample(1)
flat_roof_no_solar = population[
(population["is_flat"]) & (population["photo_supply_starting"] == 0)
].sample(1)
# We simulate 30%, 40% and 50% coverage
for coverage in [30, 40, 50]:
solar_simulation_pitched = Property.create_recommendation_scoring_data(
property_id=pitched_roof_no_solar["uprn"].values[0],
recommendation_record=pitched_roof_no_solar.copy().to_dict("records")[0],
recommendation={
"recommendation_id": "solar_pv",
"type": "solar_pv",
"new_u_value": None, # This doesn't matter at the moment
"parts": [],
"photo_supply": coverage
}
)
solar_simulation_flat = Property.create_recommendation_scoring_data(
property_id=flat_roof_no_solar["uprn"].values[0],
recommendation_record=flat_roof_no_solar.copy().to_dict("records")[0],
recommendation={
"recommendation_id": "solar_pv",
"type": "solar_pv",
"new_u_value": None, # This doesn't matter at the moment
"parts": [],
"photo_supply": coverage
}
)
pitched_roof_solar.append(solar_simulation_pitched)
flat_roof_solar.append(solar_simulation_flat)
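# Store all of the simulated testing data in S3 as a single JSON document.
# NOTE(review): the object key below ends in ".parquet" and names only loft insulation, but the payload is JSON covering every list built above - confirm the intended key.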
save_data_to_s3(
bucket_name="retrofit-datalake-dev",
s3_file_name="sap_change_model/loft_insulation_testing_data.parquet",
data=json.dumps(
{
"loft_insulation_testing_data": loft_insulation_testing_data,
"solid_wall_testing_data": solid_wall_testing_data,
"cavity_wall_testing_data": cavity_wall_testing_data,
"solid_floor_testing_data": solid_floor_testing_data,
"suspended_floor_testing_data": suspended_floor_testing_data,
"single_glazed_testing_data": single_glazed_testing_data,
"partial_double_glazed_testing_data": partial_double_glazed_testing_data,
"partial_secondary_glazed_testing_data": partial_secondary_glazed_testing_data,
"pitched_roof_solar": pitched_roof_solar,
"flat_roof_solar": flat_roof_solar
}
)
)