mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Adding unique IDs and a config hash to prevent duplicates
This commit is contained in:
parent
1028861a1b
commit
bd0fb8c2c1
1 changed file with 45 additions and 17 deletions
|
|
@ -104,6 +104,9 @@ def app():
|
|||
pitched_roof_solar = []
|
||||
flat_roof_solar = []
|
||||
for property_config in tqdm(property_types.itertuples(), total=property_types.shape[0]):
|
||||
|
||||
config_hash = hash(str(property_config))
|
||||
|
||||
# Take a sample row
|
||||
population = dataset[
|
||||
(dataset["property_type"] == property_config.property_type) &
|
||||
|
|
@ -144,7 +147,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "loft_insulation_270mm",
|
||||
"recommendation_id": f"loft_insulation_{insulation_thickness}_270mm_{config_hash}",
|
||||
"type": "loft_insulation",
|
||||
"new_u_value": best_270mm_uvalue,
|
||||
"parts": [
|
||||
|
|
@ -157,7 +160,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "loft_insulation_300mm",
|
||||
"recommendation_id": f"loft_insulation_{insulation_thickness}_300mm_{config_hash}",
|
||||
"type": "loft_insulation",
|
||||
"new_u_value": best_300mm_uvalue,
|
||||
"parts": [
|
||||
|
|
@ -198,7 +201,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "internal_wall_insulation",
|
||||
"recommendation_id": f"internal_wall_insulation_uvalue_{uvalue}_{config_hash}",
|
||||
"type": "internal_wall_insulation",
|
||||
"new_u_value": best_internal_wall_uvalue,
|
||||
"parts": []
|
||||
|
|
@ -210,7 +213,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "external_wall_insulation",
|
||||
"recommendation_id": f"external_wall_insulation_uvalue_{uvalue}_{config_hash}",
|
||||
"type": "external_wall_insulation",
|
||||
"new_u_value": best_external_wall_uvalue,
|
||||
"parts": []
|
||||
|
|
@ -239,7 +242,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "cavity_wall_insulation",
|
||||
"recommendation_id": f"cavity_wall_insulation_uvalue_{uvalue}_{config_hash}",
|
||||
"type": "cavity_wall_insulation",
|
||||
"new_u_value": best_cavity_wall_uvalue,
|
||||
"parts": []
|
||||
|
|
@ -268,7 +271,7 @@ def app():
|
|||
property_id=nearest_row["uprn"].values[0],
|
||||
recommendation_record=nearest_row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "solid_floor_insulation",
|
||||
"recommendation_id": f"solid_floor_insulation_uvalue_{uvalue}_{config_hash}",
|
||||
"type": "solid_floor_insulation",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": []
|
||||
|
|
@ -297,7 +300,7 @@ def app():
|
|||
property_id=nearest_row["uprn"].values[0],
|
||||
recommendation_record=nearest_row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "suspended_floor_insulation",
|
||||
"recommendation_id": f"suspended_floor_insulation_uvalue_{uvalue}_{config_hash}",
|
||||
"type": "suspended_floor_insulation",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": []
|
||||
|
|
@ -321,7 +324,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "windows_glazing",
|
||||
"recommendation_id": f"windows_glazing_single_to_double_{config_hash}",
|
||||
"type": "windows_glazing",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": [],
|
||||
|
|
@ -334,7 +337,7 @@ def app():
|
|||
property_id=row["uprn"].values[0],
|
||||
recommendation_record=row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "windows_glazing",
|
||||
"recommendation_id": f"windows_glazing_single_to_secondary_{config_hash}",
|
||||
"type": "windows_glazing",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": [],
|
||||
|
|
@ -379,7 +382,7 @@ def app():
|
|||
property_id=nearest_row["uprn"].values[0],
|
||||
recommendation_record=nearest_row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "windows_glazing",
|
||||
"recommendation_id": f"windows_glazing_partial_double_to_double_{value}_{config_hash}",
|
||||
"type": "windows_glazing",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": [],
|
||||
|
|
@ -414,7 +417,7 @@ def app():
|
|||
property_id=nearest_row["uprn"].values[0],
|
||||
recommendation_record=nearest_row.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "windows_glazing",
|
||||
"recommendation_id": f"windows_glazing_partial_secondary_to_secondary_{value}_{config_hash}",
|
||||
"type": "windows_glazing",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": [],
|
||||
|
|
@ -449,7 +452,7 @@ def app():
|
|||
property_id=pitched_roof_no_solar["uprn"].values[0],
|
||||
recommendation_record=pitched_roof_no_solar.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "solar_pv",
|
||||
"recommendation_id": f"pitched_solar_pv_coverage_{coverage}_percent_{config_hash}",
|
||||
"type": "solar_pv",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": [],
|
||||
|
|
@ -463,7 +466,7 @@ def app():
|
|||
property_id=flat_roof_no_solar["uprn"].values[0],
|
||||
recommendation_record=flat_roof_no_solar.copy().to_dict("records")[0],
|
||||
recommendation={
|
||||
"recommendation_id": "solar_pv",
|
||||
"recommendation_id": f"flat_solar_pv_coverage_{coverage}_percent_{config_hash}",
|
||||
"type": "solar_pv",
|
||||
"new_u_value": None, # This doesn't matter at the moment
|
||||
"parts": [],
|
||||
|
|
@ -521,18 +524,43 @@ def app():
|
|||
# Store final parquet in s3
|
||||
save_dataframe_to_s3_parquet(
|
||||
df=loft_insulation_predictions,
|
||||
bucket_name="retrofit-data-dev",
|
||||
bucket_name="retrofit-datalake-dev",
|
||||
file_key=f"sap_change_model/simulation-pipeline-loft-insulation-predictions_{MODEL_VERSION}.parquet"
|
||||
)
|
||||
|
||||
# We now merge the loft insulation predictions onto the scoring data and calculate exactly how much the insulation
|
||||
# is worth
|
||||
|
||||
loft_insulation_comparison_df = loft_insulation_testing_df[
|
||||
["simulation_ending_insulation_thickness", "simulation_starting_insulation_thickness", "uprn", "id", ""]
|
||||
loft_insulation_comparison_matrix = loft_insulation_testing_df[
|
||||
["simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness", "uprn", "id",
|
||||
"sap_starting"]
|
||||
].merge(
|
||||
loft_insulation_predictions,
|
||||
loft_insulation_predictions.drop(columns=["recommendation_id"]),
|
||||
left_on="id",
|
||||
right_on="id",
|
||||
how="left"
|
||||
)
|
||||
|
||||
loft_insulation_comparison_matrix["measure_impact"] = loft_insulation_comparison_matrix["predictions"] - \
|
||||
loft_insulation_comparison_matrix["sap_starting"]
|
||||
# Perform a group by describe
|
||||
loft_insulation_describe = loft_insulation_comparison_matrix.groupby(
|
||||
["simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"]
|
||||
)[["measure_impact"]].describe().reset_index()
|
||||
|
||||
z = loft_insulation_comparison_matrix[loft_insulation_comparison_matrix["measure_impact"] < 0]
|
||||
z.head(1)[["uprn", "id"]]
|
||||
error_row = loft_insulation_testing_df[
|
||||
(loft_insulation_testing_df["id"] == "100090292333+loft_insulation_150_270mm")
|
||||
]
|
||||
|
||||
error_dataset = dataset[
|
||||
(dataset["uprn"] == "10070401239") & (dataset["roof_insulation_thickness"] == "250")
|
||||
]
|
||||
|
||||
changed_from_dataset = []
|
||||
for c in column_config:
|
||||
ending_value = error_row[column_config[c]].values[0]
|
||||
starting_value = error_row[column_config[c]].values[0]
|
||||
error_dataset["roof_insulation_thickness"]
|
||||
error_dataset["roof_insulation_thickness_ending"]
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue