looking into loft insulation data

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-12 19:16:10 +00:00
parent bd0fb8c2c1
commit 7a219285fc

View file

@ -543,24 +543,112 @@ def app():
loft_insulation_comparison_matrix["measure_impact"] = loft_insulation_comparison_matrix["predictions"] - \
loft_insulation_comparison_matrix["sap_starting"]
# Bucket the starting SAP score into bands of 10 points (1-10, 11-20, 21-30, ...), labelled 1 to 10
loft_insulation_comparison_matrix["sap_band"] = pd.cut(
loft_insulation_comparison_matrix["sap_starting"],
bins=range(0, 101, 10),
labels=range(1, 11)
)
# Perform a group by describe
loft_insulation_describe = loft_insulation_comparison_matrix.groupby(
["simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"]
["sap_band", "simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"]
)[["measure_impact"]].describe().reset_index()
z = loft_insulation_comparison_matrix[loft_insulation_comparison_matrix["measure_impact"] < 0]
z.head(1)[["uprn", "id"]]
error_row = loft_insulation_testing_df[
(loft_insulation_testing_df["id"] == "100090292333+loft_insulation_150_270mm")
]
for col in ["simulation_starting_insulation_thickness", "simulation_ending_insulation_thickness"]:
loft_insulation_describe[col] = loft_insulation_describe[col].str.replace('none', "0")
loft_insulation_describe[col] = loft_insulation_describe[col].astype(int)
error_dataset = dataset[
(dataset["uprn"] == "10070401239") & (dataset["roof_insulation_thickness"] == "250")
loft_insulation_describe = loft_insulation_describe.sort_values(
["simulation_ending_insulation_thickness", "simulation_starting_insulation_thickness"], ascending=True
)
# In the training data, try to isolate the rows where loft insulation is the only thing that changed.
# Columns that are allowed to change:
# 1) roof_insulation_thickness
# 2) roof_thermal_transmittance
# 3) roof_energy_eff_ending
loft_insulation_training_data = dataset.copy()
loft_insulation_columns_we_need_the_same = [c for c in column_config.keys() if c not in [
"roof_insulation_thickness_ending", "roof_thermal_transmittance_ending", "roof_energy_eff_ending",
"transaction_type_ending", "days_to_ending", "sap_ending", "heat_demand_ending", "carbon_ending",
"total_floor_area_ending", "floor_height_ending", "estimated_perimeter_ending"
]]
for ending_col in tqdm(loft_insulation_columns_we_need_the_same):
starting_col = column_config[ending_col]
loft_insulation_training_data = loft_insulation_training_data[
loft_insulation_training_data[ending_col] == loft_insulation_training_data[starting_col]
]
# We get rows where the insulation starts at 200mm and ends at 300mm
insulation_200mm_starting = loft_insulation_training_data[
(loft_insulation_training_data["roof_insulation_thickness"] == "200") &
(loft_insulation_training_data["roof_insulation_thickness_ending"] == "300")
]
changed_from_dataset = []
for c in column_config:
ending_value = error_row[column_config[c]].values[0]
starting_value = error_row[column_config[c]].values[0]
error_dataset["roof_insulation_thickness"]
error_dataset["roof_insulation_thickness_ending"]
# Let's use the API to find exactly the record
from backend.SearchEpc import SearchEpc
searcher = SearchEpc(
address1="2 Darkfield Way",
postcode="TA7 8HY",
auth_token="a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=",
os_api_key=""
)
searcher.uprn = "10009320092"
searcher.find_property(skip_os=True)
newest_epc = searcher.newest_epc
older_epc = [epc for epc in searcher.older_epcs if
epc["lmk-key"] == "5ae2f073004839510f9eeb1886160776a05697f8518b8b3b63d45f65686c4757"][0]
# Compare the newest EPC with the older EPC key by key, recording each field whose value differs as (newest, older)
differences = {}
for k, v in newest_epc.items():
if v != older_epc[k]:
differences[k] = (v, older_epc[k])
testing_model_api = ModelApi(portfolio_id="simulation-testing-loft-example", timestamp=created_at)
testing_model_api.MODEL_PREFIXES = ["sap_change_predictions"]
############################################################################################################
# TODO:
# Finding 1) For UPRN 10009320092, the number of habitable rooms and the number of heated rooms changed between
# EPCs, so these values can differ from EPC to EPC. We should therefore include a starting and an ending value
# for each of them.
# Investigation 1)
testing_row = insulation_200mm_starting[insulation_200mm_starting["uprn"] == "10009320092"].copy()
testing_row["id"] = "testing-200mm-loft-insulation-starting-baseline+recommendation_id_baseline"
testing_row["recommendation_id"] = "recommendation_id_baseline"
# The testing row has 4 rooms
# Score in the model to see what we get
baseline_prediction = testing_model_api.predict_all(
df=testing_row,
bucket="retrofit-data-dev",
prediction_buckets={
"sap_change_predictions": "retrofit-sap-predictions-dev",
}
)
baseline_pred_df = baseline_prediction["sap_change_predictions"]
impact = baseline_pred_df["predictions"].values[0] - testing_row["sap_starting"].values[0]
# Changing this from 4 rooms to 5 rooms has NO impact!!
testing_row_5_rooms = testing_row.copy()
testing_row_5_rooms["id"] = "testing-200mm-loft-insulation-starting-baseline+recommendation_id_5_rooms"
testing_row_5_rooms["recommendation_id"] = "recommendation_id_5_rooms"
testing_row_5_rooms["number_habitable_rooms"] = float(5)
testing_row_5_rooms["number_heated_rooms"] = float(5)
prediction_5_rooms = testing_model_api.predict_all(
df=testing_row_5_rooms,
bucket="retrofit-data-dev",
prediction_buckets={
"sap_change_predictions": "retrofit-sap-predictions-dev",
}
)
pred_df_5_rooms = prediction_5_rooms["sap_change_predictions"]
impact_5_rooms = pred_df_5_rooms["predictions"].values[0] - testing_row_5_rooms["sap_starting"].values[0]