diff --git a/backend/tests/test_sap_model_prep.py b/backend/tests/test_sap_model_prep.py index c27d05dc..dc793cad 100644 --- a/backend/tests/test_sap_model_prep.py +++ b/backend/tests/test_sap_model_prep.py @@ -9,96 +9,86 @@ import pytest from utils.s3 import read_dataframe_from_s3_parquet from tqdm import tqdm + # Handy code for selecting testin data -import pickle - -with open("sap_change_dataset.pickle", "rb") as f: - sap_change_dataset = pickle.load(f) - -z = sap_change_dataset[(sap_change_dataset["NUMBER_OPEN_FIREPLACES_ENDING"] > 0) & ( - sap_change_dataset["NUMBER_OPEN_FIREPLACES_STARTING"] > 0)].head(2).tail(1) -z["UPRN"] -z["SAP_STARTING"] -z["SAP_ENDING"] -z["NUMBER_OPEN_FIREPLACES_STARTING"] -z["NUMBER_OPEN_FIREPLACES_ENDING"] - -# 10002083298 - -# m -search_from = sap_change_dataset[ - (sap_change_dataset["walls_thermal_transmittance_ENDING"] == sap_change_dataset["walls_thermal_transmittance"]) -] -search_from = search_from[ - (search_from["roof_thermal_transmittance_ENDING"] == search_from["roof_thermal_transmittance"]) & - (search_from["floor_thermal_transmittance_ENDING"] == search_from["floor_thermal_transmittance"]) & - (search_from["MECHANICAL_VENTILATION_ENDING"] == search_from["MECHANICAL_VENTILATION_STARTING"]) & - (search_from["SECONDHEAT_DESCRIPTION_ENDING"] == search_from["SECONDHEAT_DESCRIPTION_STARTING"]) & - (search_from["GLAZED_TYPE_ENDING"] == search_from["GLAZED_TYPE_STARTING"]) & - (search_from["NUMBER_OPEN_FIREPLACES_STARTING"] > 0) & - (search_from["NUMBER_OPEN_FIREPLACES_ENDING"] == 0) - ] - -# Find a record where the only difference is cavity wall getting filled -ending_cols = [c for c in search_from.columns if "_ENDING" in c] - -ignore = [ - "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING", "TRANSACTION_TYPE_ENDING", "FLOOR_HEIGHT_ENDING", - "DAYS_TO_ENDING", "TOTAL_FLOOR_AREA_ENDING" -] - -ending_cols = [c for c in ending_cols if c not in ignore] - -for _, row in tqdm(search_from.iterrows(), total=search_from.shape[0]): - - same = True - starting_cols = [] - for c in ending_cols: - - starting_col = c.replace("_ENDING", "") - if starting_col not in search_from.columns: - starting_col = c.replace("_ENDING", "_STARTING") - if starting_col not in search_from.columns: - raise Exception("something went wrong") - - starting_cols.append(starting_col) - - # We want them to be different - if c == "NUMBER_OPEN_FIREPLACES_ENDING": - if (row[c] == row[starting_col]) | (row[starting_col] != "natural"): - same = False - break - else: - continue - - # We now check if the starting and ending values are the same - if row[c] != row[starting_col]: - same = False - break - - if same: - raise Exception("We found one!") - - fixed_cols = [c for c in search_from.columns if c not in starting_cols + ending_cols] - - import pandas as pd - - start = row[["SAP_STARTING"] + starting_cols] - start.index = [c.replace("_STARTING", "") for c in start.index] - end = row[["SAP_ENDING"] + ending_cols] - end.index = [c.replace("_ENDING", "") for c in end.index] - start["type"] = "starting" - end["type"] = "ending" - - compare = pd.concat([start, end], axis=1) - -ending_lmk = "bab3983fa167717b8bb4a36ef395046d53937f9b880a45bcc751270d72e5de45" -starting_lmk = "736b6f4803a11d9e45b49bf98f36eb8a7f357b0dd24f3e7cddef5295518e5bef" - -client = EpcClient(auth_token=EPC_AUTH_TOKEN) -result = client.domestic.search(params={"address": "9 Glebe Road, Asfordby Hill", "postcode": "LE14 3QT"}) -starting_epc = [x for x in result["rows"] if x["lmk-key"] == starting_lmk][0] -ending_epc = [x for x in result["rows"] if x["lmk-key"] == ending_lmk][0] +# import pickle +# +# with open("sap_change_dataset.pickle", "rb") as f: +# sap_change_dataset = pickle.load(f) +# +# search_from = sap_change_dataset[ +# (sap_change_dataset["walls_thermal_transmittance_ENDING"] == sap_change_dataset["walls_thermal_transmittance"]) +# ] +# search_from = search_from[ +# (search_from["roof_thermal_transmittance_ENDING"] == search_from["roof_thermal_transmittance"]) & +# (search_from["floor_thermal_transmittance_ENDING"] == search_from["floor_thermal_transmittance"]) & +# (search_from["MECHANICAL_VENTILATION_ENDING"] == search_from["MECHANICAL_VENTILATION_STARTING"]) & +# (search_from["SECONDHEAT_DESCRIPTION_ENDING"] == search_from["SECONDHEAT_DESCRIPTION_STARTING"]) & +# (search_from["GLAZED_TYPE_ENDING"] == search_from["GLAZED_TYPE_STARTING"]) & +# (search_from["NUMBER_OPEN_FIREPLACES_STARTING"] > 0) & +# (search_from["NUMBER_OPEN_FIREPLACES_ENDING"] == 0) +# ] +# +# # Find a record where the only difference is cavity wall getting filled +# ending_cols = [c for c in search_from.columns if "_ENDING" in c] +# +# ignore = [ +# "SAP_ENDING", "HEAT_DEMAND_ENDING", "CARBON_ENDING", "TRANSACTION_TYPE_ENDING", "FLOOR_HEIGHT_ENDING", +# "DAYS_TO_ENDING", "TOTAL_FLOOR_AREA_ENDING" +# ] +# +# ending_cols = [c for c in ending_cols if c not in ignore] +# +# for _, row in tqdm(search_from.iterrows(), total=search_from.shape[0]): +# +# same = True +# starting_cols = [] +# for c in ending_cols: +# +# starting_col = c.replace("_ENDING", "") +# if starting_col not in search_from.columns: +# starting_col = c.replace("_ENDING", "_STARTING") +# if starting_col not in search_from.columns: +# raise Exception("something went wrong") +# +# starting_cols.append(starting_col) +# +# # We want them to be different +# if c == "NUMBER_OPEN_FIREPLACES_ENDING": +# if (row[c] == row[starting_col]) | (row[starting_col] != "natural"): +# same = False +# break +# else: +# continue +# +# # We now check if the starting and ending values are the same +# if row[c] != row[starting_col]: +# same = False +# break +# +# if same: +# raise Exception("We found one!") +# +# fixed_cols = [c for c in search_from.columns if c not in starting_cols + ending_cols] +# +# import pandas as pd +# +# start = row[["SAP_STARTING"] + starting_cols] +# start.index = [c.replace("_STARTING", "") for c in start.index] +# end = row[["SAP_ENDING"] + ending_cols] +# end.index = [c.replace("_ENDING", "") for c in end.index] +# start["type"] = "starting" +# end["type"] = "ending" +# +# compare = pd.concat([start, end], axis=1) +# +# ending_lmk = "bab3983fa167717b8bb4a36ef395046d53937f9b880a45bcc751270d72e5de45" +# starting_lmk = "736b6f4803a11d9e45b49bf98f36eb8a7f357b0dd24f3e7cddef5295518e5bef" +# +# client = EpcClient(auth_token=EPC_AUTH_TOKEN) +# result = client.domestic.search(params={"address": "9 Glebe Road, Asfordby Hill", "postcode": "LE14 3QT"}) +# starting_epc = [x for x in result["rows"] if x["lmk-key"] == starting_lmk][0] +# ending_epc = [x for x in result["rows"] if x["lmk-key"] == ending_lmk][0] # with open(