debugging stonewater algorithm

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 15:16:54 +00:00
parent 4d021f0ba6
commit d00c291c17

View file

@ -1716,20 +1716,11 @@ def propsed_wave_3_sample():
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
if region_surveyed["Archetype ID"].duplicated().sum():
# Take the duplicated archetypes
duplicated_archetypes = region_surveyed[
region_surveyed["Archetype ID"].duplicated()
]["Archetype ID"].unique()
duplicated_archetypes = region_surveyed[
region_surveyed["Archetype ID"].isin(duplicated_archetypes)
]
# We need to select which one is the most relevant to these properties
survey_data = survey_results_with_original_features[
survey_results_with_original_features["Archetype ID"].isin(duplicated_archetypes["Archetype ID"].values)
]
raise NotImplementedError("Fix me")
region_surveyed = survey_results[
survey_results["Archetype ID"].isin(archetypes) &
(survey_results["Postal Region"] == region)
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
region_surveyed["Current EPC Band"] = region_surveyed["Current SAP Rating"].apply(sap_to_epc)
region_assets = region_assets.merge(
region_surveyed,
@ -1744,6 +1735,17 @@ def propsed_wave_3_sample():
pd.isnull(region_assets["Confidence Tier"]),
"1 - Archetype surveyed", region_assets["Confidence Tier"]
)
region_assets["Current EPC Band"] = np.where(
pd.isnull(region_assets["Current EPC Band"]) & pd.notnull(region_assets["Current EPC Band_method1"]),
region_assets["Current EPC Band_method1"], region_assets["Current EPC Band"]
)
# Handle EPC C or above (bands C, B and A) that have no confidence tier yet
region_assets["Confidence Tier"] = np.where(
region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
"6 - EPC C or above", region_assets["Confidence Tier"]
)
region_assets = region_assets.drop(columns=["Current EPC Band_method1"])
# TODO: Turn into a function
missed_archetypes = set(archetypes) - set(region_surveyed["Archetype ID"])
@ -1752,36 +1754,16 @@ def propsed_wave_3_sample():
survey_results["Archetype ID"].isin(missed_archetypes)
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
# TODO - We could average the property?? And call it borderline, call out it was averaged!!!
# We could also find the nearest property to it, with similar wall, roof, heating?
# Can use lat/long for the distance calc. We already have this data from previous work
if archetype_surveyed["Archetype ID"].duplicated().sum():
# We need to select which one is the most relevant to these properties
duplicated_archetypes = archetype_surveyed[
archetype_surveyed["Archetype ID"].duplicated()
]["Archetype ID"].unique()
survey_data = survey_results_with_original_features[
survey_results_with_original_features["Archetype ID"].isin(duplicated_archetypes)
]
homes_with_these_archetypes = region_assets[
region_assets["Archetype ID"].isin(duplicated_archetypes)
]
for _, home in homes_with_these_archetypes.iterrows():
first_filter = survey_data[
(survey_data["Postal Region"] == home["Postal Region"]) &
(survey_data["Property Type"] == home["Property Type"]) &
(survey_data["Wall Type"].str.split(":").str[0] == home["Wall Type"].split(":")[0])
]
if not first_filter.empty:
NotImplementedError("Fix me 0")
second_filter = survey_data[
(survey_data["Property Type"].str.split(":").str[0] == home["Property Type"].split(":")[0]) &
(survey_data["Wall Type"].str.split(":").str[0] == home["Wall Type"].split(":")[0])
]
raise NotImplementedError("Fix me 2")
archetype_surveyed = survey_results[
survey_results["Archetype ID"].isin(missed_archetypes)
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
archetype_surveyed["Current EPC Band"] = archetype_surveyed["Current SAP Rating"].apply(sap_to_epc)
archetype_surveyed = archetype_surveyed.drop(columns=["Current SAP Rating"])
region_assets = region_assets.merge(
archetype_surveyed,