fixing unhandled cases in matching algorithm

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 19:46:17 +00:00
parent eff80e637f
commit a630fe05c4

View file

@ -1756,20 +1756,44 @@ def propsed_wave_3_sample():
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
if region_surveyed["Archetype ID"].duplicated().sum():
blah1
region_surveyed = survey_results[
survey_results["Archetype ID"].isin(archetypes) &
(survey_results["Postal Region"] == region)
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
region_surveyed["Current EPC Band"] = region_surveyed["Current SAP Rating"].apply(sap_to_epc)
region_surveyed = region_surveyed.drop(columns=["Current SAP Rating"])
region_surveyed = []
for arch_id in archetypes:
for _, property in region_assets[region_assets["Archetype ID"] == arch_id].iterrows():
archetype_data = survey_results_with_original_features[
survey_results["Archetype ID"] == arch_id
].copy()
if archetype_data.empty:
continue
archetype_data["distance_meters"] = haversine(
lat1=property.latitude, lon1=property.longitude,
lat2=archetype_data["latitude"].values, lon2=archetype_data["longitude"].values
)
expected_sap = np.average(
archetype_data["Current SAP Rating"], weights=1 / (archetype_data["distance_meters"] + 1)
)
expected_epc = sap_to_epc(expected_sap)
region_surveyed.append(
{
"Archetype ID": arch_id,
"Address ID": property["Address ID"],
"Current EPC Band": expected_epc
}
)
region_assets = region_assets.merge(
region_surveyed,
on="Archetype ID",
how="left",
suffixes=("", "_method1")
)
region_surveyed = pd.DataFrame(region_surveyed)
region_assets = region_assets.merge(
region_surveyed,
on=["Archetype ID", "Address ID"],
how="left",
suffixes=("", "_method1")
)
else:
region_assets = region_assets.merge(
region_surveyed,
on="Archetype ID",
how="left",
suffixes=("", "_method1")
)
# Label the tier 1 properties
region_assets["Confidence Tier"] = np.where(
@ -1897,7 +1921,47 @@ def propsed_wave_3_sample():
].copy()
if surveyed.empty:
blah3
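# No match was found above, so make one additional, looser attempt: require the property,
# wall and roof types to agree on the part before the first ":", and match heating only
# roughly, by heating-system family, instead of requiring an identical heating description.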
surveyed = survey_results_with_original_features[
(
survey_results_with_original_features["Property Type"].str.split(":").str[0] ==
property["Property Type"].split(":")[0]
) &
(
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
property["Wall Type"].split(":")[0]
) &
(
survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
property["Roof Type"].split(":")[0]
)
].copy()
if "Electric" in property["Heating"]:
# Take other electric heating systems
surveyed = surveyed[surveyed["Heating"].str.contains("Electric")]
elif property["Heating"] == "Community Heating Systems: Community boilers only (RdSAP)":
# Take other community heating systems
surveyed = surveyed[surveyed["Heating"].str.contains("Community")]
elif property["Heating"] == 'Heat Pump: (from database)':
# Take other heat pumps
surveyed = surveyed[surveyed["Heating"].str.contains("Heat Pump")]
elif property["Heating"] == "Solid fuel room heaters: Open fire in grate":
# Take other properties with room heaters
surveyed = surveyed[surveyed["Heating"].str.contains("room heaters")]
else:
raise Exception("Fix me")
if surveyed.empty:
final_missed_matches.append(
{
"Address ID": a_id,
"Confidence Tier": "4 - no similar property, needs survey to confirm",
"Current EPC Band": "Needs Survey"
}
)
continue
# Calculate distance
surveyed["distance_meters"] = haversine(