mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixing unhandled cases in matching algorithm
This commit is contained in:
parent
eff80e637f
commit
a630fe05c4
1 changed files with 78 additions and 14 deletions
|
|
@ -1756,20 +1756,44 @@ def propsed_wave_3_sample():
|
|||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
if region_surveyed["Archetype ID"].duplicated().sum():
|
||||
blah1
|
||||
region_surveyed = survey_results[
|
||||
survey_results["Archetype ID"].isin(archetypes) &
|
||||
(survey_results["Postal Region"] == region)
|
||||
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
|
||||
region_surveyed["Current EPC Band"] = region_surveyed["Current SAP Rating"].apply(sap_to_epc)
|
||||
region_surveyed = region_surveyed.drop(columns=["Current SAP Rating"])
|
||||
region_surveyed = []
|
||||
for arch_id in archetypes:
|
||||
for _, property in region_assets[region_assets["Archetype ID"] == arch_id].iterrows():
|
||||
archetype_data = survey_results_with_original_features[
|
||||
survey_results["Archetype ID"] == arch_id
|
||||
].copy()
|
||||
if archetype_data.empty:
|
||||
continue
|
||||
archetype_data["distance_meters"] = haversine(
|
||||
lat1=property.latitude, lon1=property.longitude,
|
||||
lat2=archetype_data["latitude"].values, lon2=archetype_data["longitude"].values
|
||||
)
|
||||
expected_sap = np.average(
|
||||
archetype_data["Current SAP Rating"], weights=1 / (archetype_data["distance_meters"] + 1)
|
||||
)
|
||||
expected_epc = sap_to_epc(expected_sap)
|
||||
region_surveyed.append(
|
||||
{
|
||||
"Archetype ID": arch_id,
|
||||
"Address ID": property["Address ID"],
|
||||
"Current EPC Band": expected_epc
|
||||
}
|
||||
)
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on="Archetype ID",
|
||||
how="left",
|
||||
suffixes=("", "_method1")
|
||||
)
|
||||
region_surveyed = pd.DataFrame(region_surveyed)
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on=["Archetype ID", "Address ID"],
|
||||
how="left",
|
||||
suffixes=("", "_method1")
|
||||
)
|
||||
else:
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on="Archetype ID",
|
||||
how="left",
|
||||
suffixes=("", "_method1")
|
||||
)
|
||||
|
||||
# Label the tier 1 properties
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
|
|
@ -1897,7 +1921,47 @@ def propsed_wave_3_sample():
|
|||
].copy()
|
||||
|
||||
if surveyed.empty:
|
||||
blah3
|
||||
# In this case, we do one additional check, matching on everything being the same apart from heating,
|
||||
# where we accept a slightly rougher match
|
||||
surveyed = survey_results_with_original_features[
|
||||
(
|
||||
survey_results_with_original_features["Property Type"].str.split(":").str[0] ==
|
||||
property["Property Type"].split(":")[0]
|
||||
) &
|
||||
(
|
||||
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
||||
property["Wall Type"].split(":")[0]
|
||||
) &
|
||||
(
|
||||
survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
|
||||
property["Roof Type"].split(":")[0]
|
||||
)
|
||||
].copy()
|
||||
|
||||
if "Electric" in property["Heating"]:
|
||||
# Take other electric heating systems
|
||||
surveyed = surveyed[surveyed["Heating"].str.contains("Electric")]
|
||||
elif property["Heating"] == "Community Heating Systems: Community boilers only (RdSAP)":
|
||||
# Take other community heating systems
|
||||
surveyed = surveyed[surveyed["Heating"].str.contains("Community")]
|
||||
elif property["Heating"] == 'Heat Pump: (from database)':
|
||||
# Take other heat pumps
|
||||
surveyed = surveyed[surveyed["Heating"].str.contains("Heat Pump")]
|
||||
elif property["Heating"] == "Solid fuel room heaters: Open fire in grate":
|
||||
# Take other properties with room heaters
|
||||
surveyed = surveyed[surveyed["Heating"].str.contains("room heaters")]
|
||||
else:
|
||||
raise Exception("Fix me")
|
||||
|
||||
if surveyed.empty:
|
||||
final_missed_matches.append(
|
||||
{
|
||||
"Address ID": a_id,
|
||||
"Confidence Tier": "4 - no similar property, needs survey to confirm",
|
||||
"Current EPC Band": "Needs Survey"
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
# Calculate distance
|
||||
surveyed["distance_meters"] = haversine(
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue