mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Work in progress on the Stonewater algorithm
This commit is contained in:
parent
2158ab2cd5
commit
4d021f0ba6
1 changed file with 86 additions and 16 deletions
|
|
@ -3,6 +3,7 @@ import PyPDF2
|
|||
import re
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from docutils.utils.math.tex2mathml_extern import blahtexml
|
||||
from tqdm import tqdm
|
||||
from collections import Counter
|
||||
|
||||
|
|
@ -1681,19 +1682,15 @@ def propsed_wave_3_sample():
|
|||
for region in tqdm(unique_postal_regions):
|
||||
# Take all of the properties in that region
|
||||
region_assets = asset_list[asset_list["Postal Region"] == region].copy()
|
||||
archetypes = region_assets["Archetype ID"].unique()
|
||||
# We get the properties that have been surveyed
|
||||
region_surveyed = survey_results[
|
||||
survey_results["Archetype ID"].isin(archetypes) &
|
||||
(survey_results["Postal Region"] == region)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
if region_surveyed["Archetype ID"].duplicated().sum():
|
||||
raise NotImplementedError("Fix me")
|
||||
# We have a tier 1 match if the property itself was surveyed
|
||||
exact_surveyed = survey_results[
|
||||
survey_results["Address ID"].isin(region_assets["Address ID"])
|
||||
]
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on="Archetype ID",
|
||||
exact_surveyed[["Address ID", "Current EPC Band"]],
|
||||
on="Address ID",
|
||||
how="left"
|
||||
)
|
||||
|
||||
|
|
@ -1701,22 +1698,95 @@ def propsed_wave_3_sample():
|
|||
region_assets["Confidence Tier"] = None
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band"].isin(["D", "E", "F", "G"]),
|
||||
"1", region_assets["Confidence Tier"]
|
||||
"1 - property was surveyed", region_assets["Confidence Tier"]
|
||||
)
|
||||
# TODO: Turn into a function
|
||||
missed_archetypes = set(archetypes) - set(region_surveyed["Archetype ID"])
|
||||
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band"].isin(["C", "B", "A"]),
|
||||
"6 - property was surveyed", region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
archetypes = region_assets[
|
||||
pd.isnull(region_assets["Confidence Tier"])
|
||||
]["Archetype ID"].unique()
|
||||
# We get the properties that have been surveyed
|
||||
region_surveyed = survey_results[
|
||||
survey_results["Archetype ID"].isin(missed_archetypes)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
survey_results["Archetype ID"].isin(archetypes) &
|
||||
(survey_results["Postal Region"] == region)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
if region_surveyed["Archetype ID"].duplicated().sum():
|
||||
raise NotImplementedError("Fix me 2")
|
||||
# Take the duplicated archetypes
|
||||
duplicated_archetypes = region_surveyed[
|
||||
region_surveyed["Archetype ID"].duplicated()
|
||||
]["Archetype ID"].unique()
|
||||
duplicated_archetypes = region_surveyed[
|
||||
region_surveyed["Archetype ID"].isin(duplicated_archetypes)
|
||||
]
|
||||
|
||||
# We need to select which one is the most relevant to these properties
|
||||
survey_data = survey_results_with_original_features[
|
||||
survey_results_with_original_features["Archetype ID"].isin(duplicated_archetypes["Archetype ID"].values)
|
||||
]
|
||||
|
||||
raise NotImplementedError("Fix me")
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
on="Archetype ID",
|
||||
how="left",
|
||||
suffixes=("", "_method1")
|
||||
)
|
||||
|
||||
# Label the tier 1 properties
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band_method1"].isin(["D", "E", "F", "G"]) &
|
||||
pd.isnull(region_assets["Confidence Tier"]),
|
||||
"1 - Archetype surveyed", region_assets["Confidence Tier"]
|
||||
)
|
||||
region_assets = region_assets.drop(columns=["Current EPC Band_method1"])
|
||||
# TODO: Turn into a function
|
||||
missed_archetypes = set(archetypes) - set(region_surveyed["Archetype ID"])
|
||||
|
||||
archetype_surveyed = survey_results[
|
||||
survey_results["Archetype ID"].isin(missed_archetypes)
|
||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||
|
||||
if archetype_surveyed["Archetype ID"].duplicated().sum():
|
||||
# We need to select which one is the most relevant to these properties
|
||||
duplicated_archetypes = archetype_surveyed[
|
||||
archetype_surveyed["Archetype ID"].duplicated()
|
||||
]["Archetype ID"].unique()
|
||||
|
||||
survey_data = survey_results_with_original_features[
|
||||
survey_results_with_original_features["Archetype ID"].isin(duplicated_archetypes)
|
||||
]
|
||||
|
||||
homes_with_these_archetypes = region_assets[
|
||||
region_assets["Archetype ID"].isin(duplicated_archetypes)
|
||||
]
|
||||
|
||||
for _, home in homes_with_these_archetypes.iterrows():
|
||||
first_filter = survey_data[
|
||||
(survey_data["Postal Region"] == home["Postal Region"]) &
|
||||
(survey_data["Property Type"] == home["Property Type"]) &
|
||||
(survey_data["Wall Type"].str.split(":").str[0] == home["Wall Type"].split(":")[0])
|
||||
]
|
||||
|
||||
if not first_filter.empty:
|
||||
NotImplementedError("Fix me 0")
|
||||
|
||||
second_filter = survey_data[
|
||||
(survey_data["Property Type"].str.split(":").str[0] == home["Property Type"].split(":")[0]) &
|
||||
(survey_data["Wall Type"].str.split(":").str[0] == home["Wall Type"].split(":")[0])
|
||||
]
|
||||
|
||||
raise NotImplementedError("Fix me 2")
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
archetype_surveyed,
|
||||
on="Archetype ID",
|
||||
how="left",
|
||||
suffixes=("", "_method2")
|
||||
)
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue