mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
set up load data function for cancellation app
This commit is contained in:
parent
1bb188a8b8
commit
7969f51733
9 changed files with 234 additions and 91 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (backend)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (backend)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -358,9 +358,16 @@ def prepare_model_data_row(
|
|||
|
||||
p.get_components(cleaned, photo_supply_lookup=photo_supply_lookup,
|
||||
floor_area_decile_thresholds=floor_area_decile_thresholds)
|
||||
|
||||
# THIS IS TEMP AND SHOULDN'T BE HERE
|
||||
data_to_clean = p.get_model_data()
|
||||
if data_to_clean["NUMBER_HEATED_ROOMS"] in ['', None]:
|
||||
data_to_clean["NUMBER_HEATED_ROOMS"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
|
||||
p.data["number-heated-rooms"] = data_to_clean["NUMBER_HABITABLE_ROOMS"]
|
||||
|
||||
# This is temp - this should happen after scoring
|
||||
cleaned_property_data = DataProcessor.apply_averages_cleaning(
|
||||
data_to_clean=pd.DataFrame([dict(**p.get_model_data(), LOCAL_AUTHORITY=p.data["local-authority"])]),
|
||||
data_to_clean=pd.DataFrame([dict(**data_to_clean, LOCAL_AUTHORITY=p.data["local-authority"])]),
|
||||
cleaning_data=cleaning_data,
|
||||
cols_to_merge_on=['PROPERTY_TYPE', 'BUILT_FORM', 'CONSTRUCTION_AGE_BAND', 'LOCAL_AUTHORITY'],
|
||||
)
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
|||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
|
|
@ -484,9 +485,6 @@ def analyse_results(results_df, data, survey_list):
|
|||
how="left", on="survey_key"
|
||||
)
|
||||
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
analysis_data["roof_insulation_thickness"] = analysis_data["roof_insulation_thickness"].fillna(None)
|
||||
analysis_data["roof_insulation_thickness"] = np.where(
|
||||
pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
|
||||
)
|
||||
|
|
@ -497,13 +495,12 @@ def analyse_results(results_df, data, survey_list):
|
|||
warmfront_sold_eco4 = analysis_data[
|
||||
(analysis_data["warmfront_identified"] == True) & (
|
||||
analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
|
||||
]
|
||||
] # 1407
|
||||
|
||||
warmfront_sold_gbis = analysis_data[
|
||||
(analysis_data["warmfront_identified"] == True) & (
|
||||
analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
|
||||
]
|
||||
# 1407
|
||||
|
||||
ideal_eco4_warmfront_not_sold = analysis_data[
|
||||
(analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
|
||||
|
|
@ -519,7 +516,7 @@ def analyse_results(results_df, data, survey_list):
|
|||
underperforming_cavities = analysis_data[
|
||||
(analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
|
||||
analysis_data["cavity_age"] > 10 * 365
|
||||
)
|
||||
) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
|
||||
]
|
||||
|
||||
identified_gbis_not_sold = analysis_data[
|
||||
|
|
@ -643,7 +640,7 @@ def app():
|
|||
|
||||
# Read pickle
|
||||
# import pickle
|
||||
# with open("ha16_8_jan_2.pickle", "rb") as f:
|
||||
# with open("ha16_10_jan.pickle", "rb") as f:
|
||||
# saved = pickle.load(f)
|
||||
# scoring_data = saved["scoring_data"]
|
||||
# results_df = saved["results"]
|
||||
|
|
|
|||
|
|
@ -17,6 +17,7 @@ from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
|||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
|
||||
|
|
@ -393,6 +394,42 @@ def analyse_results(results_df, data, survey_list):
|
|||
how="left", on="survey_key"
|
||||
)
|
||||
|
||||
# NEW
|
||||
|
||||
analysis_data["roof_insulation_thickness"] = np.where(
|
||||
pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
|
||||
)
|
||||
analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
|
||||
lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
|
||||
)
|
||||
|
||||
warmfront_sold_eco4 = analysis_data[
|
||||
(analysis_data["warmfront_identified"] == True) & (
|
||||
analysis_data["funding_scheme"].isin(["ECO4 A/W", "AFFORDABLE WARMTH"]))
|
||||
]
|
||||
|
||||
warmfront_sold_gbis = analysis_data[
|
||||
(analysis_data["warmfront_identified"] == True) & (
|
||||
analysis_data["funding_scheme"].isin(["ECO4 GBIS (ECO+)"]))
|
||||
]
|
||||
# 1407
|
||||
|
||||
additional_eco4_warmfront_not_sold = analysis_data[
|
||||
(analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
|
||||
analysis_data["roof_insulation_thickness_numeric"] <= 100)
|
||||
]
|
||||
|
||||
additional_gbis_warmfront_not_sold = analysis_data[
|
||||
(analysis_data["gbis_eligible"] == True) & (analysis_data["warmfront_identified"] == False) & (
|
||||
~analysis_data["row_id"].isin(additional_eco4_warmfront_not_sold["row_id"].values)
|
||||
)
|
||||
]
|
||||
|
||||
additional_gbis_warmfront_not_sold["walls"].value_counts()
|
||||
analysis_data["walls"].value_counts()
|
||||
|
||||
# END NEW
|
||||
|
||||
all_identified_eco = analysis_data[
|
||||
(analysis_data["warmfront_identified"] & analysis_data["funding_scheme"].isin(
|
||||
["ECO4 A/W"])) |
|
||||
|
|
@ -480,7 +517,7 @@ def app():
|
|||
|
||||
# Read in pickle
|
||||
# import pickle
|
||||
# with open("ha24_8_jan.pickle", "rb") as f:
|
||||
# with open("ha24_10_jan.pickle", "rb") as f:
|
||||
# saved = pickle.load(f)
|
||||
# scoring_data = saved["scoring_data"]
|
||||
# results_df = saved["results"]
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ from etl.epc.DataProcessor import DataProcessor
|
|||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
|
|
@ -341,7 +343,7 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
"Guest Room": {"property-type": None, "built-form": None}
|
||||
}
|
||||
|
||||
for _, property_meta in tqdm(data.iterrows(), total=len(data)):
|
||||
for _, property_meta in tqdm(data, total=len(data)):
|
||||
|
||||
searcher = SearchEpc(
|
||||
address1=property_meta["HouseNo"],
|
||||
|
|
@ -368,22 +370,35 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
older_epcs = searcher.older_epcs
|
||||
full_sap_epc = searcher.full_sap_epc
|
||||
# We also want to get the penultimate epc
|
||||
penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
|
||||
if not penultimate_epc:
|
||||
penultimate_epc = newest_epc
|
||||
# penultimate_epc, _ = searcher.filter_newest_epc(older_epcs)
|
||||
# if not penultimate_epc:
|
||||
# penultimate_epc = newest_epc
|
||||
|
||||
eligibility = Eligibility(epc=newest_epc, cleaned=cleaned)
|
||||
eligibility.check_gbis_warmfront()
|
||||
eligibility.check_eco4_warmfront()
|
||||
|
||||
if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
|
||||
eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
|
||||
eligibility.check_gbis_warmfront()
|
||||
eligibility.check_eco4_warmfront()
|
||||
# If this is the case, we need to update the older epcs
|
||||
# We don't update just to make data cleaning easier
|
||||
if penultimate_epc.get("estimated") is None:
|
||||
older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
|
||||
# if (not eligibility.eco4_warmfront["eligible"]) and (not eligibility.gbis_warmfront):
|
||||
# eligibility = Eligibility(epc=penultimate_epc, cleaned=cleaned)
|
||||
# eligibility.check_gbis_warmfront()
|
||||
# eligibility.check_eco4_warmfront()
|
||||
# # If this is the case, we need to update the older epcs
|
||||
# # We don't update just to make data cleaning easier
|
||||
# if penultimate_epc.get("estimated") is None:
|
||||
# older_epcs = [x for x in searcher.data["rows"] if x["lmk-key"] != penultimate_epc["lmk-key"]]
|
||||
|
||||
# If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
|
||||
|
||||
# Loft MUST be suitable
|
||||
cavity_age = None
|
||||
if (
|
||||
eligibility.walls["is_cavity_wall"] and
|
||||
eligibility.walls["is_filled_cavity"] and
|
||||
eligibility.loft["suitability"] and
|
||||
eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
|
||||
):
|
||||
# We check the age of the cavity and if it's particularly old, we flag it
|
||||
cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
|
||||
|
||||
# Full checks
|
||||
eligibility.check_gbis()
|
||||
|
|
@ -396,6 +411,15 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
if eligibility.epc["construction-age-band"] in ["", None]:
|
||||
eligibility.epc["construction-age-band"] = map_year_to_age_band(property_meta["Build Yr"])
|
||||
|
||||
# This is not the right place to do this but this is temp
|
||||
if eligibility.epc["extension-count"] in ["", None]:
|
||||
eligibility.epc["extension-count"] = 0
|
||||
|
||||
# Not in the right place but temp
|
||||
if eligibility.epc["built-form"] in ["", None]:
|
||||
if not older_epcs:
|
||||
eligibility.epc["built-form"] = "Mid-Terrace"
|
||||
|
||||
scoring_dictionary = prepare_model_data_row(
|
||||
property_id=property_meta["row_id"],
|
||||
modelling_epc=eligibility.epc,
|
||||
|
|
@ -431,6 +455,9 @@ def get_epc_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
"heating": eligibility.epc["mainheat-description"],
|
||||
"tenure": eligibility.tenure,
|
||||
"date_epc": eligibility.epc["lodgement-date"],
|
||||
"cavity_age": cavity_age,
|
||||
**eligibility.walls,
|
||||
**eligibility.roof,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -657,6 +684,8 @@ def get_epc_data_for_lost_surveys(
|
|||
"heating": eligibility.epc["mainheat-description"],
|
||||
"tenure": eligibility.tenure,
|
||||
"date_epc": eligibility.epc["lodgement-date"],
|
||||
**eligibility.walls,
|
||||
**eligibility.roof,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -758,58 +787,51 @@ def analyse_results(results_df, data, eco4_prospects_survey_list):
|
|||
results_df, how="left", on="row_id"
|
||||
)
|
||||
|
||||
warmfront_identified = analysis_data[analysis_data["warmfront_identified"]]
|
||||
# NEW
|
||||
analysis_data["roof_insulation_thickness"] = np.where(
|
||||
pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
|
||||
)
|
||||
analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
|
||||
lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
|
||||
)
|
||||
|
||||
identified_eco = analysis_data[analysis_data["eco4_eligible"] == True]
|
||||
identified_eco = identified_eco[identified_eco["eco4_message"] == "subject to post retrofit sap"]
|
||||
warmfront_identified = analysis_data[
|
||||
(analysis_data["warmfront_identified"] == True)
|
||||
] # 2204
|
||||
|
||||
identified_gbis = analysis_data[
|
||||
(analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False)
|
||||
# Because we don't know which property is for which scheme, we'll just look at what we found
|
||||
ideal_eco4 = analysis_data[
|
||||
(analysis_data["eco4_eligible"] == True) &
|
||||
(analysis_data["roof_insulation_thickness_numeric"] <= 100) &
|
||||
(analysis_data["sap"] <= 54)
|
||||
] # 335
|
||||
|
||||
gbis = analysis_data[
|
||||
(analysis_data["gbis_eligible"] == True) &
|
||||
~analysis_data["row_id"].isin(ideal_eco4["row_id"].values)
|
||||
]
|
||||
|
||||
# Take just unfilled cavities and remove filled potentials
|
||||
identified_gbis["walls"].value_counts()
|
||||
|
||||
identified_gbis["walls"].value_counts()
|
||||
|
||||
# Of the ECO jobs, what proportion do we get right
|
||||
|
||||
success_rate = (warmfront_identified["eco4_eligible"] | warmfront_identified["gbis_eligible"]).sum() / \
|
||||
warmfront_identified.shape[
|
||||
0]
|
||||
|
||||
# No gbis for this
|
||||
# gbis_success_rate = warmfront_identified_gbis["gbis_eligible"].sum() / warmfront_identified_gbis.shape[0]
|
||||
|
||||
# Additional identified
|
||||
additional_identified_eco = analysis_data[
|
||||
(analysis_data["eco4_eligible"] == True) & (analysis_data["warmfront_identified"] == False)
|
||||
]
|
||||
|
||||
additional_identified_eco["eligibility_classification"].value_counts()
|
||||
|
||||
additional_identified_gbis = analysis_data[
|
||||
(analysis_data["gbis_eligible"] == True) & (analysis_data["eco4_eligible"] == False) & (
|
||||
analysis_data["warmfront_identified"] == False
|
||||
)
|
||||
].shape[0]
|
||||
|
||||
# Future
|
||||
additional_identified_eco_future = analysis_data[
|
||||
(analysis_data["eco4_eligible_future"] == True) & (analysis_data["warmfront_identified"] == False)
|
||||
].shape[0]
|
||||
additional_identified_gbis_future = analysis_data[
|
||||
(analysis_data["gbis_eligible_future"] == True) & (analysis_data["eco4_eligible_future"] == False) & (
|
||||
analysis_data["warmfront_identified"] == False
|
||||
)
|
||||
].shape[0]
|
||||
ideal_eco4 = ideal_eco4[ideal_eco4["sap"] <= 54]
|
||||
|
||||
|
||||
def analyse_lost_surveys(results_df):
|
||||
identified_eco = results_df[results_df["eco4_eligible"] == True]
|
||||
# 59 for lost surveys
|
||||
identified_gbis = results_df[results_df["gbis_eligible"] == True]
|
||||
# 107
|
||||
results_df["roof_insulation_thickness"] = np.where(
|
||||
pd.isnull(results_df["roof_insulation_thickness"]), None, results_df["roof_insulation_thickness"]
|
||||
)
|
||||
results_df["roof_insulation_thickness_numeric"] = results_df["roof_insulation_thickness"].apply(
|
||||
lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
|
||||
)
|
||||
|
||||
ideal_eco4 = results_df[
|
||||
(results_df["eco4_eligible"] == True) &
|
||||
(results_df["roof_insulation_thickness_numeric"] <= 100) &
|
||||
(results_df["sap"] <= 54)
|
||||
] # 25
|
||||
|
||||
gbis = results_df[
|
||||
(results_df["gbis_eligible"] == True) &
|
||||
~results_df["row_id"].isin(ideal_eco4["row_id"].values)
|
||||
] # 82
|
||||
|
||||
|
||||
def app():
|
||||
|
|
@ -837,7 +859,7 @@ def app():
|
|||
# Pickle the outputs
|
||||
# Old data was ha25.pickle
|
||||
# import pickle
|
||||
# with open("ha25_9_jan.pickle", "wb") as f:
|
||||
# with open("ha25_10_jan.pickle", "wb") as f:
|
||||
# pickle.dump(
|
||||
# {
|
||||
# "results_df": results_df,
|
||||
|
|
@ -848,9 +870,9 @@ def app():
|
|||
# )
|
||||
|
||||
# Load in pickle
|
||||
# import pickle
|
||||
# with open("ha25_9_jan.pickle", "rb") as f:
|
||||
# saved = pickle.load(f)
|
||||
# results_df = saved["results_df"]
|
||||
# scoring_data = saved["scoring_data"]
|
||||
# nodata = saved["nodata"]
|
||||
import pickle
|
||||
with open("ha25_10_jan.pickle", "rb") as f:
|
||||
saved = pickle.load(f)
|
||||
results_df = saved["results_df"]
|
||||
scoring_data = saved["scoring_data"]
|
||||
nodata = saved["nodata"]
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import os
|
||||
import msgpack
|
||||
from pathlib import Path
|
||||
from datetime import datetime
|
||||
|
|
@ -6,7 +7,7 @@ import pandas as pd
|
|||
from utils.s3 import read_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from dotenv import load_dotenv
|
||||
from backend.app.utils import read_parquet_from_s3
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from tqdm import tqdm
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.eligibility.Eligibility import Eligibility
|
||||
|
|
@ -14,9 +15,13 @@ from etl.eligibility.ha_15_32.app import prepare_model_data_row
|
|||
from etl.epc.DataProcessor import DataProcessor
|
||||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
import re
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
|
||||
|
||||
logger = setup_logger()
|
||||
|
|
@ -52,7 +57,7 @@ def standardise_ha_4(data):
|
|||
return data
|
||||
|
||||
|
||||
def get_ha_4_data(data, cleaned, cleaning_data, created_at):
|
||||
def get_ha_4_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup, floor_area_decile_thresholds):
|
||||
scoring_data = []
|
||||
results = []
|
||||
nodata = []
|
||||
|
|
@ -62,19 +67,33 @@ def get_ha_4_data(data, cleaned, cleaning_data, created_at):
|
|||
searcher = SearchEpc(
|
||||
address1=property_meta["Address Line 1"],
|
||||
postcode=property_meta["Post Code"],
|
||||
size=1000
|
||||
auth_token=EPC_AUTH_TOKEN,
|
||||
os_api_key=None,
|
||||
property_type=property_type_lookup.get(house["Archetype"]),
|
||||
)
|
||||
|
||||
searcher.search()
|
||||
searcher.find_property(skip_os=True)
|
||||
|
||||
if searcher.data is None:
|
||||
if searcher.newest_epc is None:
|
||||
searcher = SearchEpc(
|
||||
address1=property_meta["Location Name"],
|
||||
postcode=property_meta["Post Code"],
|
||||
size=1000
|
||||
auth_token=EPC_AUTH_TOKEN,
|
||||
os_api_key=None,
|
||||
property_type=property_type_lookup.get(house["Archetype"]),
|
||||
)
|
||||
searcher.search()
|
||||
|
||||
if searcher.newest_epc is None:
|
||||
nodata.append(house["row_id"])
|
||||
continue
|
||||
|
||||
newest_epc = searcher.newest_epc
|
||||
older_epcs = searcher.older_epcs
|
||||
full_sap_epc = searcher.full_sap_epc
|
||||
|
||||
searcher.search()
|
||||
|
||||
if searcher.data is None:
|
||||
nodata.append(property_meta.to_dict())
|
||||
continue
|
||||
|
|
@ -273,17 +292,21 @@ def app():
|
|||
)
|
||||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
|
||||
cleaning_data = read_parquet_from_s3(
|
||||
cleaning_data = read_dataframe_from_s3_parquet(
|
||||
bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
)
|
||||
|
||||
created_at = datetime.now().isoformat()
|
||||
|
||||
photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
|
||||
|
||||
results_df, scoring_data, nodata = get_ha_4_data(
|
||||
data=data,
|
||||
cleaned=cleaned,
|
||||
cleaning_data=cleaning_data,
|
||||
created_at=created_at
|
||||
created_at=created_at,
|
||||
photo_supply_lookup=photo_supply_lookup,
|
||||
floor_area_decile_thresholds=floor_area_decile_thresholds
|
||||
)
|
||||
|
||||
# Store the data locally as a pickle
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@ from etl.epc.DataProcessor import DataProcessor
|
|||
from etl.epc.settings import COLUMNS_TO_MERGE_ON
|
||||
from backend.ml_models.api import ModelApi
|
||||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from recommendation_utils import convert_thickness_to_numeric
|
||||
|
||||
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
|
||||
|
||||
|
|
@ -112,6 +114,19 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
eligibility.check_gbis_warmfront()
|
||||
eligibility.check_eco4_warmfront()
|
||||
|
||||
# If the property is a cavity wall and it's filled, we produce an estimate for the age of the cavity
|
||||
|
||||
# Loft MUST be suitable
|
||||
cavity_age = None
|
||||
if (
|
||||
eligibility.walls["is_cavity_wall"] and
|
||||
eligibility.walls["is_filled_cavity"] and
|
||||
eligibility.loft["suitability"] and
|
||||
eligibility.eco4_warmfront["message"] == "Failed due to full cavity - check cavity age"
|
||||
):
|
||||
# We check the age of the cavity and if it's particularly old, we flag it
|
||||
cavity_age = calculate_cavity_age(newest_epc, older_epcs, cleaned)
|
||||
|
||||
# If the house is not identified, we do a full gbis and eco4 check
|
||||
eligibility.check_gbis()
|
||||
eligibility.check_eco4()
|
||||
|
|
@ -151,6 +166,9 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
"tenure": eligibility.tenure,
|
||||
"date_epc": eligibility.epc["lodgement-date"],
|
||||
**newest_epc,
|
||||
"cavity_age": cavity_age,
|
||||
**eligibility.walls,
|
||||
**eligibility.roof,
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -250,21 +268,56 @@ def get_ha7_data(data, cleaned, cleaning_data, created_at, photo_supply_lookup,
|
|||
|
||||
|
||||
def analyse_ha_7(results_df, data):
|
||||
df = results_df.merge(
|
||||
analysis_data = results_df.merge(
|
||||
data[["row_id", "row_code", "Property Type", "Construction Year Band"]], how="left", on="row_id"
|
||||
)
|
||||
warmfront_identification = df["row_code"].value_counts()
|
||||
warmfront_identified = df[df["row_code"] == "potential ECO4"]
|
||||
|
||||
# NEW
|
||||
|
||||
analysis_data["roof_insulation_thickness"] = np.where(
|
||||
pd.isnull(analysis_data["roof_insulation_thickness"]), None, analysis_data["roof_insulation_thickness"]
|
||||
)
|
||||
analysis_data["roof_insulation_thickness_numeric"] = analysis_data["roof_insulation_thickness"].apply(
|
||||
lambda x: convert_thickness_to_numeric(x, is_flat=False, is_pitched=True)
|
||||
)
|
||||
|
||||
ideal_eco4 = analysis_data[
|
||||
(analysis_data["eco4_eligible"] == True) & (
|
||||
analysis_data["roof_insulation_thickness_numeric"] <= 100)
|
||||
]
|
||||
|
||||
secondary_eco4_warmfront_not_sold = analysis_data[
|
||||
(analysis_data["eco4_eligible"] == True) & (
|
||||
analysis_data["roof_insulation_thickness_numeric"] > 100)
|
||||
]
|
||||
|
||||
# underperforming cavities
|
||||
underperforming_cavities = analysis_data[
|
||||
(analysis_data["eco4_message"] == "Failed due to full cavity - check cavity age") & (
|
||||
analysis_data["cavity_age"] > 9 * 365
|
||||
) & (analysis_data["roof_insulation_thickness_numeric"] <= 100)
|
||||
]
|
||||
|
||||
identified_gbis_not_sold = analysis_data[
|
||||
(analysis_data["gbis_eligible"] == True) & (
|
||||
analysis_data["eco4_eligible"] == False
|
||||
)
|
||||
]
|
||||
|
||||
# END NEW
|
||||
|
||||
warmfront_identification = analysis_data["row_code"].value_counts()
|
||||
warmfront_identified = analysis_data[analysis_data["row_code"] == "potential ECO4"]
|
||||
warmfront_identified["walls"].value_counts(normalize=True)
|
||||
|
||||
df["Construction Year Band"].value_counts(normalize=True)
|
||||
analysis_data["Construction Year Band"].value_counts(normalize=True)
|
||||
|
||||
# Number of days from today
|
||||
|
||||
days_to_today = (datetime.now() - pd.to_datetime(warmfront_identified["date_epc"])).dt.days
|
||||
days_to_today.mean()
|
||||
|
||||
property_types = df["Property Type"].value_counts()
|
||||
property_types = analysis_data["Property Type"].value_counts()
|
||||
|
||||
n_identified = (results_df["gbis_eligible"] | results_df["eco4_eligible"]).sum()
|
||||
|
||||
|
|
@ -312,12 +365,12 @@ def app():
|
|||
|
||||
# Pickle results
|
||||
# import pickle
|
||||
# with open("ha7_results.pkl", "wb") as f:
|
||||
# with open("ha7_results_jan_10.pkl", "wb") as f:
|
||||
# pickle.dump({"results_df": results_df, "scoring_data": scoring_data, "nodata": nodata}, f)
|
||||
|
||||
# Read in the old data
|
||||
# import pickle
|
||||
# with open("ha7_results.pkl", "rb") as f:
|
||||
# with open("ha7_results_jan_10.pkl", "rb") as f:
|
||||
# old_data = pickle.load(f)
|
||||
# results_df = old_data["results_df"]
|
||||
# scoring_data = old_data["scoring_data"]
|
||||
|
|
|
|||
|
|
@ -176,12 +176,16 @@ class Costs:
|
|||
"""
|
||||
material_cost_per_m2 = material["material_cost"]
|
||||
|
||||
# We inflate material costs due to recent price increases
|
||||
material_cost_per_m2 = material_cost_per_m2 * 1.5
|
||||
|
||||
base_material_cost = material_cost_per_m2 * floor_area
|
||||
labour_cost = material["labour_cost"] * floor_area * self.labour_adjustment_factor
|
||||
|
||||
subtotal_before_profit = base_material_cost + labour_cost
|
||||
|
||||
contingency_cost = subtotal_before_profit * self.CONTINGENCY
|
||||
# We use high risk contingency because of the possibility of access issues and clearing existing insulation
|
||||
contingency_cost = subtotal_before_profit * self.HIGH_RISK_CONTINGENCY
|
||||
preliminaries_cost = subtotal_before_profit * self.PRELIMINARIES
|
||||
profit_cost = subtotal_before_profit * self.PROFIT_MARGIN
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue