mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fml fml
This commit is contained in:
parent
bee07a253b
commit
9b255029b3
1 changed files with 96 additions and 45 deletions
|
|
@ -20,6 +20,9 @@ from backend.ml_models.api import ModelApi
|
|||
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
|
||||
from recommendations.recommendation_utils import calculate_cavity_age
|
||||
from etl.epc.Record import EPCRecord
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from datetime import datetime
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||||
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
|
||||
|
|
@ -5188,9 +5191,6 @@ def classify_loft(x):
|
|||
|
||||
|
||||
def fml_analysis(loader):
|
||||
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from etl.epc.DataProcessor import EPCDataProcessor
|
||||
from datetime import datetime
|
||||
assumed_ciga_pass_rate = 0.731
|
||||
has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
|
||||
|
||||
|
|
@ -5216,15 +5216,20 @@ def fml_analysis(loader):
|
|||
bucket_name="retrofit-datalake-dev",
|
||||
s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
|
||||
)
|
||||
# We make sure we don't have duplicates. We do a super basic drop_duplicates because it shouldn't be a huge
|
||||
# issue at this point
|
||||
epc_data = epc_data.drop_duplicates("uprn")
|
||||
|
||||
# time from the inspection to now
|
||||
epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
|
||||
if "estimated" not in epc_data.columns:
|
||||
epc_data["estimated"] = None
|
||||
# For all after HA7, we don't use estimated surveys
|
||||
epc_data["estimated"] = False
|
||||
|
||||
fuck_this = fml.merge(
|
||||
epc_data, how="left", on="asset_list_row_id"
|
||||
)
|
||||
fuck_this["estimated"] = fuck_this["estimated"].fillna(True)
|
||||
if fuck_this.shape[0] != fml.shape[0]:
|
||||
raise Exception("What the fuck bruv")
|
||||
|
||||
|
|
@ -5259,7 +5264,15 @@ def fml_analysis(loader):
|
|||
)
|
||||
insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
|
||||
|
||||
before_merge_shape = fuck_this.shape[0]
|
||||
fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
|
||||
|
||||
if fuck_this.shape[0] != before_merge_shape:
|
||||
raise Exception("SOMETHING WENT WRONG")
|
||||
|
||||
if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
|
||||
blah
|
||||
|
||||
# clean roof insulation
|
||||
fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
|
||||
fuck_this["roof_insulation_thickness"] = fuck_this[
|
||||
|
|
@ -5283,7 +5296,7 @@ def fml_analysis(loader):
|
|||
#
|
||||
# fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
|
||||
|
||||
had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
|
||||
had_survey = fuck_this[fuck_this["estimated"] == False]
|
||||
|
||||
# proportion with a survey:
|
||||
proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
|
||||
|
|
@ -5294,27 +5307,11 @@ def fml_analysis(loader):
|
|||
had_survey["ECO Eligibility"] == "eco4"
|
||||
]
|
||||
|
||||
# Walls:
|
||||
# Cavity wall, as built, insulated (assumed)
|
||||
# Cavity wall, as built, no insulation (assumed)
|
||||
# Cavity wall, as built, partial insulation (assumed)
|
||||
|
||||
# Roof:
|
||||
# Less than 100mm = high confidence
|
||||
# Less than 270mm & EPC at least 5 years old = medium confidence
|
||||
# Otherwise, low confidence
|
||||
|
||||
# SAP criteria is EPC C or below
|
||||
|
||||
# Pre is 54 or below
|
||||
|
||||
no_ciga_check_needed_with_archetype = no_ciga_check_needed[
|
||||
no_ciga_check_needed_eligible = no_ciga_check_needed[
|
||||
(no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
|
||||
(no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
|
||||
(no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
|
||||
]
|
||||
if not no_ciga_check_needed_with_archetype.empty:
|
||||
raise Exception("SORT ME OUT")
|
||||
|
||||
# Characterise no CIGA check needed
|
||||
|
||||
|
|
@ -5327,9 +5324,20 @@ def fml_analysis(loader):
|
|||
ciga_check_passed = had_survey[
|
||||
had_survey["ECO Eligibility"] == "eco4 - passed ciga"
|
||||
]
|
||||
# These should be treated the same as ones that have passed their CIGA checks, from a detection perspective
|
||||
ciga_check_passed_eligible = ciga_check_passed[
|
||||
(ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
|
||||
(ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
|
||||
(ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
|
||||
]
|
||||
|
||||
if not ciga_check_passed.empty:
|
||||
raise Exception("SORT ME BRUV")
|
||||
if not loader.data[ha_name]["ciga_list"].empty:
|
||||
|
||||
proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True)
|
||||
ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0]
|
||||
|
||||
else:
|
||||
ha_ciga_pass_rate = assumed_ciga_pass_rate
|
||||
|
||||
# We take just the cavity walls
|
||||
# UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
|
||||
|
|
@ -5338,53 +5346,96 @@ def fml_analysis(loader):
|
|||
# differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
|
||||
# compared with ~5% for wall insulation and glazing performance
|
||||
|
||||
ciga_check_needed_plausible = ciga_check_needed[
|
||||
ciga_check_needed_eligible = ciga_check_needed[
|
||||
(ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
|
||||
(ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
|
||||
(ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
|
||||
]
|
||||
|
||||
if not loader.data[ha_name]["ciga_list"].empty:
|
||||
raise NotImplementedError("SORT OUT THE CIGA BRUV")
|
||||
else:
|
||||
ha_ciga_pass_rate = assumed_ciga_pass_rate
|
||||
|
||||
ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
|
||||
without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
|
||||
ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
|
||||
without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
|
||||
passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
|
||||
|
||||
# Need to add on the non-ciga
|
||||
total_expectation = ciga_check_expectation + without_ciga_expectation
|
||||
total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
|
||||
|
||||
if proportion_with_survey < 100:
|
||||
# We estimate the rest
|
||||
without_survey_needing_ciga = fuck_this[
|
||||
(pd.isnull(fuck_this["estimated"]) == False) &
|
||||
(fuck_this["estimated"] == True) &
|
||||
(fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
|
||||
]
|
||||
|
||||
# We apply the same conversion rate as the properties with a survey
|
||||
without_survey_without_ciga_expected = np.round(
|
||||
without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
|
||||
)
|
||||
if without_survey_needing_ciga.empty:
|
||||
without_survey_without_ciga_expected = 0
|
||||
else:
|
||||
# We apply the same conversion rate as the properties with a survey
|
||||
without_survey_without_ciga_expected = np.round(
|
||||
without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
|
||||
)
|
||||
|
||||
total_expectation += without_survey_without_ciga_expected
|
||||
|
||||
without_survey_without_ciga = fuck_this[
|
||||
(pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
|
||||
without_survey_passed_ciga = fuck_this[
|
||||
(fuck_this["estimated"] == True) &
|
||||
(fuck_this["ECO Eligibility"] == "eco4 - passed ciga")
|
||||
]
|
||||
|
||||
if not without_survey_without_ciga.empty:
|
||||
raise Exception("Estimate the rest!!")
|
||||
if without_survey_passed_ciga.empty:
|
||||
without_survey_passed_ciga_expected = 0
|
||||
else:
|
||||
# We apply the same conversion rate as the properties with a survey
|
||||
without_survey_passed_ciga_expected = np.round(
|
||||
without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
|
||||
)
|
||||
|
||||
# Finally, no ciga needed
|
||||
without_survey_eco4 = fuck_this[
|
||||
(fuck_this["estimated"] == True) &
|
||||
(fuck_this["ECO Eligibility"] == "eco4")
|
||||
]
|
||||
|
||||
if without_survey_eco4.empty:
|
||||
without_survey_eco4_expected = 0
|
||||
else:
|
||||
# We apply the same conversion rate as the properties with a survey
|
||||
without_survey_eco4_expected = np.round(
|
||||
without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
|
||||
)
|
||||
|
||||
total_expectation = (
|
||||
total_expectation +
|
||||
without_survey_without_ciga_expected +
|
||||
without_survey_passed_ciga_expected +
|
||||
without_survey_eco4_expected
|
||||
)
|
||||
|
||||
surveys = loader.data[ha_name]["survey_list"]
|
||||
sold_now = 0
|
||||
if not surveys.empty:
|
||||
sold_now = surveys[
|
||||
surveys["installation_status"].str.lower().str.contains("eco4")
|
||||
].shape[0]
|
||||
|
||||
sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]
|
||||
|
||||
results.append(
|
||||
{
|
||||
"HA Name": ha_name,
|
||||
"Original ECO4 Estimate - Remaining": original_remaining,
|
||||
"Of which sold": sales_since_nov,
|
||||
"Of which ECO4 Eligible - Remaining": int(total_expectation),
|
||||
"Proportion with a survey": proportion_with_survey,
|
||||
"total_expectation": total_expectation
|
||||
}
|
||||
)
|
||||
|
||||
results_df = pd.DataFrame(results)
|
||||
|
||||
results_df["Delta vs November"] = 100 * (
|
||||
results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
|
||||
) / results_df["Original ECO4 Estimate - Remaining"]
|
||||
|
||||
# TODO: Split into high and low confidence?
|
||||
#
|
||||
|
||||
|
||||
def app():
|
||||
"""
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue