This commit is contained in:
Khalim Conn-Kowlessar 2024-03-14 17:36:09 +00:00
parent bee07a253b
commit 9b255029b3

View file

@ -20,6 +20,9 @@ from backend.ml_models.api import ModelApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.recommendation_utils import calculate_cavity_age
from etl.epc.Record import EPCRecord
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc.DataProcessor import EPCDataProcessor
from datetime import datetime
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
ENV_FILE = Path(__file__).parent / "etl" / "eligibility" / "ha_15_32" / ".env"
@ -5188,9 +5191,6 @@ def classify_loft(x):
def fml_analysis(loader):
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from etl.epc.DataProcessor import EPCDataProcessor
from datetime import datetime
assumed_ciga_pass_rate = 0.731
has_bruh = ["HA7", "HA14", "HA25", "HA39", "HA16"]
@ -5216,15 +5216,20 @@ def fml_analysis(loader):
bucket_name="retrofit-datalake-dev",
s3_file_name=f"ha-analysis/revised/{ha_name}/epc_data.pickle"
)
# Make sure we don't have duplicates. A basic drop_duplicates on uprn is enough because it shouldn't be a huge
# issue at this point
epc_data = epc_data.drop_duplicates("uprn")
# EPC age in days: time from the inspection date to now
epc_data["epc_age"] = (datetime.now() - pd.to_datetime(epc_data["inspection-date"])).dt.days
if "estimated" not in epc_data.columns:
epc_data["estimated"] = None
# For all after HA7, we don't use estimated surveys
epc_data["estimated"] = False
fuck_this = fml.merge(
epc_data, how="left", on="asset_list_row_id"
)
fuck_this["estimated"] = fuck_this["estimated"].fillna(True)
if fuck_this.shape[0] != fml.shape[0]:
raise Exception("What the fuck bruv")
@ -5259,7 +5264,15 @@ def fml_analysis(loader):
)
insulation_thicknesses = pd.DataFrame(insulation_thicknesses)
before_merge_shape = fuck_this.shape[0]
fuck_this = fuck_this.merge(insulation_thicknesses, how="left", on="uprn")
if fuck_this.shape[0] != before_merge_shape:
raise Exception("SOMETHING WENT WRONG")
if any(fuck_this["ECO Eligibility"].str.contains("subject to archetype")):
blah
# clean roof insulation
fuck_this["roof_insulation_thickness"] = fuck_this["roof_insulation_thickness"].fillna("0")
fuck_this["roof_insulation_thickness"] = fuck_this[
@ -5283,7 +5296,7 @@ def fml_analysis(loader):
#
# fuck_this['age_lower_bound'] = fuck_this['construction-age-band'].apply(extract_lower_bound)
had_survey = fuck_this[pd.isnull(fuck_this["estimated"])]
had_survey = fuck_this[fuck_this["estimated"] == False]
# proportion with a survey:
proportion_with_survey = 100 * had_survey.shape[0] / fuck_this.shape[0]
@ -5294,27 +5307,11 @@ def fml_analysis(loader):
had_survey["ECO Eligibility"] == "eco4"
]
# Walls:
# Cavity wall, as built, insulated (assumed)
# Cavity wall, as built, no insulation (assumed)
# Cavity wall, as built, partial insulation (assumed)
# Roof:
# Less than 100mm = high confidence
# Less than 270mm & EPC at least 5 years old = medium confidence
# Otherwise, low confidence
# SAP criteria is EPC C or below
# Pre is 54 or below
no_ciga_check_needed_with_archetype = no_ciga_check_needed[
no_ciga_check_needed_eligible = no_ciga_check_needed[
(no_ciga_check_needed["walls-description"].isin(no_ciga_cavity_descriptions)) &
(no_ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
(no_ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
]
if not no_ciga_check_needed_with_archetype.empty:
raise Exception("SORT ME OUT")
# Characterise no CIGA check needed
@ -5327,9 +5324,20 @@ def fml_analysis(loader):
ciga_check_passed = had_survey[
had_survey["ECO Eligibility"] == "eco4 - passed ciga"
]
# These should be treated the same as ones that have passed their ciga checks, from a detection perspective
ciga_check_passed_eligible = ciga_check_passed[
(ciga_check_passed["walls-description"].str.lower().str.contains("cavity") == True) &
(ciga_check_passed["roof_classiciation"].isin(["high", "medium"])) &
(ciga_check_passed["current-energy-efficiency"].astype(float) <= 80)
]
if not ciga_check_passed.empty:
raise Exception("SORT ME BRUV")
if not loader.data[ha_name]["ciga_list"].empty:
proportions = loader.data[ha_name]["ciga_list"]["Guarantee"].value_counts(normalize=True)
ha_ciga_pass_rate = proportions[proportions.index == "No"].values[0]
else:
ha_ciga_pass_rate = assumed_ciga_pass_rate
# We take just the cavity walls
# UCL paper: https://discovery.ucl.ac.uk/id/eprint/10110371/
@ -5338,53 +5346,96 @@ def fml_analysis(loader):
# differ between variables; floor and wall type errors occur in ~10-15% of EPCs,
# compared with ~5% for wall insulation and glazing performance
ciga_check_needed_plausible = ciga_check_needed[
ciga_check_needed_eligible = ciga_check_needed[
(ciga_check_needed["walls-description"].str.lower().str.contains("cavity") == True) &
(ciga_check_needed["roof_classiciation"].isin(["high", "medium"])) &
(ciga_check_needed["current-energy-efficiency"].astype(float) <= 80)
]
if not loader.data[ha_name]["ciga_list"].empty:
raise NotImplementedError("SORT OUT THE CIGA BRUV")
else:
ha_ciga_pass_rate = assumed_ciga_pass_rate
ciga_check_expectation = np.round(ciga_check_needed_plausible.shape[0] * ha_ciga_pass_rate)
without_ciga_expectation = no_ciga_check_needed_with_archetype.shape[0]
ciga_check_expectation = np.round(ciga_check_needed_eligible.shape[0] * ha_ciga_pass_rate)
without_ciga_expectation = no_ciga_check_needed_eligible.shape[0]
passed_ciga_expectation = ciga_check_passed_eligible.shape[0]
# Need to add on the non-ciga
total_expectation = ciga_check_expectation + without_ciga_expectation
total_expectation = ciga_check_expectation + without_ciga_expectation + passed_ciga_expectation
if proportion_with_survey < 100:
# We estimate the rest
without_survey_needing_ciga = fuck_this[
(pd.isnull(fuck_this["estimated"]) == False) &
(fuck_this["estimated"] == True) &
(fuck_this["ECO Eligibility"].str.contains("subject to ciga") == True)
]
# We apply the same conversion rate as the properties with a survey
without_survey_without_ciga_expected = np.round(
without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
)
if without_survey_needing_ciga.empty:
without_survey_without_ciga_expected = 0
else:
# We apply the same conversion rate as the properties with a survey
without_survey_without_ciga_expected = np.round(
without_survey_needing_ciga.shape[0] * (ciga_check_expectation / ciga_check_needed.shape[0])
)
total_expectation += without_survey_without_ciga_expected
without_survey_without_ciga = fuck_this[
(pd.isnull(fuck_this["estimated"]) == False) & (fuck_this["ECO Eligibility"].isin(["eco4"]))
without_survey_passed_ciga = fuck_this[
(fuck_this["estimated"] == True) &
(fuck_this["ECO Eligibility"] == "eco4 - passed ciga")
]
if not without_survey_without_ciga.empty:
raise Exception("Estimate the rest!!")
if without_survey_passed_ciga.empty:
without_survey_passed_ciga_expected = 0
else:
# We apply the same conversion rate as the properties with a survey
without_survey_passed_ciga_expected = np.round(
without_survey_passed_ciga.shape[0] * (passed_ciga_expectation / ciga_check_passed.shape[0])
)
# Finally, no ciga needed
without_survey_eco4 = fuck_this[
(fuck_this["estimated"] == True) &
(fuck_this["ECO Eligibility"] == "eco4")
]
if without_survey_eco4.empty:
without_survey_eco4_expected = 0
else:
# We apply the same conversion rate as the properties with a survey
without_survey_eco4_expected = np.round(
without_survey_eco4.shape[0] * (without_ciga_expectation / no_ciga_check_needed.shape[0])
)
total_expectation = (
total_expectation +
without_survey_without_ciga_expected +
without_survey_passed_ciga_expected +
without_survey_eco4_expected
)
surveys = loader.data[ha_name]["survey_list"]
sold_now = 0
if not surveys.empty:
sold_now = surveys[
surveys["installation_status"].str.lower().str.contains("eco4")
].shape[0]
sales_since_nov = sold_now - original_figures["No. of Tech surveys complete - Eco 4"].values[0]
results.append(
{
"HA Name": ha_name,
"Original ECO4 Estimate - Remaining": original_remaining,
"Of which sold": sales_since_nov,
"Of which ECO4 Eligible - Remaining": int(total_expectation),
"Proportion with a survey": proportion_with_survey,
"total_expectation": total_expectation
}
)
results_df = pd.DataFrame(results)
results_df["Delta vs November"] = 100 * (
results_df["Of which ECO4 Eligible - Remaining"] - results_df["Original ECO4 Estimate - Remaining"]
) / results_df["Original ECO4 Estimate - Remaining"]
# TODO: Split into high and low confidence?
#
def app():
"""