mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
rough attempt to attribute surplus ciga dependent eco4 jobs
This commit is contained in:
parent
41c17aa1da
commit
6a327629bf
1 changed files with 107 additions and 37 deletions
|
|
@ -176,6 +176,10 @@ class DataLoader:
|
|||
"address": "Full Address",
|
||||
"postcode": "Postcode"
|
||||
},
|
||||
"HA49": {
|
||||
"address": "Property Address Full",
|
||||
"postcode": "Property Postcode"
|
||||
},
|
||||
"HA54": {
|
||||
"address": "Postal Address",
|
||||
"postcode": "matching_postcode"
|
||||
|
|
@ -219,7 +223,7 @@ class DataLoader:
|
|||
|
||||
def create_asset_list_matching_address(self, ha_name, asset_list):
|
||||
|
||||
if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA54"]:
|
||||
if ha_name in ["HA1", "HA6", "HA12", "HA16", "HA24", "HA30", "HA31", "HA48", "HA49", "HA54"]:
|
||||
asset_list["matching_address"] = asset_list[
|
||||
self.COLUMN_CONFIG[ha_name]["address"]
|
||||
].astype(str).str.lower().str.strip()
|
||||
|
|
@ -382,6 +386,16 @@ class DataLoader:
|
|||
asset_list["Address2"].astype(str).str.lower().str.strip() + ", " + \
|
||||
asset_list["PostCode"].astype(str).str.lower().str.strip()
|
||||
asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
|
||||
elif ha_name == "HAXX":
|
||||
asset_list["matching_address"] = asset_list["Address"].astype(str).str.lower().str.strip() + ", " + \
|
||||
asset_list["PostCode"].astype(str).str.lower().str.strip()
|
||||
asset_list["matching_postcode"] = asset_list["PostCode"].astype(str).str.lower().str.strip()
|
||||
elif ha_name == "HAXXX":
|
||||
asset_list["matching_address"] = (
|
||||
asset_list["Combined Address"].astype(str).str.lower().str.strip() + ", " +
|
||||
asset_list["Postcode"].astype(str).str.lower().str.strip()
|
||||
)
|
||||
asset_list["matching_postcode"] = asset_list["Postcode"].astype(str).str.lower().str.strip()
|
||||
else:
|
||||
raise NotImplementedError("implement me")
|
||||
|
||||
|
|
@ -467,6 +481,8 @@ class DataLoader:
|
|||
asset_list["HouseNo"] = asset_list["House_Number"].copy()
|
||||
elif ha_name == "HA9":
|
||||
asset_list["HouseNo"] = asset_list["House Number"].copy()
|
||||
elif ha_name == "HAXXX":
|
||||
asset_list["HouseNo"] = asset_list["Door Number"].copy()
|
||||
else:
|
||||
split_addresses = asset_list['matching_address'].str.split(',', expand=True)
|
||||
house_numbers = split_addresses[0].str.split(' ', expand=True)
|
||||
|
|
@ -1999,6 +2015,10 @@ class DataLoader:
|
|||
|
||||
return survey_list
|
||||
|
||||
@staticmethod
|
||||
def correct_ha49_survey_list(survey_list):
|
||||
return survey_list
|
||||
|
||||
@staticmethod
|
||||
def levenstein_match(matching_string, df):
|
||||
match_to = df["matching_address"].tolist()
|
||||
|
|
@ -5080,8 +5100,11 @@ def app():
|
|||
# Add in:
|
||||
priority_has = [
|
||||
"HA1", "HA2", "HA6", "HA7", "HA9", "HA12", "HA13", "HA14", "HA15", "HA16", "HA18", "HA19", "HA24", "HA25",
|
||||
"HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA50", "HA54", "HA56", "HA63",
|
||||
"HA107", "HA117"
|
||||
"HA27", "HA28", "HA30", "HA31", "HA32", "HA34", "HA35", "HA39", "HA41", "HA48", "HA49", "HA50", "HA54", "HA56",
|
||||
"HA63", "HA107", "HA117",
|
||||
|
||||
# New HAS
|
||||
"HAXX", "HAXXX",
|
||||
]
|
||||
# Next HAs to do: 14 [DONE], 15 [DONE], 32 [DONE], 33 [Input format is 4 parts and no eco4 jobs identified - come
|
||||
# back on this], 28 [DONE], 41 [DONE], 50 [DONE], 48 [DONE], 2 [DONE], 63 [DONE], 12 [DONE], 117 [DONE], 13 [DONE],
|
||||
|
|
@ -5100,39 +5123,86 @@ def app():
|
|||
|
||||
forecast_remaining_sales(loader)
|
||||
|
||||
# We load in the additional data required to perform the analysis
|
||||
# cleaned = read_from_s3(
|
||||
# s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
# bucket_name="retrofit-data-dev"
|
||||
# )
|
||||
# cleaned = msgpack.unpackb(cleaned, raw=False)
|
||||
# cleaned = patch_cleaned(cleaned)
|
||||
#
|
||||
# cleaning_data = read_dataframe_from_s3_parquet(
|
||||
# bucket_name="retrofit-data-dev", file_key="sap_change_model/cleaning_dataset.parquet",
|
||||
# )
|
||||
# created_at = datetime.now().isoformat()
|
||||
#
|
||||
# photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(bucket="retrofit-data-dev")
|
||||
#
|
||||
# outputs = get_epc_data(
|
||||
# loader=loader,
|
||||
# cleaned=cleaned,
|
||||
# cleaning_data=cleaning_data,
|
||||
# created_at=created_at,
|
||||
# photo_supply_lookup=photo_supply_lookup,
|
||||
# floor_area_decile_thresholds=floor_area_decile_thresholds,
|
||||
# pull_data=pull_data
|
||||
# )
|
||||
conversion_rate = 0.95
|
||||
archetype_check_conversion = 0.7
|
||||
res = []
|
||||
for k, v in loader.data.items():
|
||||
asset_list = v["asset_list"].copy()
|
||||
agg = asset_list["ECO Eligibility"].value_counts()
|
||||
# Only analyse HAs where at least one property has passed CIGA; skip any HA with no "passed" entries
|
||||
if not any("passed" in x for x in agg.index):
|
||||
continue
|
||||
|
||||
# import pickle
|
||||
# with open("ha_analysis.pickle", "wb") as f:
|
||||
# pickle.dump({"outputs": outputs, "loader": loader}, f)
|
||||
agg = pd.DataFrame(agg).reset_index()
|
||||
|
||||
# To read:
|
||||
# import pickle
|
||||
# with open("ha_analysis.pickle", "rb") as f:
|
||||
# outputs = pickle.load(f)["outputs"]
|
||||
#
|
||||
# with open("loader.pickle", "rb") as f:
|
||||
# loader = pickle.load(f)
|
||||
passed_ciga = agg[agg["ECO Eligibility"] == "eco4 - passed ciga"]
|
||||
passed_ciga = passed_ciga["count"].values[0] if not passed_ciga.empty else 0
|
||||
|
||||
failed_ciga = agg[agg["ECO Eligibility"] == "failed ciga"]
|
||||
failed_ciga = failed_ciga["count"].values[0] if not failed_ciga.empty else 0
|
||||
|
||||
ciga_pass_rate = passed_ciga / (passed_ciga + failed_ciga) if (passed_ciga + failed_ciga) > 0 else 1
|
||||
|
||||
dormant_ciga = agg[
|
||||
agg["ECO Eligibility"].str.contains("subject to ciga") &
|
||||
~agg["ECO Eligibility"].str.contains("subject to archetype")
|
||||
]
|
||||
|
||||
dormant_ciga = dormant_ciga['count'].values[0] if not dormant_ciga.empty else 0
|
||||
|
||||
dormant_ciga_archetype = agg[
|
||||
agg["ECO Eligibility"].str.contains("subject to ciga") &
|
||||
agg["ECO Eligibility"].str.contains("subject to archetype")
|
||||
]
|
||||
|
||||
dormant_ciga_archetype = dormant_ciga_archetype['count'].values[0] if not dormant_ciga_archetype.empty else 0
|
||||
|
||||
needing_check = dormant_ciga + dormant_ciga_archetype * archetype_check_conversion
|
||||
needing_check = np.round(needing_check)
|
||||
|
||||
additional_jobs = (dormant_ciga * ciga_pass_rate * conversion_rate) + (
|
||||
dormant_ciga_archetype * archetype_check_conversion * ciga_pass_rate * conversion_rate
|
||||
)
|
||||
additional_jobs = np.round(additional_jobs)
|
||||
|
||||
# We attempt to estimate the uplift and how much of that is attributed to surplus subject to ciga jobs
|
||||
original_estimate = loader.december_figures[
|
||||
loader.december_figures["HA Name"] == k
|
||||
]
|
||||
|
||||
original_estimate = original_estimate["ECO4"].values[0] if not original_estimate.empty else 0
|
||||
base_eco_figures = agg[
|
||||
agg["ECO Eligibility"].isin(["eco4", "eco4 - passed ciga"])
|
||||
]["count"].sum()
|
||||
eco4_from_ciga = original_estimate - base_eco_figures
|
||||
eco4_from_ciga = eco4_from_ciga if eco4_from_ciga > 0 else 0
|
||||
surplus_from_dormant = additional_jobs - eco4_from_ciga
|
||||
surplus_from_dormant = 0 if surplus_from_dormant < 0 else surplus_from_dormant
|
||||
|
||||
res.append(
|
||||
{
|
||||
"ha_name": k,
|
||||
"additional_eco4": additional_jobs,
|
||||
"needing_check": needing_check,
|
||||
"surplus_from_dormant": surplus_from_dormant
|
||||
}
|
||||
)
|
||||
|
||||
res = pd.DataFrame(res)
|
||||
# Drop the HAs that are not in the previous draft
|
||||
# In the v2 draft, there are 12 HAs
|
||||
|
||||
v5_surplus = res[
|
||||
~res["ha_name"].isin(["HA9"])
|
||||
]["additional_eco4"].sum()
|
||||
# 7212 properties
|
||||
# This is not a perfect difference though, because of the variations in how the numbers are recorded in the November
|
||||
# all HAs sheet. E.g. for HA 107, there were 1239 properties identified. In the postcode list, there are 1255,
|
||||
# however 531 are still needing a CIGA check. Therefore their original figures, in this case, included properties
|
||||
# pre-CIGA
|
||||
|
||||
v5_surplus_from_dormant = res[
|
||||
~res["ha_name"].isin(["HA9"])
|
||||
]["surplus_from_dormant"].sum()
|
||||
# 5539.0
|
||||
# 9471690
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue