mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Creating the output structure
This commit is contained in:
parent
e9bfd63c35
commit
6ae21bbcb0
2 changed files with 220 additions and 339 deletions
|
|
@ -365,7 +365,7 @@ class Eligibility:
|
|||
return
|
||||
|
||||
# Near perfect
|
||||
if self.cavity["suitability"] and (current_sap < 55):
|
||||
if self.cavity["suitability"] and (current_sap < 69):
|
||||
self.gbis_warmfront = {
|
||||
"eligible": True,
|
||||
"strict": True,
|
||||
|
|
@ -373,15 +373,6 @@ class Eligibility:
|
|||
}
|
||||
return
|
||||
|
||||
# Suitable cavity, but high sap
|
||||
if self.cavity["suitability"] and (current_sap >= 55):
|
||||
self.gbis_warmfront = {
|
||||
"eligible": True,
|
||||
"strict": False,
|
||||
"message": "Meets cavity, fails SAP check",
|
||||
}
|
||||
return
|
||||
|
||||
self.gbis_warmfront = {
|
||||
"eligible": False,
|
||||
"strict": False,
|
||||
|
|
|
|||
|
|
@ -1646,10 +1646,26 @@ def get_epc_data(
|
|||
|
||||
|
||||
def get_col_widths(dataframe, max_allowed_width=25):
    """Return a display width for every data column of ``dataframe``.

    For each column the width is the larger of the longest stringified cell
    value and the header length plus two characters of padding, capped at
    ``max_allowed_width`` so that one long value cannot produce an
    excessively wide column.

    For ``pd.MultiIndex`` columns the header length is the longest label
    across the levels of that column. No width is produced for the index;
    the returned list has exactly one entry per data column, in column order.

    :param dataframe: frame whose columns are measured
    :param max_allowed_width: upper bound applied to every returned width
    :return: list of ``int`` widths, one per column
    """
    has_multiindex_header = isinstance(dataframe.columns, pd.MultiIndex)

    widths = []
    for position, label in enumerate(dataframe.columns):
        # Header width, +2 for padding
        if has_multiindex_header:
            header_width = max(len(str(level)) for level in label) + 2
        else:
            header_width = len(str(label)) + 2

        # Select by position so that duplicated column labels still give a single Series
        values = dataframe.iloc[:, position]

        # An empty column has no data width; taking .max() of it would give NaN and poison the result
        data_width = int(values.astype(str).map(len).max()) if len(values) else 0

        widths.append(min(max(data_width, header_width), max_allowed_width))

    return widths
|
||||
|
||||
|
||||
def analyse_ha_data(outputs, loader):
|
||||
|
|
@ -1671,42 +1687,13 @@ def analyse_ha_data(outputs, loader):
|
|||
:return:
|
||||
"""
|
||||
|
||||
eco4_rate = 1710
|
||||
gbis_rate = 600
|
||||
|
||||
ha_analysis_results = []
|
||||
ha_revenue_results = []
|
||||
for ha_name, datasets in outputs.items():
|
||||
|
||||
inputs = [x for k, x in loader.data.items() if k == ha_name][0]
|
||||
# TODO: This is placeholder because we don't have the schemes that the properties have been qualified for
|
||||
# yet
|
||||
#
|
||||
import random
|
||||
randomly_allocated_schemes = random.choices(["ECO4", "GBIS"], k=inputs["asset_list"].shape[0])
|
||||
inputs["asset_list"]["randomly_allocated_schemes"] = randomly_allocated_schemes
|
||||
inputs["asset_list"]["funding_scheme"] = None
|
||||
inputs["asset_list"]["funding_scheme"] = np.where(
|
||||
inputs["asset_list"]["row_meaning"] == "identified potential eco works (CWI)",
|
||||
inputs["asset_list"]["randomly_allocated_schemes"],
|
||||
inputs["asset_list"]["funding_scheme"]
|
||||
)
|
||||
|
||||
# TODO: Also temp, just for HA 6
|
||||
if ha_name == "ha_6":
|
||||
inputs["survey_list"]["funding_scheme"] = None
|
||||
inputs["survey_list"]["funding_scheme"] = np.where(
|
||||
inputs["survey_list"][
|
||||
'AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION '] == "AFFORDABLE WARMTH",
|
||||
"ECO4",
|
||||
"GBIS"
|
||||
)
|
||||
|
||||
# End placeholder
|
||||
|
||||
results_df = datasets["results_df"].copy()
|
||||
|
||||
analysis_data = inputs["asset_list"][['asset_list_row_id', "row_meaning", "funding_scheme"]].rename(
|
||||
analysis_data = inputs["asset_list"][['asset_list_row_id', "ECO Eligibility"]].rename(
|
||||
columns={"row_meaning": "asset_identification_status"}
|
||||
).merge(
|
||||
results_df,
|
||||
|
|
@ -1715,293 +1702,236 @@ def analyse_ha_data(outputs, loader):
|
|||
left_on="asset_list_row_id"
|
||||
)
|
||||
|
||||
# We now merge the survey list onto the analysis data and remove anything that is sold, to give us just what is
|
||||
# remaining
|
||||
################################################################################################
|
||||
# We take the properties that strictly qualified under eco
|
||||
################################################################################################
|
||||
|
||||
if inputs["matched_lookup"] is not None:
|
||||
analysis_data = analysis_data.merge(
|
||||
inputs["matched_lookup"], how="left", on="asset_list_row_id"
|
||||
eco4_identified = analysis_data[analysis_data["ECO Eligibility"] == "eco4"].copy()
|
||||
eco4_identified["identification_type"] = None
|
||||
eco4_identified["identification_type"] = np.where(
|
||||
(eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == True),
|
||||
"strict",
|
||||
eco4_identified["identification_type"]
|
||||
)
|
||||
|
||||
eco4_identified["identification_type"] = np.where(
|
||||
(eco4_identified["eco4_eligible"] == True) & (eco4_identified["eco4_strict"] == False),
|
||||
"expansive",
|
||||
eco4_identified["identification_type"]
|
||||
)
|
||||
################################################################################################
|
||||
# We take the properties dependent on CIGA
|
||||
################################################################################################
|
||||
|
||||
ciga_dependent_identified = analysis_data[
|
||||
analysis_data["ECO Eligibility"].isin(
|
||||
[
|
||||
"eco4 (subject to ciga)",
|
||||
"eco4 - passed ciga"
|
||||
]
|
||||
)
|
||||
# Drop any rows that have a survey_list_row_id
|
||||
analysis_data = analysis_data[pd.isnull(analysis_data["survey_list_row_id"])]
|
||||
].copy()
|
||||
|
||||
# If we have a survey list, we merge this onto the results
|
||||
n_properties_in_asset_list = analysis_data["asset_list_row_id"].nunique()
|
||||
|
||||
properties_sold = (
|
||||
inputs["survey_list"].groupby("funding_scheme")["survey_list_row_id"].nunique().reset_index() if
|
||||
inputs["survey_list"] is not None else pd.DataFrame(columns=["funding_scheme"])
|
||||
)
|
||||
properties_sold_eco4 = (
|
||||
properties_sold[properties_sold["funding_scheme"] == "ECO4"]["survey_list_row_id"].values[0] if
|
||||
(not properties_sold.empty) and ("ECO4" in properties_sold["funding_scheme"].values) else 0
|
||||
)
|
||||
properties_sold_gbis = (
|
||||
properties_sold[properties_sold["funding_scheme"] == "GBIS"]["survey_list_row_id"].values[0] if
|
||||
(not properties_sold.empty) and ("GBIS" in properties_sold["funding_scheme"].values) else 0
|
||||
# These are properties that show filled cavity
|
||||
ciga_dependent_identified["identification_type"] = None
|
||||
ciga_dependent_identified["identification_type"] = np.where(
|
||||
ciga_dependent_identified["eco4_message"].isin(
|
||||
[
|
||||
"Perfect suitability",
|
||||
"Meets cavity and sap",
|
||||
"Fails cavity, meets loft, fails SAP",
|
||||
"Meets fabric, fails SAP check",
|
||||
"Meets cavity, loft borderline, meets sap",
|
||||
]
|
||||
),
|
||||
"strict",
|
||||
ciga_dependent_identified["identification_type"]
|
||||
)
|
||||
|
||||
# We now calculate the number of remaining properties, by scheme
|
||||
remaining_properties = analysis_data[
|
||||
analysis_data["asset_identification_status"] == "identified potential eco works (CWI)"
|
||||
].copy()
|
||||
remaining_properties["prospect_type"] = None
|
||||
|
||||
remaining_properties_by_scheme = (
|
||||
remaining_properties.groupby("funding_scheme")["asset_list_row_id"].nunique().reset_index()
|
||||
ciga_dependent_identified["identification_type"] = np.where(
|
||||
(ciga_dependent_identified["eco4_message"].isin(["All conditions fail", "failed fabric check"])) &
|
||||
(ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])),
|
||||
"expansive",
|
||||
ciga_dependent_identified["identification_type"]
|
||||
)
|
||||
|
||||
n_remaining_properties_eco4 = remaining_properties_by_scheme[
|
||||
remaining_properties_by_scheme["funding_scheme"] == "ECO4"
|
||||
]["asset_list_row_id"].values[0]
|
||||
ciga_dependent_identified["identification_type"] = np.where(
|
||||
(ciga_dependent_identified["eco4_message"].isin(["Meets just cavity"])) | (
|
||||
ciga_dependent_identified["walls"].isin(["Cavity wall, filled cavity"])
|
||||
),
|
||||
"expansive",
|
||||
ciga_dependent_identified["identification_type"]
|
||||
)
|
||||
|
||||
n_remaining_properties_gbis = remaining_properties_by_scheme[
|
||||
remaining_properties_by_scheme["funding_scheme"] == "GBIS"
|
||||
]["asset_list_row_id"].values[0]
|
||||
################################################################################################
|
||||
# We take the properties that qualified for gbis
|
||||
################################################################################################
|
||||
gbis_identified = analysis_data[analysis_data["ECO Eligibility"] == "gbis"].copy()
|
||||
gbis_identified["identification_type"] = None
|
||||
gbis_identified["identification_type"] = np.where(
|
||||
(gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] < 69),
|
||||
"strict",
|
||||
gbis_identified["identification_type"]
|
||||
)
|
||||
|
||||
# For the remaining properties, we use the results of the eligibility process to classify the property into
|
||||
# one of multiple categories
|
||||
#
|
||||
# For properties that have been identified as ECO4
|
||||
# 1) Strict ECO4 candidate - Has required fabric and EPC is D or below. We consider D or below here, because
|
||||
# Warmfront regularly re-surveys properties which then fall within the SAP requirement
|
||||
# - This is not the very strictest definition of ECO4 eligible, but we aim to characterise the properties
|
||||
# here and re-surveying is a common practice by Warmfront. Additionally, many of the social homes have
|
||||
# very old EPCs which may score lower when re-done
|
||||
# 2) Meets Fabric requirements, not SAP
|
||||
# Warmfront has identified the property as eligible, but the EPC is not D or below. We consider this but
|
||||
# label is separately as not a strict
|
||||
# 3) Subject to CIGA check - Meets loft conditions but shows a filled cavity.
|
||||
# - we don't have a SAP constraint here because the EPC is (currently) showing what the property might
|
||||
# actually look like after retrofit and so the EPC currently being a C or above means little, because
|
||||
# the updated EPC, showing an empty cavity, could bring the property within
|
||||
# 4) Loft insulation too thick - Meets empty cavity but shows a loft with between 101 and 270mm insulation.
|
||||
# - No SAP constraint, for the same reason as in category 2)
|
||||
# 5) Looks like GBIS instead
|
||||
# 6) Does not look like ECO4 candidate
|
||||
#
|
||||
# For properties that have been identified as GBIS
|
||||
# 1) Strict GBIS candidates
|
||||
# 2) Properties that actually look like strict GBIS candidates
|
||||
# 3) Subject to CIGA check - Filled cavity
|
||||
# 4) Does not look like a GBIS candidate
|
||||
gbis_identified["identification_type"] = np.where(
|
||||
(gbis_identified["gbis_eligible"] == True) & (gbis_identified["sap"] >= 69),
|
||||
"expansive",
|
||||
gbis_identified["identification_type"]
|
||||
)
|
||||
|
||||
remaining_eco4_df = remaining_properties[
|
||||
remaining_properties["funding_scheme"] == "ECO4"
|
||||
].copy()
|
||||
# Finally, we look at the properties that have not been identified by Warmfront
|
||||
not_identified = analysis_data[
|
||||
analysis_data["ECO Eligibility"].isin(
|
||||
[
|
||||
"not eligible"
|
||||
]
|
||||
)
|
||||
].copy()
|
||||
|
||||
####################################
|
||||
surplus_eco4 = not_identified[
|
||||
(not_identified["eco4_eligible"] == True) & (not_identified["eco4_message"].isin(
|
||||
["Perfect suitability", "Meets cavity, loft borderline, meets sap", "Near perfect suitability"]
|
||||
))
|
||||
]
|
||||
|
||||
surplus_gbis = not_identified[
|
||||
(not_identified["gbis_eligible"] == True) & (
|
||||
~not_identified["asset_list_row_id"].isin(surplus_eco4["asset_list_row_id"].values)
|
||||
) & (not_identified["sap"] < 69) & (
|
||||
(not_identified["cavity_type"].isin(["empty", "partial insulation"])) | (
|
||||
not_identified["walls"].str.contains("partial", case=False, na=False)
|
||||
)
|
||||
)
|
||||
]
|
||||
surplus_gbis = surplus_gbis[surplus_gbis["is_estimated"] == False]
|
||||
|
||||
# Output variables
|
||||
# ECO4
|
||||
####################################
|
||||
|
||||
# 1) We identify this if:
|
||||
# - remaining_properties["eco4_eligible"] == True
|
||||
|
||||
remaining_eco4_df["prospect_type"] = np.where(
|
||||
(remaining_eco4_df["eco4_eligible"] == True),
|
||||
"strict ECO4",
|
||||
remaining_eco4_df["prospect_type"]
|
||||
n_properties_in_asset_list = inputs["asset_list"].shape[0]
|
||||
n_warmfront_identified_eco4 = eco4_identified.shape[0] + ciga_dependent_identified.shape[0]
|
||||
eco4_of_which_identified_strict = (
|
||||
eco4_identified[eco4_identified["identification_type"] == "strict"].shape[0] +
|
||||
ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "strict"].shape[0]
|
||||
)
|
||||
|
||||
# 2) Meets fabric requirements
|
||||
remaining_eco4_df["prospect_type"] = np.where(
|
||||
(
|
||||
(remaining_eco4_df["eco4_message"] == "sap too high") &
|
||||
remaining_eco4_df["eligibility_cavity_type"].isin(["partial", "empty"]) &
|
||||
remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"]) &
|
||||
pd.isnull(remaining_eco4_df["prospect_type"])
|
||||
),
|
||||
"ECO4 if SAP downgrade",
|
||||
remaining_eco4_df["prospect_type"]
|
||||
eco4_of_which_identified_expansive = (
|
||||
eco4_identified[eco4_identified["identification_type"] == "expansive"].shape[0] +
|
||||
ciga_dependent_identified[ciga_dependent_identified["identification_type"] == "expansive"].shape[0]
|
||||
)
|
||||
|
||||
# 3) We identify this if it has a filled cavity but meets the loft conditions
|
||||
# TODO: Consider if we should also allow 100-270mm or if we should add some slight tolerance (e.g. 150mm)
|
||||
# to account for measurement error
|
||||
remaining_eco4_df["prospect_type"] = np.where(
|
||||
(
|
||||
remaining_eco4_df["eligibility_cavity_type"].isin(["full"]) &
|
||||
remaining_eco4_df["eligibility_loft_type"].isin(["0-100mm"])
|
||||
),
|
||||
"ECO4 - Filled cavity - subject to CIGA check",
|
||||
remaining_eco4_df["prospect_type"]
|
||||
)
|
||||
|
||||
# 4) We identify this by ensuring the cavity is empty or partial, and the loft has between 101 and 270mm
|
||||
remaining_eco4_df["prospect_type"] = np.where(
|
||||
(
|
||||
remaining_eco4_df["eligibility_cavity_type"].isin(["empty", "partial"]) &
|
||||
remaining_eco4_df["eligibility_loft_type"].isin(["100-270mm"])
|
||||
),
|
||||
"ECO4 prospect - empty cavity, loft insulation below regulation",
|
||||
remaining_eco4_df["prospect_type"]
|
||||
)
|
||||
|
||||
# 5) Looks like GBIS instead
|
||||
remaining_eco4_df["prospect_type"] = np.where(
|
||||
(remaining_eco4_df["gbis_eligible"] == True) & pd.isnull(remaining_eco4_df["prospect_type"]),
|
||||
"Looks like GBIS",
|
||||
remaining_eco4_df["prospect_type"]
|
||||
)
|
||||
|
||||
# 6) This is everything else (i.e. both the cavity is full and the loft insulation is above 100mm)
|
||||
remaining_eco4_df["prospect_type"] = remaining_eco4_df["prospect_type"].fillna(
|
||||
"Does not look like ECO4 candidate"
|
||||
)
|
||||
|
||||
####################################
|
||||
# GBIS
|
||||
####################################
|
||||
|
||||
remaining_gbis = remaining_properties[
|
||||
remaining_properties["funding_scheme"] == "GBIS"
|
||||
].copy()
|
||||
|
||||
# 1) Strict GBIS candidates
|
||||
remaining_gbis["prospect_type"] = np.where(
|
||||
(
|
||||
(remaining_gbis["gbis_eligible"] == True) & (remaining_gbis["eco4_eligible"] == False)
|
||||
),
|
||||
"strict GBIS",
|
||||
remaining_gbis["prospect_type"]
|
||||
)
|
||||
|
||||
# 2) GBIS candidates that look like strict ECO4 candidates
|
||||
remaining_gbis["prospect_type"] = np.where(
|
||||
(remaining_gbis["eco4_eligible"] == True),
|
||||
"GBIS - Upgradable to ECO4",
|
||||
remaining_gbis["prospect_type"]
|
||||
)
|
||||
|
||||
# 3) Subject to CIGA check - Filled cavity
|
||||
remaining_gbis["prospect_type"] = np.where(
|
||||
(
|
||||
remaining_gbis["eligibility_cavity_type"].isin(["full"]) &
|
||||
pd.isnull(remaining_gbis["prospect_type"])
|
||||
),
|
||||
"GBIS - Filled cavity - subject to CIGA check",
|
||||
remaining_gbis["prospect_type"]
|
||||
)
|
||||
|
||||
# 4) Everything else
|
||||
remaining_gbis["prospect_type"] = remaining_gbis["prospect_type"].fillna(
|
||||
"Does not look like GBIS candidate"
|
||||
)
|
||||
|
||||
####################################
|
||||
# Surplus properties
|
||||
####################################
|
||||
|
||||
# Take properties that were not identified by Warmfront and identify those that look like they would qualify
|
||||
# under the strictest criteria
|
||||
surplus_df = analysis_data[
|
||||
analysis_data["asset_identification_status"] != "identified potential eco works (CWI)"
|
||||
].copy()
|
||||
|
||||
eco4_surplus = surplus_df[
|
||||
(
|
||||
(surplus_df["eco4_eligible"] == True) & (surplus_df["eco4_message"] == "subject to post retrofit sap") &
|
||||
(
|
||||
surplus_df["eligibility_classification"].isin(
|
||||
["high confidence", "highest confidence", "medium confidence"]
|
||||
)
|
||||
)
|
||||
)
|
||||
].copy()
|
||||
|
||||
gbis_surplus = surplus_df[
|
||||
(
|
||||
(surplus_df["gbis_eligible"] == True) & (surplus_df["eco4_eligible"] == False) & (
|
||||
surplus_df["eligibility_cavity_type"].isin(["empty", "partial"])
|
||||
)
|
||||
)
|
||||
].copy()
|
||||
|
||||
# Perform some checks to make sure we have all of the values
|
||||
remaining_eco4_dict = remaining_eco4_df["prospect_type"].value_counts().to_dict()
|
||||
if n_remaining_properties_eco4 != sum([v for k, v in remaining_eco4_dict.items()]):
|
||||
raise ValueError(
|
||||
"Number of remaining properties does not match the number of properties in remaining ECO4 dict"
|
||||
)
|
||||
|
||||
remaining_gbis_dict = remaining_gbis["prospect_type"].value_counts().to_dict()
|
||||
if n_remaining_properties_gbis != sum([v for k, v in remaining_gbis_dict.items()]):
|
||||
raise ValueError(
|
||||
"Number of remaining properties does not match the number of properties in remaining GBIS dict"
|
||||
)
|
||||
n_warmfront_identified_gbis = gbis_identified.shape[0]
|
||||
gbis_of_which_identified_strict = gbis_identified[gbis_identified["identification_type"] == "strict"].shape[0]
|
||||
gbis_of_which_identified_expansive = \
|
||||
gbis_identified[gbis_identified["identification_type"] == "expansive"].shape[0]
|
||||
|
||||
to_append = {
|
||||
"ha_name": ha_name,
|
||||
"n_properties_in_asset_list": n_properties_in_asset_list,
|
||||
("", "HA Name"): ha_name,
|
||||
("", "# Properties in asset list"): n_properties_in_asset_list,
|
||||
############
|
||||
# ECO4
|
||||
############
|
||||
"properties_sold_eco4": properties_sold_eco4,
|
||||
"n_remaining_properties_eco4": n_remaining_properties_eco4,
|
||||
**remaining_eco4_dict,
|
||||
("ECO4", "# Properties identieid by Warmfront"): n_warmfront_identified_eco4,
|
||||
("ECO4", "Of which identified by model - strict"): eco4_of_which_identified_strict,
|
||||
("ECO4", "Of which identified by model - expansive"): eco4_of_which_identified_expansive,
|
||||
("ECO4", "Of which identified by model - total"): (
|
||||
eco4_of_which_identified_strict + eco4_of_which_identified_expansive),
|
||||
("ECO4", "Additional properties"): surplus_eco4.shape[0],
|
||||
############
|
||||
# GBIS
|
||||
############
|
||||
"properties_sold_gbis": properties_sold_gbis,
|
||||
"n_remaining_properties_gbis": n_remaining_properties_gbis,
|
||||
**remaining_gbis_dict,
|
||||
############
|
||||
# GBIS
|
||||
############
|
||||
"n_eco4_surplus": eco4_surplus.shape[0],
|
||||
"n_gbis_surplus": gbis_surplus.shape[0],
|
||||
("GBIS", "# Properties identieid by Warmfront"): n_warmfront_identified_gbis,
|
||||
("GBIS", "Of which identified by model - strict"): gbis_of_which_identified_strict,
|
||||
("GBIS", "Of which identified by model - expansive"): gbis_of_which_identified_expansive,
|
||||
("GBIS", "Of which identified by model - total"): (
|
||||
gbis_of_which_identified_strict + gbis_of_which_identified_expansive
|
||||
),
|
||||
("GBIS", "Additional properties"): surplus_gbis.shape[0]
|
||||
}
|
||||
|
||||
ha_analysis_results.append(to_append)
|
||||
|
||||
revenue_to_append = {
|
||||
"ha_name": ha_name,
|
||||
"£ Remaining from asset list": (
|
||||
n_remaining_properties_eco4 * eco4_rate + n_remaining_properties_gbis * gbis_rate
|
||||
),
|
||||
"Of which: Strict": (
|
||||
to_append.get('strict ECO4', 0) * eco4_rate + to_append.get('strict GBIS', 0) * gbis_rate +
|
||||
to_append.get('GBIS - Upgradable to ECO4', 0) * gbis_rate
|
||||
),
|
||||
"Of which: Subject to CIGA": (
|
||||
to_append.get("ECO4 - Filled cavity - subject to CIGA check", 0) * eco4_rate +
|
||||
to_append.get("GBIS - Filled cavity - subject to CIGA check", 0) * gbis_rate
|
||||
),
|
||||
"Of which: Prospect, not perfect strict prospect": (
|
||||
to_append.get("ECO4 prospect - empty cavity, loft insulation below regulation", 0) * eco4_rate +
|
||||
to_append.get("ECO4 if SAP downgrade", 0) * eco4_rate
|
||||
),
|
||||
"Of which: Potential downgrade to GBIS": to_append["Looks like GBIS"] * eco4_rate,
|
||||
"Of which: Does not look like prospect": (
|
||||
to_append.get("Does not look like ECO4 candidate", 0) * eco4_rate +
|
||||
to_append.get("Does not look like GBIS candidate", 0) * gbis_rate
|
||||
),
|
||||
"Surplus: Unidentified properties": eco4_surplus.shape[0] * eco4_rate + gbis_surplus.shape[0] * gbis_rate,
|
||||
"Surplus: GBIS Updates to ECO4": to_append.get("GBIS - Upgradable to ECO4", 0) * (eco4_rate - gbis_rate)
|
||||
}
|
||||
|
||||
# Perform a quick check:
|
||||
if revenue_to_append["£ Remaining from asset list"] - (
|
||||
revenue_to_append["Of which: Strict"] + revenue_to_append["Of which: Subject to CIGA"] +
|
||||
revenue_to_append["Of which: Prospect, not perfect strict prospect"] +
|
||||
revenue_to_append["Of which: Potential downgrade to GBIS"] +
|
||||
revenue_to_append["Of which: Does not look like prospect"]
|
||||
) > 1:
|
||||
raise ValueError("Error between top level revenue figures and breakdown - investigate me")
|
||||
|
||||
ha_revenue_results.append(revenue_to_append)
|
||||
|
||||
ha_analysis_results = pd.DataFrame(ha_analysis_results)
|
||||
ha_revenue_results = pd.DataFrame(ha_revenue_results)
|
||||
ha_analysis_results.columns = pd.MultiIndex.from_tuples(ha_analysis_results.columns)
|
||||
|
||||
facts_and_figures = loader.facts_and_figures.copy()
|
||||
facts_and_figures["ha_number"] = facts_and_figures["HA Name"].str.extract(r'(\d+)').astype(int)
|
||||
facts_and_figures = facts_and_figures.sort_values("ha_number")
|
||||
facts_and_figures = facts_and_figures.drop(columns=["ha_number"])
|
||||
|
||||
# Rename some of the cols
|
||||
facts_and_figures = facts_and_figures.rename(
|
||||
columns={
|
||||
# ECO4 cols
|
||||
"ECO4": "ECO4 - December",
|
||||
"GBIS": "GBIS - December",
|
||||
"eco4 (subject to ciga)": "ECO4 - subject to ciga",
|
||||
"eco4": "ECO4 - doesn't need CIGA",
|
||||
"eco4 - passed ciga": "ECO4 - passed CIGA",
|
||||
"failed ciga": "ECO4 - failed CIGA",
|
||||
"ECO4 - partially cancelled": "ECO4 - Install downgrade to GBIS",
|
||||
"ECO4 - in progress": "ECO4 - Install in progress",
|
||||
"ECO4 - cancelled": "ECO4 - Install cancelled",
|
||||
# GBIS cols
|
||||
"gbis": "GBIS total (asset list)"
|
||||
}
|
||||
)
|
||||
# We calculate the eco4 total from the asset list
|
||||
# 1) If ciga checks have been completed (i.e. ECO4 - passed ciga > 0) this sum is
|
||||
# ECO4 - doesn't need CIGA + ECO4 - passed CIGA
|
||||
# 2) if ciga checks haven't been completed (i.e. ECO4 - passed ciga is missing), this sum is
|
||||
# ECO4 - doesn't need CIGA + ECO4 - subject to ciga
|
||||
facts_and_figures["ECO4 total (asset list)"] = np.where(
|
||||
facts_and_figures["ECO4 - passed CIGA"] > 0,
|
||||
facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - passed CIGA"],
|
||||
facts_and_figures["ECO4 - doesn't need CIGA"] + facts_and_figures["ECO4 - subject to ciga"]
|
||||
)
|
||||
|
||||
# Re-arrange the columns
|
||||
facts_and_figures = facts_and_figures[
|
||||
[
|
||||
'HA Name',
|
||||
'ECO4 - December',
|
||||
'GBIS - December',
|
||||
'ECO4 total (asset list)',
|
||||
'GBIS total (asset list)',
|
||||
'ECO4 - subject to ciga',
|
||||
"ECO4 - doesn't need CIGA",
|
||||
'ECO4 - passed CIGA',
|
||||
'ECO4 - failed CIGA',
|
||||
'ECO4 - installed',
|
||||
'ECO4 - Install in progress',
|
||||
'ECO4 - Install cancelled',
|
||||
'ECO4 - partially installed',
|
||||
'ECO4 - Install downgrade to GBIS',
|
||||
]
|
||||
]
|
||||
# Add a note to flag any rows where ECO4 (
|
||||
# subject to ciga is greater than 0) and (ECO4 - passed ciga is greater than 0
|
||||
# )
|
||||
facts_and_figures["Missed CIGA checks opportunity"] = None
|
||||
facts_and_figures["Missed CIGA checks opportunity"] = np.where(
|
||||
(facts_and_figures["ECO4 - subject to ciga"] > 0) & (facts_and_figures["ECO4 - passed CIGA"] > 0),
|
||||
"potential opportunity of " + facts_and_figures["ECO4 - subject to ciga"].astype(
|
||||
str) + " ECO4 properties needing a CIGA check",
|
||||
facts_and_figures["Missed CIGA checks opportunity"]
|
||||
)
|
||||
|
||||
# Re-arrange the columns
|
||||
|
||||
# Also sort ha_analysis_results by ha number
|
||||
ha_analysis_results["ha_number"] = ha_analysis_results[("", "HA Name")].str.extract(r'(\d+)').astype(int)
|
||||
ha_analysis_results = ha_analysis_results.sort_values("ha_number")
|
||||
ha_analysis_results = ha_analysis_results.drop(columns=["ha_number"])
|
||||
|
||||
# We save 2 sheets
|
||||
# Automate creation of the excel
|
||||
# Create a Pandas Excel writer using XlsxWriter as the engine
|
||||
with pd.ExcelWriter('HA Analysis - batch3.xlsx', engine='xlsxwriter') as writer:
|
||||
with pd.ExcelWriter('HA Analysis Results.xlsx', engine='xlsxwriter') as writer:
|
||||
# Write each dataframe to a different worksheet without the index
|
||||
for df, sheet in [(ha_revenue_results, 'Total Revenue'),
|
||||
(ha_analysis_results, 'By ECO4 and GBIS')]:
|
||||
for df, sheet in [(facts_and_figures, 'HA Facts and Figures'),
|
||||
(ha_analysis_results, 'Asset Identification')]:
|
||||
|
||||
df.to_excel(writer, sheet_name=sheet, index=False)
|
||||
df.to_excel(writer, sheet_name=sheet)
|
||||
|
||||
# Auto-adjust columns' width
|
||||
for i, width in enumerate(get_col_widths(df)):
|
||||
|
|
@ -2134,7 +2064,7 @@ def app():
|
|||
# Determines if we want to use the cached data in s3
|
||||
use_cache = True
|
||||
# Determines if we want to perform the data pull
|
||||
pull_data = True
|
||||
pull_data = False
|
||||
|
||||
# List all of the data in the folder
|
||||
directories = [str(list(entry.iterdir())[0]) for entry in DATA_FOLDER.iterdir() if entry.is_dir()]
|
||||
|
|
@ -2173,43 +2103,3 @@ def app():
|
|||
floor_area_decile_thresholds=floor_area_decile_thresholds,
|
||||
pull_data=pull_data
|
||||
)
|
||||
|
||||
# for ha_name, datasets in outputs.items():
|
||||
# datasets["results_df"] = datasets["results_df"].drop(
|
||||
# columns=["eligibility_cavity_type", "eligibility_loft_type"]
|
||||
# )
|
||||
#
|
||||
# # Re-do
|
||||
# res = []
|
||||
# for _, row in tqdm(datasets["results_df"].iterrows(), total=datasets["results_df"].shape[0]):
|
||||
# epc = {
|
||||
# "walls-description": row["walls"],
|
||||
# "roof-description": row["roof"],
|
||||
# "floor-description": "",
|
||||
# "tenure": "",
|
||||
# "current-energy-efficiency": row["sap"],
|
||||
# }
|
||||
# eligibility = Eligibility(epc=epc, cleaned=cleaned)
|
||||
# eligibility.check_eco4_warmfront()
|
||||
# res.append(
|
||||
# {
|
||||
# "row_id": row["row_id"],
|
||||
# "eligibility_cavity_type": eligibility.eco4_warmfront["cavity_type"],
|
||||
# "eligibility_loft_type": eligibility.eco4_warmfront["loft_type"]
|
||||
# }
|
||||
# )
|
||||
#
|
||||
# # Merge back on
|
||||
# res = pd.DataFrame(res)
|
||||
# datasets["results_df"] = datasets["results_df"].merge(res, how="left", on="row_id")
|
||||
#
|
||||
# # Re-save in s3
|
||||
# save_pickle_to_s3(
|
||||
# data={
|
||||
# "results_df": datasets["results_df"],
|
||||
# "scoring_df": datasets["scoring_df"],
|
||||
# "nodata": datasets["nodata"]
|
||||
# },
|
||||
# bucket_name="retrofit-datalake-dev",
|
||||
# s3_file_name=f"ha-analysis/{ha_name}/processed_results.pickle"
|
||||
# )
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue