working on eco eligibility code

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-26 13:37:50 +00:00
parent 6693ab4ca6
commit 8b48dbac9e

View file

@ -725,6 +725,13 @@ class DataLoader:
def load(self):
# Get the december figures, which is just a csv
self.december_figures = pd.read_csv(self.december_figures_filepath)
# Remove the spaces in HA Name
self.december_figures["HA Name"] = self.december_figures["HA Name"].str.replace(" ", "")
self.december_figures["ECO4"] = self.december_figures["ECO4"].astype("Int64")
self.december_figures["GBIS"] = self.december_figures["GBIS"].astype("Int64")
if self.use_cache:
self.data = read_pickle_from_s3(
bucket_name="retrofit-datalake-dev",
@ -732,11 +739,6 @@ class DataLoader:
)
return
# Get the december figures, which is just a csv
self.december_figures = pd.read_csv(self.december_figures_filepath)
# Remove the spaces in HA Name
self.december_figures["HA Name"] = december_figures["HA Name"].str.replace(" ", "")
data = {}
for filepath in self.directories:
ha_name = filepath.split("/")[2]
@ -768,46 +770,135 @@ class DataLoader:
This function will return a dictionary of facts and figures for each HA
:return:
"""
scheme_map = {
"ECO4": "ECO4",
"AFFORDABLE WARMTH": "ECO4",
}
eco_eligibility_map = {
"not eligble": "not eligible"
}
ha_facts_and_figures = []
for ha_name, data_assets in self.data.items():
asset_list = data_assets["asset_list"].copy()
survey_list = data_assets["survey_list"].copy()
ciga_list = data_assets["ciga_list"].copy()
asset_list["ECO Eligibility"].value_counts()
# Change the column name if it's ECO eligibility
asset_list = asset_list.rename(columns={"ECO eligibility": "ECO Eligibility"})
# Remove surplus whitespace from the ECO Eligibility column
asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.strip()
# Push to lower case
asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].str.lower()
# Remap
asset_list["ECO Eligibility"] = asset_list["ECO Eligibility"].map(eco_eligibility_map)
# We merge on ciga and update the status to reflect if it has failed ciga or not
# If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
# check
asset_list = asset_list.merge(
ciga_list[["asset_list_row_id", "Guarantee"]],
how='left',
on="asset_list_row_id"
)
if not ciga_list.empty:
# We merge on ciga and update the status to reflect if it has failed ciga or not
# If Guarantee is Yes, this means that there is a guarantee in place, and the property failed the CIGA
# check
asset_list = asset_list.merge(
ciga_list[["asset_list_row_id", "Guarantee"]],
how='left',
on="asset_list_row_id"
)
asset_list["ECO Eligibility"].value_counts()
asset_list["ECO Eligibility"].value_counts()
asset_list["ECO Eligibility"] = np.where(
(
asset_list["ECO Eligibility"].str.contains("(Subject to CIGA)", regex=False) &
(asset_list["Guarantee"] == "Yes")
),
"Failed CIGA",
asset_list["ECO Eligibility"]
)
asset_list["ECO Eligibility"] = np.where(
(
asset_list["ECO Eligibility"].str.contains("(subject to ciga)", regex=False) &
(asset_list["Guarantee"] == "Yes")
),
"failed ciga",
asset_list["ECO Eligibility"]
)
# We replace any remaining "Subject to CIGA" with pass Ciga
asset_list["ECO Eligibility"] = np.where(
asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
"Pass CIGA",
asset_list["ECO Eligibility"]
)
# We replace any remaining "Subject to CIGA" with pass Ciga
asset_list["ECO Eligibility"] = np.where(
asset_list["ECO Eligibility"].str.contains("Subject to CIGA", regex=False),
"eco4 - passed ciga",
asset_list["ECO Eligibility"]
)
asset_list = asset_list.drop(columns=["Guarantee"])
asset_list = asset_list.drop(columns=["Guarantee"])
# Update the asset list with the categorisations
# Update the asset list with the categorisations and rename changes
self.data[ha_name]["asset_list"] = asset_list
# Report on sales
sales_report = {}
if not survey_list.empty:
scheme_column = survey_list.columns[0]
# We clean up the survey list installation or cancelled
survey_list["installed_or_cancelled_clean"] = survey_list["INSTALLED OR CANCELLED"].str.lower()
# Remove all punctuation
survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
r'[^\w\s]', '', regex=True
)
# Remove double spaces
survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.replace(
r'\s+', ' ', regex=True
)
# Remove trailing spaces
survey_list["installed_or_cancelled_clean"] = survey_list["installed_or_cancelled_clean"].str.strip()
# Remap the values in the scheme column
survey_list[scheme_column] = survey_list[scheme_column].map(scheme_map)
survey_list["installation_status"] = None
survey_list["installation_status"] = np.where(
survey_list["installed_or_cancelled_clean"].isin(["installed", "installed see notes"]),
"installed",
survey_list["installation_status"]
)
survey_list["installation_status"] = np.where(
survey_list["installed_or_cancelled_clean"].isin(["cancelled"]),
"cancelled",
survey_list["installation_status"]
)
# Find partial installations
survey_list["installation_status"] = np.where(
survey_list["installed_or_cancelled_clean"].str.contains("still to be installed"),
"partially installed",
survey_list["installation_status"]
)
# Find partial cancellations
# TODO: We might have more indications of partial cancellations
survey_list["installation_status"] = np.where(
survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
"partially cancelled",
survey_list["installation_status"]
)
# Finally, for other cases, we set the status to "in progress"
survey_list["installation_status"] = survey_list["installation_status"].fillna("in progress")
# We concatenate the scheme name with the installation status
survey_list["installation_status"] = (
survey_list[scheme_column] + " - " + survey_list["installation_status"]
)
# We get the sales
sales_report = survey_list["installation_status"].value_counts().to_dict()
ha_facts_and_figures.append(
{
"HA Name": ha_name,
**asset_list["ECO Eligibility"].value_counts().to_dict(),
**sales_report
}
)
ha_facts_and_figures = pd.DataFrame(ha_facts_and_figures)
ha_facts_and_figures = ha_facts_and_figures.drop(
columns=["not eligible"]
)
ha_facts_and_figures = self.december_figures.merge(ha_facts_and_figures, how="inner", on="HA Name")
return ha_facts_and_figures