fixed variance for HA6

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-06 19:34:43 +00:00
parent e966dfdf6e
commit e2055b3b7d

View file

@ -730,6 +730,81 @@ class DataLoader:
"Post Code"
] = "ST5 7BY"
# PERFORM ADDITIONAL DROPS
# Dropping rows based on multiple conditions
conditions_to_drop = [
(survey_list['Street / Block Name'] == "Bedford Crescent") & (survey_list['Post Code'] == "ST5 3EH") & (
survey_list['NO.'] == 23) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
(survey_list['Street / Block Name'] == "Hereford Avenue") & (survey_list['Post Code'] == "ST5 3EJ") & (
survey_list['NO.'] == 92) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
(survey_list['Street / Block Name'] == "Seabridge Lane") & (survey_list['Post Code'] == "ST5 3EX") & (
survey_list['NO.'].isin([16, 18, 42])) & (
survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
(survey_list['Street / Block Name'] == "ESKDALE PLACE") & (survey_list['Post Code'] == "ST5 3QW") & (
survey_list['NO.'] == 5) & (survey_list['SUBMISSION DATE'].astype(str) == "2023-03-06 00:00:00"),
(survey_list['Street / Block Name'] == "Birch House road") & (survey_list['Post Code'] == "ST6 2LS") & (
survey_list['NO.'].isin([56, 58])),
(survey_list['Street / Block Name'] == "Blackthorn Place") & (survey_list['Post Code'] == "ST6 2LS") & (
survey_list['NO.'].isin([37, 39])),
(survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 7BT") & (
survey_list['NO.'].isin([17, 6])),
(survey_list['Street / Block Name'] == "Lion Grove") & (survey_list['Post Code'] == "ST5 7HQ") & (
survey_list['NO.'].isin([10, 12])) & (
survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
(survey_list['Street / Block Name'] == "DENRY CRESCENT") & (survey_list['Post Code'] == "ST5 8JW") & (
survey_list['NO.'] == 87) & (survey_list['INSTALLED OR CANCELLED'].str.contains("NO UPDATE YET")),
(survey_list['Street / Block Name'] == "HOLLINS CRESCENT") & (survey_list['Post Code'] == "ST7 1JW") & (
survey_list['NO.'] == 19)
]
# Combine all conditions with an OR "|"
combined_condition = np.logical_or.reduce(conditions_to_drop)
# Drop rows that meet the combined condition
survey_list = survey_list[~combined_condition]
# Making replacements using np.where
survey_list['Post Code'] = np.where(
(survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3EH") & (
survey_list['NO.'] == 17),
"ST5 7BT",
survey_list['Post Code']
)
survey_list['Post Code'] = np.where(
(survey_list['Street / Block Name'] == "Whitethorn Way") & (survey_list['Post Code'] == "ST5 3ED") & (
survey_list['NO.'] == 6),
"ST5 7BT",
survey_list['Post Code']
)
# Maple Avenue (Stoke-on-Trent, not Newcastle) should be st7 1jw
survey_list["Post Code"] = np.where(
(survey_list["Street / Block Name"].str.lower().str.contains("maple avenue")) & (
survey_list["Post Code"].str.lower() == "st7 1jx"
),
"st7 1jw",
survey_list["Post Code"]
)
# Hollins Crescent should be st7 1jx
survey_list["Post Code"] = np.where(
(survey_list["Street / Block Name"].str.lower().str.contains("hollins crescent")) & (
survey_list["Post Code"].str.lower() == "st7 1jw"
),
"st7 1jx",
survey_list["Post Code"]
)
# Additional drops as the above misses some:
survey_list = survey_list[
~((survey_list["NO."].astype(str).isin(["18", "42"])) &
(survey_list["Street / Block Name"] == "Seabridge Lane") &
(survey_list["Post Code"] == "ST5 3EY") &
(survey_list["SUBMISSION DATE"].astype(str) == "24.07.2023") &
(survey_list["INSTALLED OR CANCELLED"].str.contains("NO UPDATE YET")))
]
return survey_list
@staticmethod
@ -1176,6 +1251,11 @@ class DataLoader:
if matching_lookup.shape[0] != survey_list.shape[0]:
raise ValueError("Mismatch in the number of survey rows and matching lookup rows")
matching_lookup = matching_lookup[~pd.isnull(matching_lookup["asset_list_row_id"])]
if matching_lookup["asset_list_row_id"].duplicated().sum():
raise ValueError("Duplicated matches in survey list")
# Merge onto the survey list
survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
@ -1483,7 +1563,7 @@ class DataLoader:
# TODO: We might have more indications of partial cancellations
survey_list["installation_status"] = np.where(
survey_list["installed_or_cancelled_clean"].isin(["loft cancelled"]),
"partially cancelled",
"cancelled",
survey_list["installation_status"]
)
else:
@ -3174,6 +3254,8 @@ def forecast_remaining_sales(loader):
if survey_list.empty:
asset_list_remaining = asset_list.copy()
else:
# For HA6, a small number of postcodes do not match any item in the asset list, so drop the unmatched rows
survey_list = survey_list[~pd.isnull(survey_list["asset_list_row_id"])]
asset_list_remaining = asset_list.merge(
survey_list[["asset_list_row_id", "installation_status"]],
how="left",
@ -3183,6 +3265,47 @@ def forecast_remaining_sales(loader):
asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
# # TODO: TEMP
# n_pre_ciga = asset_list[
# asset_list["ECO Eligibility"].isin(
# [
# "eco4 - passed ciga",
# "eco4 (subject to ciga)",
# "failed ciga",
# "eco4"
# ]
# )
# ].shape[0]
#
# n_pre_ciga_remaining = asset_list_remaining[
# asset_list_remaining["ECO Eligibility"].isin(
# [
# "eco4 - passed ciga",
# "eco4 (subject to ciga)",
# "failed ciga",
# "eco4"
# ]
# )
# ].shape[0]
#
# compare_to_ids = asset_list_remaining["asset_list_row_id"].values
# assets_diff_ids = [x for x in asset_list["asset_list_row_id"].values if x not in compare_to_ids]
# diff = asset_list[asset_list["asset_list_row_id"].isin(assets_diff_ids)]
#
# n_sold = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
# # cancellations = survey_list[]
# asset_list["ECO Eligibility"].value_counts()
#
# # Revenue
# pre_ciga_revenue = n_pre_ciga * eco4_rate
# pre_ciga_remaining_revenue = n_pre_ciga_remaining * eco4_rate
# sold_revenue = n_sold * eco4_rate
#
# pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
# # MISSING 1 SALE from sold
# cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
# # TODO: END TEMP
eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()
eligiblity_counts_remaining = pd.DataFrame(asset_list_remaining["ECO Eligibility"].value_counts()).reset_index()
@ -3402,13 +3525,13 @@ def forecast_remaining_sales(loader):
("", "Warmfront post code list", "Total #", "ECO4 total (pre-ciga)"): eco4_pre_ciga,
("ECO4 pre-ciga", "", "Remaining - #", ""): eco4_pre_ciga_remaining,
("ECO4 pre-ciga", "", "Total - £", ""): eco4_pre_ciga_revenue,
("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
("ECO4 pre-ciga", "", "VARIANCE - TOTAL", ""): variance_total,
("ECO4 pre-ciga", "", "VARIANCE - REMAINING", ""): variance_remaining,
("ECO4 pre-ciga", "", "Sold - £", ""): eco4_actually_sold,
("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations,
("ECO4 pre-ciga", "", "Confirmed cancellations - £", ""): eco4_confirmed_cancellations * eco4_rate,
# This is for jobs that are in-progress and could still cancel
("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations,
("ECO4 pre-ciga", "", "Remaining - £", ""): eco4_pre_ciga_remaining_revenue,
("ECO4 pre-ciga", "", "Unconfirmed cancellations - £", ""): eco4_expected_cancellations * eco4_rate,
# ECO4 - asset list, post ciga, total
("ECO4 post-ciga", "", "Estimated total eligible - #", "ECO4 total"):
eco4_post_ciga_total_results[
@ -3460,9 +3583,9 @@ def forecast_remaining_sales(loader):
("GBIS Postcode list", "Warmfront post code list", "Total - #", "GBIS total"): gbis_total,
("GBIS Postcode list", "Warmfront post code list", "Total - £", "GBIS total"): gbis_total_revenue,
("GBIS Postcode list", "Warmfront post code list", "Sold - £", "GBIS total"): gbis_actually_sold,
("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations,
("GBIS Postcode list", "", "Confirmed cancellations - £", ""): gbis_confirmed_cancellations * gbis_rate,
# This is for jobs that are in-progress and could still cancel
("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations,
("GBIS Postcode list", "", "Unconfirmed cancellations - £", ""): gbis_expected_cancellations * gbis_rate,
("GBIS Postcode list", "Warmfront post code list", "Remaining - #", "GBIS total"): gbis_remaining,
("GBIS Postcode list", "Warmfront post code list", "Remaining - £", "GBIS total"): gbis_remaining_revenue,
("GBIS Postcode list", "", "Delta vs original estimate, remaining - %", ""):