fixed duplication variance for HA16

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-06 19:46:28 +00:00
parent e2055b3b7d
commit 21082d8d37

View file

@ -960,6 +960,21 @@ class DataLoader:
survey_list["NO."]
)
# Drop specific known duplicate survey rows; each one is identified by its
# street, house number, post code and submission date.
survey_list = survey_list[
~((survey_list["Street / Block Name"] == "york road") &
(survey_list["NO."].astype(str) == "12") &
(survey_list["Post Code"] == "M44 5HU") &
(survey_list["SUBMISSION DATE"].astype(str) == "45229"))
]
survey_list = survey_list[
~((survey_list["Street / Block Name"] == "peatfield avenue") &
(survey_list["NO."].astype(str) == "23") &
(survey_list["Post Code"] == "M27 9XG") &
(survey_list["SUBMISSION DATE"].astype(str) == "45236"))
]
return survey_list
@staticmethod
@ -3265,7 +3280,7 @@ def forecast_remaining_sales(loader):
asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])]
asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"])
# # TODO: TEMP
# TODO: TEMP
# n_pre_ciga = asset_list[
# asset_list["ECO Eligibility"].isin(
# [
@ -3304,6 +3319,9 @@ def forecast_remaining_sales(loader):
# pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue)
# # MISSING 1 SALE from sold
# cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0]
# dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values
# z = survey_list[survey_list["asset_list_row_id"].isin(dupes)]
# z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']]
# # TODO: END TEMP
eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()