From 21082d8d3779a75cae422becf1a6e589ebcbaba6 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 6 Mar 2024 19:46:28 +0000 Subject: [PATCH] fixed duplication variance for HA16 --- .../ha_15_32/ha_analysis_batch_3.py | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py index 8c9f59c2..7859d6d2 100644 --- a/etl/eligibility/ha_15_32/ha_analysis_batch_3.py +++ b/etl/eligibility/ha_15_32/ha_analysis_batch_3.py @@ -960,6 +960,21 @@ class DataLoader: survey_list["NO."] ) + # Delete some duplicated entries + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "york road") & + (survey_list["NO."].astype(str) == "12") & + (survey_list["Post Code"] == "M44 5HU") & + (survey_list["SUBMISSION DATE"].astype(str) == "45229")) + ] + + survey_list = survey_list[ + ~((survey_list["Street / Block Name"] == "peatfield avenue") & + (survey_list["NO."].astype(str) == "23") & + (survey_list["Post Code"] == "M27 9XG") & + (survey_list["SUBMISSION DATE"].astype(str) == "45236")) + ] + return survey_list @staticmethod @@ -3265,7 +3280,7 @@ def forecast_remaining_sales(loader): asset_list_remaining = asset_list_remaining[pd.isnull(asset_list_remaining["installation_status"])] asset_list_remaining = asset_list_remaining.drop(columns=["installation_status"]) - # # TODO: TEMP + # TODO: TEMP # n_pre_ciga = asset_list[ # asset_list["ECO Eligibility"].isin( # [ @@ -3304,6 +3319,9 @@ def forecast_remaining_sales(loader): # pre_ciga_revenue - (pre_ciga_remaining_revenue + sold_revenue) # # MISSING 1 SALE from sold # cancelled = survey_list[survey_list["installation_status"].str.contains("ECO4")].shape[0] + # dupes = survey_list[survey_list["asset_list_row_id"].duplicated()]["asset_list_row_id"].values + # z = survey_list[survey_list["asset_list_row_id"].isin(dupes)] + # z[['NO.', 'Street / Block Name', 'Post Code', 'INSTALLED OR CANCELLED', 'SUBMISSION DATE']] # # TODO: END TEMP eligiblity_counts = pd.DataFrame(asset_list["ECO Eligibility"].value_counts()).reset_index()