diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 6f98c9fd..5ebb06e2 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -1637,7 +1637,7 @@ def propsed_wave_3_sample(): # TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater and 7 properties missing # UPRN - asset_list = asset_list[~asset_list["Archetype ID"].isin(["NOT PRIORITY POSTCODE", "MISSING UPRN"])] + asset_list = asset_list[~asset_list["Archetype ID"].isin(["MISSING UPRN"])] # Clean address ids asset_list = asset_list[~pd.isnull(asset_list["Address ID"])] asset_list = asset_list[asset_list["Address ID"] != "Address ID"] @@ -1645,12 +1645,13 @@ def propsed_wave_3_sample(): # Create the postal region, taking the first part of the postcode asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0] + asset_list["Street and Region"] = asset_list["Street name"] + " " + asset_list["Postal Region"] unique_postal_regions = asset_list["Postal Region"].unique() # Keep just the columns we need asset_list = asset_list[ - ["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Property Type", "Wall Type", "Roof Type", - "Heating"] + ["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Street and Region", + "Property Type", "Wall Type", "Roof Type", "Heating"] ] survey_results = pd.read_excel( @@ -1853,7 +1854,6 @@ def propsed_wave_3_sample(): suffixes=("", "_method2") ) else: - region_assets = region_assets.merge( archetype_surveyed, on="Archetype ID", @@ -1903,20 +1903,20 @@ def propsed_wave_3_sample(): surveyed = survey_results_with_original_features[ ( - survey_results_with_original_features["Property Type"].str.split(":").str[0] == - property["Property Type"].split(":")[0] + survey_results_with_original_features["Property Type"] == + property["Property Type"] ) & ( - survey_results_with_original_features["Wall Type"].str.split(":").str[0] == - property["Wall Type"].split(":")[0] + survey_results_with_original_features["Wall Type"] == + property["Wall Type"] ) & ( - survey_results_with_original_features["Roof Type"].str.split(":").str[0] == - property["Roof Type"].split(":")[0] + survey_results_with_original_features["Roof Type"] == + property["Roof Type"] ) & ( - survey_results_with_original_features["Heating"].str.split(":").str[0] == - property["Heating"].split(":")[0] + survey_results_with_original_features["Heating"] == + property["Heating"] ) ].copy() @@ -1962,7 +1962,10 @@ def propsed_wave_3_sample(): if "Electric" in property["Heating"]: # Take other electric heating systems surveyed = surveyed[surveyed["Heating"].str.contains("Electric")] - elif property["Heating"] == "Community Heating Systems: Community boilers only (RdSAP)": + elif property["Heating"] in [ + "Community Heating Systems: Community boilers only (RdSAP)", + "Community Heating Systems: Community CHP and boilers (RdSAP)" + ]: # Take other community heating systems surveyed = surveyed[surveyed["Heating"].str.contains("Community")] elif property["Heating"] == 'Heat Pump: (from database)': @@ -2001,8 +2004,8 @@ def propsed_wave_3_sample(): if any(surveyed["Postal Region"] == property["Postal Region"]): surveyed = surveyed[surveyed["Postal Region"] == property["Postal Region"]] - # Take the 5 nearest - surveyed = surveyed.head(5) + # Take the 3 nearest + surveyed = surveyed.head(3) # # We allow a max distance of 10km # surveyed = surveyed[surveyed["distance_meters"] < 10000] @@ -2176,6 +2179,9 @@ def propsed_wave_3_sample(): results = pd.concat(results) + # home = results[results["Confidence Tier"] == "5 - EPC C or above"].sample(1) + # region = home["Postal Region"].values[0] + # Create a pivot table for counts of Confidence Tier by Postal Region geographic_summary = results.pivot_table( index='Postal Region', @@ -2192,7 +2198,9 @@ def propsed_wave_3_sample(): # '3 - similar property, weighted on distance' gain_columns = [ - '1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', + '1 - Archetype surveyed', + '1 - property was surveyed', + '2 - same archetype', '3 - similar property, weighted on distance' ] # @@ -2200,8 +2208,11 @@ def propsed_wave_3_sample(): # '4 - no similar property, needs survey to confirm', # '5 - EPC C or above', '5 - property was surveyed' - loss_columns = ['4 - no similar property, needs survey to confirm', '5 - EPC C or above', - '5 - property was surveyed'] + loss_columns = [ + '4 - no similar property, needs survey to confirm', + '5 - EPC C or above', + '5 - property was surveyed' + ] geographic_summary["Gain"] = geographic_summary[gain_columns].sum(axis=1) geographic_summary["Loss"] = geographic_summary[loss_columns].sum(axis=1) @@ -2249,26 +2260,30 @@ def propsed_wave_3_sample(): # We now see if there are any postcodes that have no loss that can be added unselected_regions = geographic_summary[~geographic_summary["Selected"]]["Postal Region"].values + # TODO: Try on street + postcode_summary = results.pivot_table( - index='Postcode', + index='Street and Region', columns='Confidence Tier', aggfunc='size', fill_value=0 ).reset_index() - postcode_summary = postcode_summary.merge( - results[["Postcode", "Postal Region"]].drop_duplicates(), - how="left", on="Postcode" - ) - - postcode_summary_unselected_regions = postcode_summary[ - postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions) - ].copy() + # postcode_summary = postcode_summary.merge( + # results[["Postcode", "Postal Region"]].drop_duplicates(), + # how="left", on="Postcode" + # ) + # + postcode_summary_unselected_regions = postcode_summary.copy() + # postcode_summary_unselected_regions = postcode_summary[ + # postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions) + # ].copy() postcode_summary_unselected_regions["Gain"] = postcode_summary_unselected_regions[gain_columns].sum(axis=1) postcode_summary_unselected_regions["Loss"] = postcode_summary_unselected_regions[loss_columns].sum(axis=1) # Remaining loss allowed - remaining_loss_constraint = 250 - region_totals["Loss"] + # remaining_loss_constraint = 230 - region_totals["Loss"] + remaining_loss_constraint = 250 postcode_selected_rows, _ = optimise( gain=postcode_summary_unselected_regions["Gain"].values, loss=postcode_summary_unselected_regions["Loss"].values, @@ -2284,12 +2299,40 @@ def propsed_wave_3_sample(): postcode_totals = postcode_optimised_additional_properties[["Gain", "Loss"]].sum() - bid_size = region_totals.sum() + postcode_totals.sum() + bid_size = postcode_totals.sum() print("Bid Size:", bid_size) - total_epc_d_or_below = region_totals["Gain"] + postcode_totals["Gain"] + total_epc_d_or_below = postcode_totals["Gain"] print("Total EPC D or below:", total_epc_d_or_below) - total_epc_c = region_totals["Loss"] + postcode_totals["Loss"] + total_epc_c = postcode_totals["Loss"] print("Total EPC C or above:", total_epc_c) + # Total needing a survey + total_needing_survey = postcode_optimised_additional_properties[ + "4 - no similar property, needs survey to confirm" + ].sum() + print("Total needing survey:", total_needing_survey) + + # Look for postcodes that have no loss + unselected_streets = postcode_summary_unselected_regions[ + ~postcode_summary_unselected_regions["Selected"] + ]["Street and Region"].values + + postcode_summary2 = results[ + results["Street and Region"].isin(unselected_streets) + ].pivot_table( + index='Postcode', + columns='Confidence Tier', + aggfunc='size', + fill_value=0 + ).reset_index() + + postcode_summary2["Gain"] = postcode_summary2[gain_columns].sum(axis=1) + postcode_summary2["Loss"] = postcode_summary2[loss_columns].sum(axis=1) + + no_loss_postcodes = postcode_summary2[postcode_summary2["Loss"] == 0].sort_values("Gain", ascending=False) + total_bid_size = bid_size + no_loss_postcodes["Gain"].sum() + print(total_bid_size) + + z = results[results["Confidence Tier"] == "5 - EPC C or above"] # if __name__ == "__main__": # main()