diff --git a/etl/customers/stonewater/Wave 3 Preparation.py b/etl/customers/stonewater/Wave 3 Preparation.py index 744b3400..c8e61a0e 100644 --- a/etl/customers/stonewater/Wave 3 Preparation.py +++ b/etl/customers/stonewater/Wave 3 Preparation.py @@ -2297,39 +2297,9 @@ def propsed_wave_3_sample(): if pd.isnull(results[c]).sum(): raise Exception("Something went wrong") - # home = results[results["Confidence Tier"] == "5 - EPC C or above"].sample(1) - # region = home["Postal Region"].values[0] - - # Create a pivot table for counts of Confidence Tier by Postal Region - geographic_summary = results.pivot_table( - index='Postal Region', - columns='Confidence Tier', - aggfunc='size', - fill_value=0 - ).reset_index() - - # We create the gain and loss columns - # Gain is the sum of these columns: - # '1 - Archetype surveyed', - # '1 - property was surveyed', - # '2 - same archetype', - # '3 - similar property, weighted on distance' - gain_columns = sorted([x for x in results["Confidence Tier"].unique() if "1 - " in x or "2 - " in x or "3 - " in x]) loss_columns = sorted([x for x in results["Confidence Tier"].unique() if "4 - " in x or "5 - " in x]) - geographic_summary["Gain"] = geographic_summary[gain_columns].sum(axis=1) - geographic_summary["Loss"] = geographic_summary[loss_columns].sum(axis=1) - - print(geographic_summary.sum()) - - geographic_summary = geographic_summary.sort_values("Loss", ascending=True) - geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum() - geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum() - - loss = geographic_summary["Loss"].values - gain = geographic_summary["Gain"].values - def optimise(gain, loss, max_loss=250): # Define the coefficients for the objective function (negative because we maximize Gain) @@ -2352,76 +2322,51 @@ def propsed_wave_3_sample(): return selected_rows, optimal_gain - selected_rows, _ = optimise(gain, loss, 250) - - # Select the rows that are selected - geographic_summary["Selected"] = selected_rows == 1 - geographic_summary[geographic_summary["Selected"]].sum() - - region_totals = geographic_summary[ - geographic_summary["Selected"] - ][["Gain", "Loss"]].sum() - - # We now see if there are any postcodes that have no loss that can be added - unselected_regions = geographic_summary[~geographic_summary["Selected"]]["Postal Region"].values - - # TODO: Try on street - - postcode_summary = results.pivot_table( + street_summary = results.pivot_table( index='Street and Region', columns='Confidence Tier', aggfunc='size', fill_value=0 ).reset_index() - # postcode_summary = postcode_summary.merge( - # results[["Postcode", "Postal Region"]].drop_duplicates(), - # how="left", on="Postcode" - # ) - # - postcode_summary_unselected_regions = postcode_summary.copy() - # postcode_summary_unselected_regions = postcode_summary[ - # postcode_summary["Postcode"].str.split(" ").str[0].isin(unselected_regions) - # ].copy() - postcode_summary_unselected_regions["Gain"] = postcode_summary_unselected_regions[gain_columns].sum(axis=1) - postcode_summary_unselected_regions["Loss"] = postcode_summary_unselected_regions[loss_columns].sum(axis=1) + street_summary["Gain"] = street_summary[gain_columns].sum(axis=1) + street_summary["Loss"] = street_summary[loss_columns].sum(axis=1) - # Remaining loss allowed - # remaining_loss_constraint = 230 - region_totals["Loss"] - remaining_loss_constraint = 220 - postcode_selected_rows, _ = optimise( - gain=postcode_summary_unselected_regions["Gain"].values, - loss=postcode_summary_unselected_regions["Loss"].values, - max_loss=int(remaining_loss_constraint) + print(street_summary.sum()) + + selected_rows, _ = optimise( + gain=street_summary["Gain"].values, + loss=street_summary["Loss"].values, + max_loss=250 ) - postcode_summary_unselected_regions["Selected"] = postcode_selected_rows == 1 - postcode_summary_unselected_regions[postcode_summary_unselected_regions["Selected"]][["Gain", "Loss"]].sum() + street_summary["Selected"] = selected_rows == 1 + print(street_summary[street_summary["Selected"]][["Gain", "Loss"]].sum()) - postcode_optimised_additional_properties = postcode_summary_unselected_regions[ - postcode_summary_unselected_regions["Selected"] + selected_streets = street_summary[ + street_summary["Selected"] ] - postcode_totals = postcode_optimised_additional_properties[["Gain", "Loss"]].sum() + totals = selected_streets[["Gain", "Loss"]].sum() - bid_size = postcode_totals.sum() + bid_size = totals.sum() print("Bid Size:", bid_size) - total_epc_d_or_below = postcode_totals["Gain"] + total_epc_d_or_below = totals["Gain"] print("Total EPC D or below:", total_epc_d_or_below) - total_epc_c = postcode_totals["Loss"] + total_epc_c = totals["Loss"] print("Total EPC C or above:", total_epc_c) # Total needing a survey - total_needing_survey = postcode_optimised_additional_properties[ + total_needing_survey = selected_streets[ "4 - no similar property, needs survey to confirm" ].sum() print("Total needing survey:", total_needing_survey) # Look for postcodes that have no loss - unselected_streets = postcode_summary_unselected_regions[ - ~postcode_summary_unselected_regions["Selected"] + unselected_streets = street_summary[ + ~street_summary["Selected"] ]["Street and Region"].values - postcode_summary2 = results[ + postcode_summary = results[ results["Street and Region"].isin(unselected_streets) ].pivot_table( index='Postcode', @@ -2430,14 +2375,12 @@ def propsed_wave_3_sample(): fill_value=0 ).reset_index() - postcode_summary2["Gain"] = postcode_summary2[gain_columns].sum(axis=1) - postcode_summary2["Loss"] = postcode_summary2[loss_columns].sum(axis=1) + postcode_summary["Gain"] = postcode_summary[gain_columns].sum(axis=1) + postcode_summary["Loss"] = postcode_summary[loss_columns].sum(axis=1) - no_loss_postcodes = postcode_summary2[postcode_summary2["Loss"] == 0].sort_values("Gain", ascending=False) + no_loss_postcodes = postcode_summary[postcode_summary["Loss"] == 0].sort_values("Gain", ascending=False) total_bid_size = bid_size + no_loss_postcodes["Gain"].sum() print(total_bid_size) - z = results[results["Confidence Tier"] == "5 - EPC C or above"] - # if __name__ == "__main__": # main()