creating loss and gain columns

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-17 16:28:43 +00:00
parent 05cf751478
commit 7d209d5d8e

View file

@ -1703,7 +1703,7 @@ def propsed_wave_3_sample():
region_assets["Confidence Tier"] = np.where(
region_assets["Current EPC Band"].isin(["C", "B", "A"]),
"6 - property was surveyed", region_assets["Confidence Tier"]
"5 - property was surveyed", region_assets["Confidence Tier"]
)
archetypes = region_assets[
@ -1721,6 +1721,7 @@ def propsed_wave_3_sample():
(survey_results["Postal Region"] == region)
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
region_surveyed["Current EPC Band"] = region_surveyed["Current SAP Rating"].apply(sap_to_epc)
region_surveyed = region_surveyed.drop(columns=["Current SAP Rating"])
region_assets = region_assets.merge(
region_surveyed,
@ -1743,7 +1744,7 @@ def propsed_wave_3_sample():
# Handle EPC C or above (bands C, B and A) that have no Confidence Tier yet
region_assets["Confidence Tier"] = np.where(
region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
"6 - EPC C or above", region_assets["Confidence Tier"]
"5 - EPC C or above", region_assets["Confidence Tier"]
)
region_assets = region_assets.drop(columns=["Current EPC Band_method1"])
@ -1773,7 +1774,8 @@ def propsed_wave_3_sample():
)
region_assets["Confidence Tier"] = np.where(
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]),
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull(
region_assets["Confidence Tier"]),
"2 - same archetype", region_assets["Confidence Tier"]
)
@ -1786,8 +1788,8 @@ def propsed_wave_3_sample():
# We label properties at EPC C or above (bands C, B and A)
region_assets["Confidence Tier"] = np.where(
region_assets["Current EPC Band"].isin(["C", "B", "A"]),
"6 - EPC C or above", region_assets["Confidence Tier"]
region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
"5 - EPC C or above", region_assets["Confidence Tier"]
)
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
@ -1823,7 +1825,7 @@ def propsed_wave_3_sample():
final_missed_matches.append(
{
"Address ID": a_id,
"Confidence Tier": "5 - no similar property, needs survey to confirm",
"Confidence Tier": "4 - no similar property, needs survey to confirm",
"Current EPC Band": "Unknown"
}
)
@ -1832,7 +1834,7 @@ def propsed_wave_3_sample():
expected_sap = surveyed_similar["Current SAP Rating"].mean()
expected_epc = sap_to_epc(expected_sap)
if expected_epc in ["C", "B", "A"]:
tier = "6 - EPC C or above"
tier = "5 - EPC C or above"
else:
tier = "3 - similar property"
@ -1861,12 +1863,42 @@ def propsed_wave_3_sample():
region_assets["Current EPC Band_method3"], region_assets["Current EPC Band"]
)
region_assets = region_assets.drop(columns=["Confidence Tier_method3"])
region_assets = region_assets.drop(columns=["Confidence Tier_method3", "Current EPC Band_method3"])
if pd.isnull(region_assets["Current EPC Band"]).sum():
raise Exception("Something went wrong")
results.append(region_assets)
results = pd.concat(results)
# Create a pivot table for counts of Confidence Tier by Postal Region
geographic_summary = results.pivot_table(
index='Postal Region',
columns='Confidence Tier',
aggfunc='size',
fill_value=0
).reset_index()
# We create the gain and loss columns
# Gain is the sum of these columns:
# '1 - Archetype surveyed', '1 - property was surveyed',
# '2 - same archetype', '3 - similar property',
# Loss is the sum of these columns:
# '4 - no similar property, needs survey to confirm',
# '5 - EPC C or above', '5 - property was surveyed'
geographic_summary["Gain"] = geographic_summary[
['1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', '3 - similar property']
].sum(axis=1)
geographic_summary["Loss"] = geographic_summary[
['4 - no similar property, needs survey to confirm', '5 - EPC C or above', '5 - property was surveyed']
].sum(axis=1)
geographic_summary.sum()
geographic_summary = geographic_summary.sort_values("Loss", ascending=True)
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
# if __name__ == "__main__":
# main()