mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
creating loss and gain columns
This commit is contained in:
parent
05cf751478
commit
7d209d5d8e
1 changed file with 40 additions and 8 deletions
|
|
@ -1703,7 +1703,7 @@ def propsed_wave_3_sample():
|
|||
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band"].isin(["C", "B", "A"]),
|
||||
"6 - property was surveyed", region_assets["Confidence Tier"]
|
||||
"5 - property was surveyed", region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
archetypes = region_assets[
|
||||
|
|
@ -1721,6 +1721,7 @@ def propsed_wave_3_sample():
|
|||
(survey_results["Postal Region"] == region)
|
||||
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
|
||||
region_surveyed["Current EPC Band"] = region_surveyed["Current SAP Rating"].apply(sap_to_epc)
|
||||
region_surveyed = region_surveyed.drop(columns=["Current SAP Rating"])
|
||||
|
||||
region_assets = region_assets.merge(
|
||||
region_surveyed,
|
||||
|
|
@ -1743,7 +1744,7 @@ def propsed_wave_3_sample():
|
|||
# Handle EPC C or above (bands C, B and A)
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
|
||||
"6 - EPC C or above", region_assets["Confidence Tier"]
|
||||
"5 - EPC C or above", region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
region_assets = region_assets.drop(columns=["Current EPC Band_method1"])
|
||||
|
|
@ -1773,7 +1774,8 @@ def propsed_wave_3_sample():
|
|||
)
|
||||
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]),
|
||||
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull(
|
||||
region_assets["Confidence Tier"]),
|
||||
"2 - same archetype", region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
|
|
@ -1786,8 +1788,8 @@ def propsed_wave_3_sample():
|
|||
|
||||
# We label properties at EPC C or above (bands C, B and A)
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band"].isin(["C", "B", "A"]),
|
||||
"6 - EPC C or above", region_assets["Confidence Tier"]
|
||||
region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
|
||||
"5 - EPC C or above", region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
|
||||
|
|
@ -1823,7 +1825,7 @@ def propsed_wave_3_sample():
|
|||
final_missed_matches.append(
|
||||
{
|
||||
"Address ID": a_id,
|
||||
"Confidence Tier": "5 - no similar property, needs survey to confirm",
|
||||
"Confidence Tier": "4 - no similar property, needs survey to confirm",
|
||||
"Current EPC Band": "Unknown"
|
||||
}
|
||||
)
|
||||
|
|
@ -1832,7 +1834,7 @@ def propsed_wave_3_sample():
|
|||
expected_sap = surveyed_similar["Current SAP Rating"].mean()
|
||||
expected_epc = sap_to_epc(expected_sap)
|
||||
if expected_epc in ["C", "B", "A"]:
|
||||
tier = "6 - EPC C or above"
|
||||
tier = "5 - EPC C or above"
|
||||
else:
|
||||
tier = "3 - similar property"
|
||||
|
||||
|
|
@ -1861,12 +1863,42 @@ def propsed_wave_3_sample():
|
|||
region_assets["Current EPC Band_method3"], region_assets["Current EPC Band"]
|
||||
)
|
||||
|
||||
region_assets = region_assets.drop(columns=["Confidence Tier_method3"])
|
||||
region_assets = region_assets.drop(columns=["Confidence Tier_method3", "Current EPC Band_method3"])
|
||||
|
||||
if pd.isnull(region_assets["Current EPC Band"]).sum():
|
||||
raise Exception("Something went wrong")
|
||||
|
||||
results.append(region_assets)
|
||||
|
||||
results = pd.concat(results)
|
||||
|
||||
# Create a pivot table for counts of Confidence Tier by Postal Region
|
||||
geographic_summary = results.pivot_table(
|
||||
index='Postal Region',
|
||||
columns='Confidence Tier',
|
||||
aggfunc='size',
|
||||
fill_value=0
|
||||
).reset_index()
|
||||
|
||||
# Create the "Gain" and "Loss" columns from the per-tier counts
|
||||
# Gain is the sum of these columns:
|
||||
# '1 - Archetype surveyed', '1 - property was surveyed',
|
||||
# '2 - same archetype', '3 - similar property',
|
||||
# Loss is the sum of these columns:
|
||||
# '4 - no similar property, needs survey to confirm',
|
||||
# '5 - EPC C or above', '5 - property was surveyed'
|
||||
geographic_summary["Gain"] = geographic_summary[
|
||||
['1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', '3 - similar property']
|
||||
].sum(axis=1)
|
||||
|
||||
geographic_summary["Loss"] = geographic_summary[
|
||||
['4 - no similar property, needs survey to confirm', '5 - EPC C or above', '5 - property was surveyed']
|
||||
].sum(axis=1)
|
||||
|
||||
geographic_summary.sum()
|
||||
|
||||
geographic_summary = geographic_summary.sort_values("Loss", ascending=True)
|
||||
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue