mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
implementing distance weighting
This commit is contained in:
parent
7d63c16404
commit
eff80e637f
1 changed files with 248 additions and 84 deletions
|
|
@ -1635,8 +1635,9 @@ def propsed_wave_3_sample():
|
||||||
header=4
|
header=4
|
||||||
)
|
)
|
||||||
|
|
||||||
# TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater
|
# TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater and 7 properties missing
|
||||||
asset_list = asset_list[asset_list["Archetype ID"] != "NOT PRIORITY POSTCODE"]
|
# UPRN
|
||||||
|
asset_list = asset_list[~asset_list["Archetype ID"].isin(["NOT PRIORITY POSTCODE", "MISSING UPRN"])]
|
||||||
# Clean address ids
|
# Clean address ids
|
||||||
asset_list = asset_list[~pd.isnull(asset_list["Address ID"])]
|
asset_list = asset_list[~pd.isnull(asset_list["Address ID"])]
|
||||||
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
|
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
|
||||||
|
|
@ -1648,7 +1649,7 @@ def propsed_wave_3_sample():
|
||||||
|
|
||||||
# Keep just the columns we need
|
# Keep just the columns we need
|
||||||
asset_list = asset_list[
|
asset_list = asset_list[
|
||||||
["Address ID", "Archetype ID", "Postal Region", "Postcode", "Property Type", "Wall Type", "Roof Type",
|
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Property Type", "Wall Type", "Roof Type",
|
||||||
"Heating"]
|
"Heating"]
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
@ -1665,7 +1666,7 @@ def propsed_wave_3_sample():
|
||||||
survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
|
survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
|
||||||
|
|
||||||
survey_results_with_original_features = survey_results.merge(
|
survey_results_with_original_features = survey_results.merge(
|
||||||
asset_list[["Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
|
asset_list[["UPRN", "Address ID", "Property Type", "Wall Type", "Roof Type", "Heating"]],
|
||||||
on="Address ID",
|
on="Address ID",
|
||||||
how="left"
|
how="left"
|
||||||
)
|
)
|
||||||
|
|
@ -1673,6 +1674,45 @@ def propsed_wave_3_sample():
|
||||||
if survey_results_with_original_features.shape[0] != survey_results.shape[0]:
|
if survey_results_with_original_features.shape[0] != survey_results.shape[0]:
|
||||||
raise ValueError("Something went wrong")
|
raise ValueError("Something went wrong")
|
||||||
|
|
||||||
|
# We get longitude & Latitude
|
||||||
|
from utils.s3 import read_pickle_from_s3
|
||||||
|
archetyping_spatial_features = read_pickle_from_s3(
|
||||||
|
bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
|
||||||
|
)
|
||||||
|
archetyping_spatial_features = pd.concat(archetyping_spatial_features)
|
||||||
|
archetyping_spatial_features = archetyping_spatial_features[["UPRN", 'LATITUDE', 'LONGITUDE']].rename(
|
||||||
|
columns={"LATITUDE": "latitude", "LONGITUDE": "longitude"}
|
||||||
|
)
|
||||||
|
# Merge them onto both datasets
|
||||||
|
asset_list = asset_list.merge(
|
||||||
|
archetyping_spatial_features, how="left", on="UPRN"
|
||||||
|
)
|
||||||
|
if pd.isnull(asset_list["longitude"]).sum():
|
||||||
|
raise ValueError("Something went wrong")
|
||||||
|
|
||||||
|
survey_results_with_original_features = survey_results_with_original_features.merge(
|
||||||
|
archetyping_spatial_features, how="left", on="UPRN"
|
||||||
|
)
|
||||||
|
if pd.isnull(survey_results_with_original_features["longitude"]).sum():
|
||||||
|
raise ValueError("Something went wrong")
|
||||||
|
|
||||||
|
def haversine(lat1, lon1, lat2, lon2):
    """Return the great-circle distance in meters between two points.

    Uses the haversine formula on a sphere of radius 6,371,000 m.

    Args:
        lat1: Latitude of the first point(s), in decimal degrees.
        lon1: Longitude of the first point(s), in decimal degrees.
        lat2: Latitude of the second point(s), in decimal degrees.
        lon2: Longitude of the second point(s), in decimal degrees.

    Each argument may be a scalar or an array-like; they are combined with
    NumPy arithmetic, so a scalar point can be compared against arrays of
    candidate points in one call.

    Returns:
        The distance in meters, a scalar or an array matching the broadcast
        shape of the inputs.
    """
    # Radius of Earth in meters
    R = 6371000

    # Convert degrees to radians
    lat1, lon1, lat2, lon2 = map(np.radians, [lat1, lon1, lat2, lon2])

    # Differences
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    # Haversine formula
    a = np.sin(dlat / 2.0) ** 2 + np.cos(lat1) * np.cos(lat2) * np.sin(dlon / 2.0) ** 2
    # Rounding error can push `a` a hair outside [0, 1] (e.g. antipodal
    # points), which would make sqrt(1 - a) NaN; clamp to keep it finite.
    a = np.clip(a, 0.0, 1.0)
    c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
    return R * c
|
||||||
|
|
||||||
# Tier definitions
|
# Tier definitions
|
||||||
# Tier 1: We have a property in the same postal region and same archetype that was surveyed and is below EPC D
|
# Tier 1: We have a property in the same postal region and same archetype that was surveyed and is below EPC D
|
||||||
# Tier 2: We have a property in the same archetype that was surveyed and is below EPC D
|
# Tier 2: We have a property in the same archetype that was surveyed and is below EPC D
|
||||||
|
|
@ -1716,6 +1756,7 @@ def propsed_wave_3_sample():
|
||||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||||
|
|
||||||
if region_surveyed["Archetype ID"].duplicated().sum():
|
if region_surveyed["Archetype ID"].duplicated().sum():
|
||||||
|
blah1
|
||||||
region_surveyed = survey_results[
|
region_surveyed = survey_results[
|
||||||
survey_results["Archetype ID"].isin(archetypes) &
|
survey_results["Archetype ID"].isin(archetypes) &
|
||||||
(survey_results["Postal Region"] == region)
|
(survey_results["Postal Region"] == region)
|
||||||
|
|
@ -1755,23 +1796,46 @@ def propsed_wave_3_sample():
|
||||||
survey_results["Archetype ID"].isin(missed_archetypes)
|
survey_results["Archetype ID"].isin(missed_archetypes)
|
||||||
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
][["Archetype ID", "Current EPC Band"]].drop_duplicates()
|
||||||
|
|
||||||
# TODO - We could average the property?? And call it borderline, call out it was averaged!!!
|
|
||||||
# We could also find the nearest property to it, with similar wall, roof, heating?
|
|
||||||
# Can use long/lat for the distance calc. We have this data from previous
|
|
||||||
|
|
||||||
if archetype_surveyed["Archetype ID"].duplicated().sum():
|
if archetype_surveyed["Archetype ID"].duplicated().sum():
|
||||||
archetype_surveyed = survey_results[
|
|
||||||
survey_results["Archetype ID"].isin(missed_archetypes)
|
|
||||||
].groupby("Archetype ID")[["Current SAP Rating"]].mean().reset_index()
|
|
||||||
archetype_surveyed["Current EPC Band"] = archetype_surveyed["Current SAP Rating"].apply(sap_to_epc)
|
|
||||||
archetype_surveyed = archetype_surveyed.drop(columns=["Current SAP Rating"])
|
|
||||||
|
|
||||||
region_assets = region_assets.merge(
|
archetype_surveyed = []
|
||||||
archetype_surveyed,
|
for arch_id in missed_archetypes:
|
||||||
on="Archetype ID",
|
for _, property in region_assets[region_assets["Archetype ID"] == arch_id].iterrows():
|
||||||
how="left",
|
archetype_data = survey_results_with_original_features[
|
||||||
suffixes=("", "_method2")
|
survey_results["Archetype ID"] == arch_id
|
||||||
)
|
].copy()
|
||||||
|
if archetype_data.empty:
|
||||||
|
continue
|
||||||
|
archetype_data["distance_meters"] = haversine(
|
||||||
|
lat1=property.latitude, lon1=property.longitude,
|
||||||
|
lat2=archetype_data["latitude"].values, lon2=archetype_data["longitude"].values
|
||||||
|
)
|
||||||
|
expected_sap = np.average(
|
||||||
|
archetype_data["Current SAP Rating"], weights=1 / (archetype_data["distance_meters"] + 1)
|
||||||
|
)
|
||||||
|
expected_epc = sap_to_epc(expected_sap)
|
||||||
|
archetype_surveyed.append(
|
||||||
|
{
|
||||||
|
"Archetype ID": arch_id,
|
||||||
|
"Address ID": property["Address ID"],
|
||||||
|
"Current EPC Band": expected_epc
|
||||||
|
}
|
||||||
|
)
|
||||||
|
archetype_surveyed = pd.DataFrame(archetype_surveyed)
|
||||||
|
region_assets = region_assets.merge(
|
||||||
|
archetype_surveyed,
|
||||||
|
on=["Archetype ID", "Address ID"],
|
||||||
|
how="left",
|
||||||
|
suffixes=("", "_method2")
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
|
||||||
|
region_assets = region_assets.merge(
|
||||||
|
archetype_surveyed,
|
||||||
|
on="Archetype ID",
|
||||||
|
how="left",
|
||||||
|
suffixes=("", "_method2")
|
||||||
|
)
|
||||||
|
|
||||||
region_assets["Confidence Tier"] = np.where(
|
region_assets["Confidence Tier"] = np.where(
|
||||||
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull(
|
region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull(
|
||||||
|
|
@ -1792,6 +1856,16 @@ def propsed_wave_3_sample():
|
||||||
"5 - EPC C or above", region_assets["Confidence Tier"]
|
"5 - EPC C or above", region_assets["Confidence Tier"]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
region_assets["Confidence Tier"] = np.where(
|
||||||
|
region_assets["Archetype ID"] == "EPC C OR ABOVE",
|
||||||
|
"5 - EPC C or above", region_assets["Confidence Tier"]
|
||||||
|
)
|
||||||
|
|
||||||
|
region_assets["Current EPC Band"] = np.where(
|
||||||
|
region_assets["Archetype ID"] == "EPC C OR ABOVE",
|
||||||
|
"C", region_assets["Current EPC Band"]
|
||||||
|
)
|
||||||
|
|
||||||
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
|
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
|
||||||
|
|
||||||
if not missed_addressids:
|
if not missed_addressids:
|
||||||
|
|
@ -1803,17 +1877,10 @@ def propsed_wave_3_sample():
|
||||||
for a_id in missed_addressids:
|
for a_id in missed_addressids:
|
||||||
property = asset_list[asset_list["Address ID"] == a_id].squeeze()
|
property = asset_list[asset_list["Address ID"] == a_id].squeeze()
|
||||||
|
|
||||||
if property["Property Type"].split(":")[0] in ["House", "Bungalow"]:
|
surveyed = survey_results_with_original_features[
|
||||||
filter_property_types = ["House", "Bungalow"]
|
|
||||||
else:
|
|
||||||
filter_property_types = ["Flat"]
|
|
||||||
|
|
||||||
surveyed_similar = survey_results_with_original_features[
|
|
||||||
(survey_results_with_original_features["Postcode"] == property["Postcode"]) &
|
|
||||||
(
|
(
|
||||||
survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
|
survey_results_with_original_features["Property Type"].str.split(":").str[0] ==
|
||||||
filter_property_types
|
property["Property Type"].split(":")[0]
|
||||||
)
|
|
||||||
) &
|
) &
|
||||||
(
|
(
|
||||||
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
||||||
|
|
@ -1827,62 +1894,38 @@ def propsed_wave_3_sample():
|
||||||
survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
||||||
property["Heating"].split(":")[0]
|
property["Heating"].split(":")[0]
|
||||||
)
|
)
|
||||||
]
|
].copy()
|
||||||
if surveyed_similar.empty:
|
|
||||||
surveyed_similar = survey_results_with_original_features[
|
|
||||||
(survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
|
|
||||||
(survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
|
|
||||||
filter_property_types
|
|
||||||
)) &
|
|
||||||
(survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
|
||||||
property["Wall Type"].split(":")[0]) &
|
|
||||||
(survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
|
|
||||||
property["Roof Type"].split(":")[0]) &
|
|
||||||
(survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
|
||||||
property["Heating"].split(":")[0])
|
|
||||||
]
|
|
||||||
|
|
||||||
if surveyed_similar.empty:
|
if surveyed.empty:
|
||||||
|
blah3
|
||||||
|
|
||||||
# We get an average based on the postcode
|
# Calculate distance
|
||||||
surveyed_similar = survey_results_with_original_features[
|
surveyed["distance_meters"] = haversine(
|
||||||
(survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
|
lat1=property["latitude"], lon1=property["longitude"],
|
||||||
(survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
|
lat2=surveyed["latitude"].values, lon2=surveyed["longitude"].values
|
||||||
filter_property_types
|
)
|
||||||
))
|
surveyed = surveyed.sort_values("distance_meters", ascending=True)
|
||||||
]
|
|
||||||
if surveyed_similar.empty:
|
|
||||||
final_missed_matches.append(
|
|
||||||
{
|
|
||||||
"Address ID": a_id,
|
|
||||||
"Confidence Tier": "4 - no similar property, needs survey to confirm",
|
|
||||||
"Current EPC Band": "Unknown"
|
|
||||||
}
|
|
||||||
|
|
||||||
)
|
# Check for a postcode match: is any surveyed postcode the same as the property postcode?
|
||||||
else:
|
if any(surveyed["Postcode"] == property["Postcode"]):
|
||||||
expected_sap = surveyed_similar["Current SAP Rating"].mean()
|
surveyed_similar = surveyed[surveyed["Postcode"] == property["Postcode"]]
|
||||||
expected_epc = sap_to_epc(expected_sap)
|
|
||||||
if expected_epc in ["C", "B", "A"]:
|
|
||||||
tier = "5 - EPC C or above"
|
|
||||||
else:
|
|
||||||
tier = "3 - similar property, relaxed conditions"
|
|
||||||
|
|
||||||
final_missed_matches.append(
|
if any(surveyed["Postal Region"] == property["Postal Region"]):
|
||||||
{
|
surveyed_similar = surveyed[surveyed["Postal Region"] == property["Postal Region"]]
|
||||||
"Address ID": a_id,
|
|
||||||
"Confidence Tier": tier,
|
# Take the 5 nearest
|
||||||
"Current EPC Band": expected_epc
|
surveyed_similar = surveyed_similar.head(5)
|
||||||
}
|
|
||||||
)
|
# perform a weighted mean of SAP rating - the closer the better
|
||||||
continue
|
expected_sap = np.average(
|
||||||
# We take an average
|
surveyed_similar["Current SAP Rating"], weights=1 / (surveyed_similar["distance_meters"] + 1)
|
||||||
expected_sap = surveyed_similar["Current SAP Rating"].mean()
|
)
|
||||||
expected_epc = sap_to_epc(expected_sap)
|
expected_epc = sap_to_epc(expected_sap)
|
||||||
|
|
||||||
if expected_epc in ["C", "B", "A"]:
|
if expected_epc in ["C", "B", "A"]:
|
||||||
tier = "5 - EPC C or above"
|
tier = "5 - EPC C or above"
|
||||||
else:
|
else:
|
||||||
tier = "3 - similar property"
|
tier = "3 - similar property, weighted on distance"
|
||||||
|
|
||||||
final_missed_matches.append(
|
final_missed_matches.append(
|
||||||
{
|
{
|
||||||
|
|
@ -1891,6 +1934,121 @@ def propsed_wave_3_sample():
|
||||||
"Current EPC Band": expected_epc
|
"Current EPC Band": expected_epc
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
continue
|
||||||
|
|
||||||
|
# if property["Property Type"].split(":")[0] in ["House", "Bungalow"]:
|
||||||
|
# filter_property_types = ["House", "Bungalow"]
|
||||||
|
# else:
|
||||||
|
# filter_property_types = ["Flat"]
|
||||||
|
#
|
||||||
|
# surveyed_similar = survey_results_with_original_features[
|
||||||
|
# (survey_results_with_original_features["Postcode"] == property["Postcode"]) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
|
||||||
|
# filter_property_types
|
||||||
|
# )
|
||||||
|
# ) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
||||||
|
# property["Wall Type"].split(":")[0]
|
||||||
|
# ) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
|
||||||
|
# property["Roof Type"].split(":")[0]
|
||||||
|
# ) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
||||||
|
# property["Heating"].split(":")[0]
|
||||||
|
# )
|
||||||
|
# ]
|
||||||
|
# if surveyed_similar.empty:
|
||||||
|
# surveyed_similar = survey_results_with_original_features[
|
||||||
|
# (survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
|
||||||
|
# (survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
|
||||||
|
# filter_property_types
|
||||||
|
# )) &
|
||||||
|
# (survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
||||||
|
# property["Wall Type"].split(":")[0]) &
|
||||||
|
# (survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
|
||||||
|
# property["Roof Type"].split(":")[0]) &
|
||||||
|
# (survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
||||||
|
# property["Heating"].split(":")[0])
|
||||||
|
# ]
|
||||||
|
#
|
||||||
|
# if surveyed_similar.empty:
|
||||||
|
#
|
||||||
|
# # We get an average based on the postcode
|
||||||
|
# surveyed_similar = survey_results_with_original_features[
|
||||||
|
# (survey_results_with_original_features["Postal Region"] == property["Postal Region"]) &
|
||||||
|
# (survey_results_with_original_features["Property Type"].str.split(":").str[0].isin(
|
||||||
|
# filter_property_types
|
||||||
|
# ))
|
||||||
|
# ]
|
||||||
|
# if surveyed_similar.empty:
|
||||||
|
# surveyed_similar_entire_population = survey_results_with_original_features[
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Property Type"].str.split(":").str[0] == property[
|
||||||
|
# "Property Type"].split(":")[0]
|
||||||
|
# ) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Wall Type"].str.split(":").str[0] ==
|
||||||
|
# property["Wall Type"].split(":")[0]
|
||||||
|
# ) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Roof Type"].str.split(":").str[0] ==
|
||||||
|
# property["Roof Type"].split(":")[0]
|
||||||
|
# ) &
|
||||||
|
# (
|
||||||
|
# survey_results_with_original_features["Heating"].str.split(":").str[0] ==
|
||||||
|
# property["Heating"].split(":")[0]
|
||||||
|
# )
|
||||||
|
# ]
|
||||||
|
#
|
||||||
|
# # We order them by distance on postcode
|
||||||
|
#
|
||||||
|
# # Average
|
||||||
|
# expected_sap = surveyed_similar_entire_population["Current SAP Rating"].mean()
|
||||||
|
# expected_epc = sap_to_epc(expected_sap)
|
||||||
|
#
|
||||||
|
# final_missed_matches.append(
|
||||||
|
# {
|
||||||
|
# "Address ID": a_id,
|
||||||
|
# "Confidence Tier": "3 - similar property, all areas searched",
|
||||||
|
# "Current EPC Band": expected_epc
|
||||||
|
# }
|
||||||
|
#
|
||||||
|
# )
|
||||||
|
# else:
|
||||||
|
# expected_sap = surveyed_similar["Current SAP Rating"].mean()
|
||||||
|
# expected_epc = sap_to_epc(expected_sap)
|
||||||
|
# if expected_epc in ["C", "B", "A"]:
|
||||||
|
# tier = "5 - EPC C or above"
|
||||||
|
# else:
|
||||||
|
# tier = "3 - similar property, relaxed conditions"
|
||||||
|
#
|
||||||
|
# final_missed_matches.append(
|
||||||
|
# {
|
||||||
|
# "Address ID": a_id,
|
||||||
|
# "Confidence Tier": tier,
|
||||||
|
# "Current EPC Band": expected_epc
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
# continue
|
||||||
|
# # We take an average
|
||||||
|
# expected_sap = surveyed_similar["Current SAP Rating"].mean()
|
||||||
|
# expected_epc = sap_to_epc(expected_sap)
|
||||||
|
# if expected_epc in ["C", "B", "A"]:
|
||||||
|
# tier = "5 - EPC C or above"
|
||||||
|
# else:
|
||||||
|
# tier = "3 - similar property"
|
||||||
|
#
|
||||||
|
# final_missed_matches.append(
|
||||||
|
# {
|
||||||
|
# "Address ID": a_id,
|
||||||
|
# "Confidence Tier": tier,
|
||||||
|
# "Current EPC Band": expected_epc
|
||||||
|
# }
|
||||||
|
# )
|
||||||
|
|
||||||
final_missed_matches = pd.DataFrame(final_missed_matches)
|
final_missed_matches = pd.DataFrame(final_missed_matches)
|
||||||
|
|
||||||
|
|
@ -1928,27 +2086,33 @@ def propsed_wave_3_sample():
|
||||||
|
|
||||||
# We create the gain and loss columns
|
# We create the gain and loss columns
|
||||||
# Gain is the sum of these columns:
|
# Gain is the sum of these columns:
|
||||||
# '1 - Archetype surveyed', '1 - property was surveyed',
|
# '1 - Archetype surveyed',
|
||||||
# '2 - same archetype', '3 - similar property',
|
# '1 - property was surveyed',
|
||||||
|
# '2 - same archetype',
|
||||||
|
# '3 - similar property',
|
||||||
|
# '3 - similar property, all areas searched',
|
||||||
|
# '3 - similar property, relaxed conditions'
|
||||||
|
#
|
||||||
# Loss is the sum of these columns:
|
# Loss is the sum of these columns:
|
||||||
# '4 - no similar property, needs survey to confirm',
|
# '4 - no similar property, needs survey to confirm',
|
||||||
# '5 - EPC C or above', '5 - property was surveyed'
|
# '5 - EPC C or above', '5 - property was surveyed'
|
||||||
geographic_summary["Gain"] = geographic_summary[
|
geographic_summary["Gain"] = geographic_summary[
|
||||||
['1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', '3 - similar property']
|
[
|
||||||
|
'1 - Archetype surveyed', '1 - property was surveyed', '2 - same archetype', '3 - similar property',
|
||||||
|
'3 - similar property, all areas searched', '3 - similar property, relaxed conditions'
|
||||||
|
]
|
||||||
].sum(axis=1)
|
].sum(axis=1)
|
||||||
|
|
||||||
geographic_summary["Loss"] = geographic_summary[
|
geographic_summary["Loss"] = geographic_summary[
|
||||||
['4 - no similar property, needs survey to confirm', '5 - EPC C or above', '5 - property was surveyed']
|
['5 - EPC C or above', '5 - property was surveyed']
|
||||||
].sum(axis=1)
|
].sum(axis=1)
|
||||||
|
|
||||||
geographic_summary.sum()
|
print(geographic_summary.sum())
|
||||||
|
|
||||||
geographic_summary = geographic_summary.sort_values("Loss", ascending=True)
|
geographic_summary = geographic_summary.sort_values("Loss", ascending=True)
|
||||||
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
|
geographic_summary["Loss Cumulative Sum"] = geographic_summary["Loss"].cumsum()
|
||||||
geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum()
|
geographic_summary[geographic_summary["Loss Cumulative Sum"] <= 250]["Gain"].sum()
|
||||||
|
|
||||||
geographic_summary[["Loss", "Gain"]].head()
|
|
||||||
|
|
||||||
loss = geographic_summary["Loss"].values
|
loss = geographic_summary["Loss"].values
|
||||||
gain = geographic_summary["Gain"].values
|
gain = geographic_summary["Gain"].values
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue