Added representative SAP

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-23 14:10:53 +01:00
parent 0df3394c6c
commit 3977e911ec

View file

@ -1992,6 +1992,13 @@ def updated_version():
priority_postcodes, previous_waves_address_id, master_sheet = read_stonewater_asset_data()
# Pull in the EPC data
epc_data = read_epc_data(uprn_lookup_2)
########################################################################
# Prepare the data
########################################################################
# Filter the asset list down to the priority postcodes
asset_list["is_priority_postcode"] = asset_list["postcode"].isin(priority_postcodes)
@ -2012,8 +2019,17 @@ def updated_version():
right_on="Address ID"
)
asset_list = asset_list.merge(
epc_data[["internal_id", "current-energy-efficiency", "lodgement-date", "estimated"]],
how="left",
on="internal_id"
)
asset_list["days_since_lodgement_epc"] = (
datetime.now() - pd.to_datetime(asset_list["lodgement-date"], errors="coerce", dayfirst=True)
).dt.days
# Flag properties that were surveyed within the last 5 years
asset_list["epc_within_5_years"] = asset_list["days_since_lodgement"] < 5 * 365
asset_list["epc_within_5_years"] = asset_list["days_since_lodgement_epc"] < 5 * 365
# Identify properties that have had an EPC done within the last 5 years and whose SAP rating is already
# an EPC C. Alternatively, any property with an EPC rating of 80 or above is also considered, regardless of when
@ -2027,9 +2043,9 @@ def updated_version():
asset_list["is_priority_postcode"] & ~asset_list["In Osmosis Wave 2.1"] & ~asset_list["is_epc_c_or_above"]
][
[
"internal_id", "customer_asset_id", "postcode", "house_number", "address1", "address2", "city_town",
"county", "external_address_id", "owner", "days_since_lodgement", "Lodgement Date", "epc_within_5_years",
"EPC Rating"
"internal_id", "uprn", "udprn", "customer_asset_id", "postcode", "house_number", "address1", "address2",
"city_town", "county", "external_address_id", "owner", "days_since_lodgement", "Lodgement Date",
"epc_within_5_years", "EPC Rating", "estimated", "current-energy-efficiency", "lodgement-date"
]
]
@ -2043,7 +2059,22 @@ def updated_version():
right_on="Address ID"
)
# Pull in the EPC data
# For SAP, we use the most recent EPC if epc_within_5_years is True; otherwise we use the parity-modelled SAP
clustering_features["current-energy-efficiency"] = clustering_features["current-energy-efficiency"].astype(float)
clustering_features["representative_sap"] = np.where(
clustering_features["epc_within_5_years"],
clustering_features["current-energy-efficiency"],
clustering_features["parity_modelled_sap"]
)
# incorect_epcs = clustering_features[
# clustering_features["EPC Rating"] != clustering_features["current-energy-efficiency"]]
# incorect_epcs = incorect_epcs[
# ~pd.isnull(incorect_epcs["current-energy-efficiency"]) & pd.isnull(incorect_epcs["estimated"])
# ]
# incorect_epcs = incorect_epcs.rename(columns={"current-energy-efficiency": "Current SAP Rating"})
# # Store data
# incorect_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Stonewater/Incorrect EPCs.csv", index=False)
def read_asset_list():
@ -2260,6 +2291,7 @@ def read_epc_data(uprn_lookup_2):
s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
bucket_name="retrofit-data-dev"
)
epc_data_batch_2 = pd.DataFrame(epc_data_batch_2)
complete_epcs = pd.concat([epc_data, epc_data_batch_2])