Model/etl/customers/gla_croydon_demo/asset_list.py
2024-04-26 14:06:48 +01:00

208 lines
8.5 KiB
Python

import pandas as pd
from utils.s3 import save_csv_to_s3
# Account and portfolio identifiers. app() uses them to build the S3 key
# "<USER_ID>/<PORTFOLIO_ID>/inputs.csv"; PORTFOLIO_ID is also echoed in the
# request body that app() prints.
USER_ID = 8
PORTFOLIO_ID = 67

# Fixed UPRN selections, one list per archetype. The list sizes (20, 46, 23, 13)
# match the sample sizes noted in the commented-out sampling code inside app();
# pinning the values keeps the demo portfolio stable between runs.

# Archetype 1 - 20 UPRNs.
archetype_1_uprns = [
    100020604138, 200001188299, 100020578756, 200001187196,
    200001192253, 100020581792, 200001188304, 100020625813,
    100020618060, 100020585305, 100020617489, 100020615039,
    100020618076, 100020588913, 200001187197, 100020671205,
    100020576940, 100020619814, 100020576472, 100020618083,
]

# Archetype 2 - 46 UPRNs.
archetype_2_uprns = [
    100020698027, 10001007455, 100020653785, 10090383198,
    100020665632, 100020620659, 100020615603, 100020609610,
    100020625597, 100020665656, 100020665640, 100020587905,
    100020665630, 100020624351, 100020625451, 100020624348,
    100020666735, 100020653786, 100020576458, 100020657902,
    100020624350, 100020637405, 100020666734, 100020616325,
    100020666716, 100020653783, 100020665645, 100020642337,
    100020665638, 100022904981, 100020688226, 100020630285,
    100020626800, 100020665634, 100022907528, 100020665652,
    100020624347, 100020666721, 100020585002, 10014055968,
    10001008257, 100020621438, 100020576459, 100020665643,
    100020665654, 100022917303,
]

# Archetype 3 - 23 UPRNs.
archetype_3_uprns = [
    100020577523, 100020616446, 100020605342, 100020594652,
    100020585394, 100020601138, 100020597485, 100020614883,
    100020633162, 100020697787, 200001185785, 100020646842,
    100020581449, 100020595611, 100020641814, 100020575611,
    100020652986, 100020654671, 100020647336, 100020610518,
    100020607980, 100020692380, 100020581690,
]

# Archetype 4 - 13 UPRNs.
archetype_4_uprns = [
    100020650603, 100020582907, 100020605116, 100020650607,
    100020589325, 100020655500, 100020642537, 200001187539,
    100020631683, 100020610165, 100020596436, 100020598277,
    100020660228,
]
def _archetype_asset_list(epc_data, label, property_type, wall_description, ratings=None, roof_descriptions=None):
    """
    Select the rows of the EPC data that match one archetype and label them.

    :param epc_data: EPC certificate frame; must contain the PROPERTY_TYPE, WALLS_DESCRIPTION,
        CURRENT_ENERGY_RATING, ROOF_DESCRIPTION, UPRN, ADDRESS1 and POSTCODE columns
    :param label: value written to the ARCHETYPE column of the result
    :param property_type: exact PROPERTY_TYPE value to keep
    :param wall_description: exact WALLS_DESCRIPTION value to keep
    :param ratings: accepted CURRENT_ENERGY_RATING values, or None to skip the rating filter
    :param roof_descriptions: accepted ROOF_DESCRIPTION values, or None to skip the roof filter
    :return: independent frame with the UPRN, ADDRESS1, POSTCODE and ARCHETYPE columns
    """
    mask = epc_data["PROPERTY_TYPE"].isin([property_type]) & epc_data["WALLS_DESCRIPTION"].isin([wall_description])
    if ratings is not None:
        mask = mask & epc_data["CURRENT_ENERGY_RATING"].isin(ratings)
    if roof_descriptions is not None:
        mask = mask & epc_data["ROOF_DESCRIPTION"].isin(roof_descriptions)

    # Copy so that adding the ARCHETYPE column never writes into a view of epc_data
    selection = epc_data.loc[mask, ["UPRN", "ADDRESS1", "POSTCODE"]].copy()
    selection["ARCHETYPE"] = label
    return selection


def app():
    """
    We shall define a small portfolio of properties, based in Croydon.

    Reads the Croydon domestic EPC certificates from local disk, narrows them down to recently surveyed
    social-rented homes below EPC C, assigns each matching home to one of four archetypes, keeps only the
    fixed UPRN selections defined at module level, uploads the resulting asset list as a CSV to S3 and
    prints the request body that refers to the uploaded file.

    :return: None
    """
    unfilled_cavity = "Cavity wall, as built, no insulation (assumed)"

    # Firstly, read in the EPC data for Croydon
    epc_data = pd.read_csv(
        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
        low_memory=False
    )

    # Filter on entries where we have a UPRN. The explicit copy makes the frame independent of the raw
    # data, so the column assignment below is not a chained assignment on a slice
    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")

    # Now filter on social properties
    # (original notes: 17337 properties with a registered EPC in Croydon)
    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]

    # Take below EPC C properties
    # (original notes: 7994 properties, 46%; split 79% D, 19% E, 1% F, 0.2% G - so it probably makes the
    # most sense to focus on E and D properties)
    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]

    # For the purpose of the sample, take the properties that have had surveys done in the last 3 years
    # (original notes: 1351 remaining properties)
    three_years_ago = pd.Timestamp.now() - pd.DateOffset(days=3 * 365)
    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= three_years_ago]

    # Archetype 1:
    # 1) House
    # 2) Unfilled cavity
    # 3) A roof that could be insulated (flat or pitched with no more than 50mm insulation)
    # 4) EPC E or D
    # (original notes: 24 properties)
    archetype_1_sample_asset_list = _archetype_asset_list(
        epc_data,
        label="Archetype 1",
        property_type="House",
        wall_description=unfilled_cavity,
        ratings=["D", "E"],
        roof_descriptions=[
            "Pitched, 12 mm loft insulation",
            "Pitched, 0 mm loft insulation",
            "Pitched, no insulation",
            "Pitched, 50 mm loft insulation",
            "Flat, no insulation (assumed)",
            "Pitched, no insulation (assumed)"
        ]
    )

    # Archetype 2:
    # 1) Flat
    # 2) Unfilled cavity
    # 3) Another property above
    # 4) EPC E or D
    # (original notes: 57 properties)
    archetype_2_sample_asset_list = _archetype_asset_list(
        epc_data,
        label="Archetype 2",
        property_type="Flat",
        wall_description=unfilled_cavity,
        ratings=["E", "D"],
        roof_descriptions=["(another dwelling above)"]
    )

    # Archetype 3 (more expensive to retrofit):
    # 1) EPC E or below
    # 2) Solid brick wall
    # 3) House
    # 4) Pitched roof with no, limited or 100 mm insulation
    archetype_3_sample_asset_list = _archetype_asset_list(
        epc_data,
        label="Archetype 3",
        property_type="House",
        wall_description="Solid brick, as built, no insulation (assumed)",
        ratings=["E", "F", "G"],
        roof_descriptions=[
            "Pitched, no insulation",
            "Pitched, limited insulation (assumed)",
            "Pitched, 100 mm loft insulation",
            "Pitched, no insulation (assumed)",
        ]
    )

    # Archetype 4:
    # 1) Maisonette
    # 2) Empty cavity
    # No extra rating or roof filter is applied; every row is already below EPC C at this point
    # (original notes: 16 properties)
    archetype_4_sample_asset_list = _archetype_asset_list(
        epc_data,
        label="Archetype 4",
        property_type="Maisonette",
        wall_description=unfilled_cavity
    )

    asset_list = pd.concat(
        [
            archetype_1_sample_asset_list,
            archetype_2_sample_asset_list,
            archetype_3_sample_asset_list,
            archetype_4_sample_asset_list
        ]
    )
    asset_list = asset_list.rename(
        columns={
            "UPRN": "uprn",
            "ADDRESS1": "address",
            "POSTCODE": "postcode",
            "ARCHETYPE": "archetype"
        }
    )
    asset_list["uprn"] = asset_list["uprn"].astype(int)

    # We end up with some properties that are currently an EPC C, but we do not have this data in the
    # download, so we manually remove
    # 1) 3 Reid Close, CR5 3BL
    # 2) Flat 6, Collier Court 2A, St. Peters Road CR0 1HD
    asset_list = asset_list[
        ~asset_list["uprn"].isin(
            [
                100020576460,
                100020624352,
            ]
        )
    ]

    # We have slightly too many properties, so only a fixed selection per archetype is kept. The module-level
    # archetype_N_uprns lists hold 20 / 46 / 23 / 13 UPRNs; they were originally drawn with
    # asset_list[asset_list["archetype"] == "Archetype N"]["uprn"].sample(size).tolist()
    # and are pinned so the portfolio does not change between runs
    uprns_to_keep = archetype_1_uprns + archetype_2_uprns + archetype_3_uprns + archetype_4_uprns
    asset_list = asset_list[asset_list["uprn"].isin(uprns_to_keep)]

    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Request body pointing at the uploaded file; it is only printed here, not sent anywhere
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
        "exclusions": ["floor_insulation"]
    }
    print(body)