set up first basic asset list for gla demo

This commit is contained in:
Khalim Conn-Kowlessar 2024-03-27 18:12:57 +00:00
parent c81b03c458
commit dbeba4db43
2 changed files with 232 additions and 22 deletions

View file

@ -0,0 +1,145 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
USER_ID = 8
PORTFOLIO_ID = 67
def app():
    """
    Build a small demo portfolio of Croydon properties and upload it as an asset list.

    Steps:
      1. Read the Croydon domestic EPC certificates from the local CSV.
      2. Keep the most recently lodged certificate for each UPRN.
      3. Keep social-rented properties with an efficiency score below 69 (below EPC C)
         that were lodged within the last 2.5 years.
      4. Label the rows matching each of four archetypes and stack them into one asset list.
      5. Save the asset list to ``{USER_ID}/{PORTFOLIO_ID}/inputs.csv`` via ``save_csv_to_s3``
         and print the request body that points at that file.

    :return: None
    """
    # Firstly, read in the EPC data for Croydon
    epc_data = pd.read_csv(
        "local_data/all-domestic-certificates/domestic-E09000008-Croydon/certificates.csv",
        low_memory=False
    )

    # Filter on entries where we have a UPRN. Take an explicit copy so that the column
    # assignment below writes to our own frame rather than to a slice of the raw data
    # (this is what triggers pandas' SettingWithCopyWarning).
    epc_data = epc_data[~pd.isnull(epc_data["UPRN"])].copy()

    # Get the newest EPC for each UPRN. We use LODGEMENT_DATE as a proxy for this
    epc_data["LODGEMENT_DATE"] = pd.to_datetime(epc_data["LODGEMENT_DATE"])
    epc_data = epc_data.sort_values("LODGEMENT_DATE", ascending=False).drop_duplicates("UPRN")

    # Now filter on social properties
    epc_data = epc_data[epc_data["TENURE"].isin(["rental (social)", "Rented (social)"])]
    # There are 17337 properties with a registered EPC in Croydon

    # Take below EPC C properties
    epc_data = epc_data[epc_data["CURRENT_ENERGY_EFFICIENCY"].astype(int) < 69]
    # 7994 properties are below EPC C (46%)
    # 79% D, 19% E, 1% F, 0.2% G - it probably makes the most sense to focus on E and D properties
    # (figures recorded from epc_data["CURRENT_ENERGY_RATING"].value_counts(normalize=True))

    # For the purpose of the sample, take the properties that have had surveys done recently.
    # The window is 2.5 years (int(2.5 * 365) days) back from the time of running.
    # The original author recorded 1023 remaining properties at this point.
    lodgement_cutoff = pd.Timestamp.now() - pd.DateOffset(days=int(2.5 * 365))
    epc_data = epc_data[epc_data["LODGEMENT_DATE"] >= lodgement_cutoff]

    # Building blocks shared by several archetypes
    is_house = epc_data["PROPERTY_TYPE"].isin(["House"])
    is_epc_e = epc_data["CURRENT_ENERGY_RATING"] == "E"
    has_empty_cavity = epc_data["WALLS_DESCRIPTION"].isin(
        ["Cavity wall, as built, no insulation (assumed)"]
    )
    # A roof that could be insulated (flat or pitched with no more than 50mm insulation)
    has_insulatable_roof = epc_data["ROOF_DESCRIPTION"].isin(
        [
            "Pitched, 12 mm loft insulation",
            "Pitched, 0 mm loft insulation",
            "Pitched, no insulation",
            "Pitched, 50 mm loft insulation",
            "Flat, no insulation (assumed)",
            "Pitched, no insulation (assumed)"
        ]
    )

    # Different buckets of properties. Insertion order fixes the row order of the asset list.
    archetype_masks = {
        # Archetype 1: House, unfilled cavity, insulatable roof, EPC E
        "Archetype 1": is_house & is_epc_e & has_empty_cavity & has_insulatable_roof,
        # Archetype 2: Flat, unfilled cavity, another property above, EPC E
        "Archetype 2": (
            epc_data["PROPERTY_TYPE"].isin(["Flat"]) &
            is_epc_e &
            has_empty_cavity &
            epc_data["ROOF_DESCRIPTION"].isin(["(another dwelling above)"])
        ),
        # Archetype 3: EPC F, House, pitched roof with no insulation
        # Just 1 property (more expensive to retrofit)
        # NOTE(review): the archetype was also described as "solid brick wall", but no wall
        # filter is applied here - confirm whether one is intended.
        "Archetype 3": (
            is_house &
            (epc_data["CURRENT_ENERGY_RATING"] == "F") &
            epc_data["ROOF_DESCRIPTION"].isin(["Pitched, no insulation"])
        ),
        # Archetype 4: Maisonette, empty cavity
        # 14 properties here
        # NOTE(review): the archetype was also described as "EPC E", but no rating filter is
        # applied here - confirm whether one is intended.
        "Archetype 4": epc_data["PROPERTY_TYPE"].isin(["Maisonette"]) & has_empty_cavity,
    }

    # One labelled slice per archetype, stacked in the order defined above
    asset_list = pd.concat(
        [
            epc_data.loc[mask, ["UPRN", "ADDRESS1", "POSTCODE"]].assign(ARCHETYPE=label)
            for label, mask in archetype_masks.items()
        ]
    )

    asset_list = asset_list.rename(
        columns={
            "UPRN": "uprn",
            "ADDRESS1": "address",
            "POSTCODE": "postcode",
            "ARCHETYPE": "archetype"
        }
    )

    filename = f"{USER_ID}/{PORTFOLIO_ID}/inputs.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Request body referencing the uploaded file; it is only printed here, not sent anywhere
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "budget": None,
        "exclusions": ["floor_insulation"]
    }
    print(body)

View file

@ -6692,6 +6692,92 @@ def create_final_report():
revenue.to_csv("HA Analysis Final - revenue.csv")
def identify_eco_works(loader):
    """
    Split the remaining assets of the selected housing associations by ECO4 eligibility.

    For each selected HA found in ``loader.data``, assets that already have an installation
    status in the survey list are dropped. The remaining assets are split on their
    "ECO Eligibility" value into three buckets (needs a CIGA check, eco4, eco4 having passed
    CIGA). Each non-empty bucket is written to a CSV under ``local_data/`` and the bucket
    sizes are collected into a per-HA summary.

    :param loader: object whose ``data`` attribute maps an HA code to a dict holding the
        ``"asset_list"`` and ``"survey_list"`` DataFrames for that HA
    :return: DataFrame with one row per processed HA and the columns ``HA Name``,
        ``n_needing_ciga``, ``eco4`` and ``eco4_passed_ciga``
    """
    # Alternative selection of HAs, left commented out by the original author:
    # ha_names = [
    #     "HA16",  # For Housing
    #     "HA39",  # Rooftop
    #     "HA41",  # Settle
    #     "HA23",  # Lambeth
    #     "HA14",  # EMH
    #     "HA7",  # Believe
    #     "HA102",  # Thrive
    # ]

    # Unitas, fairhive, acis, LHP. The keys select which HAs are processed; the values are
    # used in the output file names.
    names = {
        "HA50": "Unitas",
        "HA15": "Fairhive",
        "HA107": "ACIS",
        "HA24": "LHP"
    }

    # (summary column, "ECO Eligibility" value, CSV file name suffix)
    buckets = [
        ("n_needing_ciga", "eco4 (subject to ciga)", "needs ciga"),
        ("eco4", "eco4", "eco4"),
        ("eco4_passed_ciga", "eco4 - passed ciga", "eco4 passed ciga"),
    ]

    breakdowns = []
    for ha, data_assets in loader.data.items():
        if ha not in names:
            continue

        asset_list = data_assets["asset_list"]
        survey_list = data_assets["survey_list"]

        # Remove things that have sold
        if not survey_list.empty:
            asset_list = asset_list.merge(
                survey_list[["asset_list_row_id", "installation_status"]],
                how="left",
                on="asset_list_row_id"
            )
            # Anything that has an installation has gone to installation, and therefore is
            # not remaining
            asset_list = asset_list[pd.isnull(asset_list["installation_status"])]
            asset_list = asset_list.drop(columns=["installation_status"])

        summary = {"HA Name": ha}
        for column, eligibility, file_suffix in buckets:
            bucket = asset_list[asset_list["ECO Eligibility"] == eligibility]
            # Store the data - only write a file when there is something to put in it
            if not bucket.empty:
                bucket.to_csv(f"local_data/{names[ha]} - {file_suffix}.csv")
            summary[column] = bucket.shape[0]
        breakdowns.append(summary)

    # Fixing the columns keeps the summary well-formed even when no selected HA was found
    breakdowns = pd.DataFrame(
        breakdowns,
        columns=["HA Name"] + [column for column, _, _ in buckets]
    )
    breakdowns = breakdowns.fillna(0)

    # Previously the summary was built and then thrown away; hand it back to the caller
    return breakdowns
def app():
"""
This app contains the housing association analysis for HAs 1, 6, 14, 39 and 107.
@ -6739,29 +6825,8 @@ def app():
loader = DataLoader(directories, december_figures_filepath, use_cache, rebuild_inputs)
loader.load()
loader.ha_facts_and_figures()
forecast_remaining_sales(loader)
# gbis rate
# breakdowns = []
# for ha, data_assets in loader.data.items():
# asset_list = data_assets["asset_list"].copy()
# breakdown = asset_list["ECO Eligibility"].value_counts().to_dict()
# breakdowns.append(breakdown)
# breakdowns = pd.DataFrame(breakdowns)
#
# installer = []
# for ha, data_assets in loader.data.items():
# survey_list = data_assets["survey_list"]
# if survey_list.empty:
# continue
# if "INSTALLER" not in survey_list.columns:
# continue
#
# installers = survey_list["INSTALLER"].value_counts().to_dict()
# installers["ha_name"] = ha
# installer.append(installers)
# installer = pd.DataFrame(installer)
# installer.drop(columns=["ha_name"]).sum().sum()
forecast_remaining_sales(loader)
# Adhoc - for HA16, get the properties that still need a CIGA check
asset_list_ha16 = loader.data["HA16"]["asset_list"].copy()