Model/etl/customers/bcc_tender/app.py
2024-09-12 18:10:27 +01:00

219 lines
11 KiB
Python

"""
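This script estimates, from domestic EPC data, how many homes in Birmingham are likely to be eligible for
ECO4/GBIS funding. It prepares the supporting figures and CSV extracts for the Birmingham City Council tender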
"""
import pandas as pd
import numpy as np
# Domestic EPC certificates; the path identifies the extract as E08000025 / Birmingham
epc_certificates_path = "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv"
epc_data = pd.read_csv(epc_certificates_path)
# Broad assumptions
# Around 67% of homes in the UK have an EPC (roughly 70% if rounded up to be conservative):
# https://www.ons.gov.uk/peoplepopulationandcommunity/housing/articles/energyefficiencyofhousinginenglandandwales/2023
# However, we have 322,128 homes in Birmingham with an EPC, which is 76% of the total number of homes in Birmingham
# based on the 2021 census, which put this figure at 423,500 homes. The Birmingham-specific proportion is the one
# recorded here.
PROPORTION_OF_HOMES_WITH_AN_EPC = 0.761
N_HOUSEHOLDS_IN_BIRMINGHAM = 423_500
N_HOMES_WITH_AN_EPC = 322_128
# Derived from the two counts above so the three figures cannot drift apart
N_HOMES_WITHOUT_AN_EPC = N_HOUSEHOLDS_IN_BIRMINGHAM - N_HOMES_WITH_AN_EPC
# 55% of households are recipients of benefits in the West Midlands
# (2021/2022 - https://www.statista.com/statistics/382858/uk-state-benefits-by-region/)
PROPORTION_OF_HOMES_ON_BENEFITS = 0.55
# https://www.justgroupplc.co.uk/~/media/Files/J/Just-Retirement-Corp/news-doc/2023/six-in-10-homeowners-eligible-for
# -benefits-failing-to-claim-just-group-annual-insight-report.pdf
PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS = 0.106
# Breakdown of properties in council tax bands in the UK, to give us an estimate of the number of properties in A-D
band_a_proportion = 0.239
band_b_proportion = 0.195
band_c_proportion = 0.219
band_d_proportion = 0.156
COUNCIL_TAX_BAND_A_TO_D_PROPORTION = (
    band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion
)
# Keep only the most recently lodged certificate for each UPRN.
# Lodgement datetimes that cannot be parsed are coerced to NaT instead of raising.
lodgement_datetimes = pd.to_datetime(epc_data["LODGEMENT_DATETIME"], errors="coerce")
epc_data["LODGEMENT_DATETIME"] = lodgement_datetimes
newest_first = epc_data.sort_values(["LODGEMENT_DATETIME"], ascending=False)
# NOTE(review): drop_duplicates treats missing UPRNs as equal to one another, so rows without a UPRN
# presumably collapse into a single record here - confirm that is intended.
epc_data = newest_first.drop_duplicates("UPRN")
# We want to figure out the number of properties that are eligible for ECO/GBIS funding.
# Each tenure is matched against both of the label variants listed for it.
social_tenures = ["Rented (social)", "rental (social)"]
owner_occupied_tenures = ["Owner-occupied", "owner-occupied"]
prs_tenures = ["Rented (private)", "rental (private)"]

epc_d_to_g = ["D", "E", "F", "G"]
epc_e_to_g = ["E", "F", "G"]

# Ordered (tenures, EPC ratings, label) rules. A property keeps the label of the FIRST rule it matches,
# because every rule only considers rows that are still unlabelled.
eligibility_rules = [
    # Eligibility 1: ECO4 help to heat group OO - tenure is owner occupied and EPC rating D-G
    (owner_occupied_tenures, epc_d_to_g, "eco4_oo_hthg_needs_scaling_on_benefits"),
    # Eligibility 2: ECO4 help to heat group PRS - tenure is private rental and EPC rating E-G
    (prs_tenures, epc_e_to_g, "eco4_prs_hthg_needs_scaling_on_benefits"),
    # Eligibility 3: ECO4 social housing - tenure is social rented and EPC rating D-G
    # (if social tenure, then as long as the property is EPC D-G, it's eligible)
    (social_tenures, epc_d_to_g, "eco4_social_housing"),
    # Eligibility 4: GBIS General Eligibility, OO - tenure is owner occupied and EPC rating D-G.
    # This is a subset of Eligibility 1, so it has no rule of its own. The idea is to scale
    # eco4_oo_hthg_needs_scaling_on_benefits by the % of properties on benefits; of the properties left
    # over (deemed not eligible under Eligibility 1), a % would be eligible for GBIS based on the % of
    # units in council tax bands A-D.
    # Eligibility 5: GBIS General Eligibility, PRS - tenure is private rental and EPC rating D-G.
    # Private rentals rated E-G were already labelled by Eligibility 2, so this rule only picks up the
    # remaining private rentals rated D. Units that fall out of Eligibility 2 may also be eligible via
    # the same mechanism as Eligibility 4; that is handled later.
    (prs_tenures, epc_d_to_g, "gbis_prs_ge_needs_scaling_on_council_tax_band"),
]

epc_data["eligibility_type"] = None
for rule_tenures, rule_ratings, rule_label in eligibility_rules:
    still_unlabelled = epc_data["eligibility_type"].isna()
    rule_matches = (
        epc_data["TENURE"].isin(rule_tenures)
        & epc_data["CURRENT_ENERGY_RATING"].isin(rule_ratings)
        & still_unlabelled
    )
    epc_data["eligibility_type"] = np.where(rule_matches, rule_label, epc_data["eligibility_type"])
# EPCs that were assigned an eligibility type form the analysis set; keep just the columns we need
analysis_columns = [
    "UPRN", "TENURE", "CURRENT_ENERGY_RATING", "WALLS_DESCRIPTION", "ROOF_DESCRIPTION",
    "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA", "PROPERTY_TYPE", "BUILT_FORM", "MAINHEAT_DESCRIPTION",
    "eligibility_type", "PHOTO_SUPPLY", "ADDRESS1", "POSTCODE",
]
# Rows and columns are selected in one .loc call and the result is copied, so the column assignments
# below act on an independent frame rather than on a slice derived from epc_data
has_eligibility_type = epc_data["eligibility_type"].notna()
analysis_epcs = epc_data.loc[has_eligibility_type, analysis_columns].copy()

# Two-way EPC grouping: band D on its own, everything else in the analysis set reported as E-G
analysis_epcs["grouped_epc_band"] = np.where(
    analysis_epcs["CURRENT_ENERGY_RATING"].isin(["D"]),
    "EPC D",
    "EPC E-G",
)

# A missing PHOTO_SUPPLY is treated as 0, which flags the property as having no solar below
analysis_epcs["PHOTO_SUPPLY"] = analysis_epcs["PHOTO_SUPPLY"].fillna(0).astype(float)
analysis_epcs["has_solar"] = np.where(analysis_epcs["PHOTO_SUPPLY"] > 0, 1, 0)

analysis_epcs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender/analysis_epcs.csv", index=False)
# Create aggregations and store each one as its own CSV, named after the column it breaks down.
# The column list used to be assigned twice in a row, so the second assignment discarded the first five
# columns and their breakdowns were never written. Both sets are aggregated here.
agg_cols = [
    "CURRENT_ENERGY_RATING", "CONSTRUCTION_AGE_BAND", "PROPERTY_TYPE", "BUILT_FORM", "grouped_epc_band",
    "WALLS_DESCRIPTION", "ROOF_DESCRIPTION", "MAINHEAT_DESCRIPTION",
]
output_dir = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/bcc tender"
for col in agg_cols:
    # One row per distinct value of the column, with its count and its share of all counted properties
    agg_df = analysis_epcs.groupby([col]).size().reset_index(name="Number of Properties")
    agg_df["Percentage of Properties"] = 100 * agg_df["Number of Properties"] / agg_df["Number of Properties"].sum()
    agg_df.to_csv(f"{output_dir}/{col}.csv", index=False)
# Eligibility 6: GBIS General Eligibility, Social - tenure is social rented and EPC rating D-G, but also the
# property should be rented out below market rate.
# This is a subset of Eligibility 3 - we likely don't need to do any scaling.

# Number of de-duplicated EPC records carrying each eligibility label
eligibility_types = epc_data["eligibility_type"]
n_eco4_oo_hthg_needs_scaling_on_benefits = int(
    (eligibility_types == "eco4_oo_hthg_needs_scaling_on_benefits").sum()
)
n_eco4_prs_hthg_needs_scaling_on_benefits = int(
    (eligibility_types == "eco4_prs_hthg_needs_scaling_on_benefits").sum()
)
n_eco4_social = int((eligibility_types == "eco4_social_housing").sum())
n_gbis_prs_ge_needs_scaling_on_council_tax_band = int(
    (eligibility_types == "gbis_prs_ge_needs_scaling_on_council_tax_band").sum()
)
# Broad assumption: all homeowners claiming benefits live in homes in council tax bands A-D, so there are
# no additional homes in eligibility 4 and 5.
band_a_to_d_share = COUNCIL_TAX_BAND_A_TO_D_PROPORTION

# Eligibility 1 and 2 are scaled by the share of homes in council tax bands A-D. Earlier variants scaled
# them by PROPORTION_OF_HOMEOWNERS_CLAIMING_FOR_BENEFITS and PROPORTION_OF_HOMES_ON_BENEFITS respectively.
n_eligibility_1 = np.floor(band_a_to_d_share * n_eco4_oo_hthg_needs_scaling_on_benefits)
n_eligibility_2 = np.floor(band_a_to_d_share * n_eco4_prs_hthg_needs_scaling_on_benefits)

# Social housing is taken as-is, without scaling
n_eligiblity_3 = n_eco4_social

# Eligibility 4 is not computed. The disabled approach subtracted eligibility 1 from the unscaled ECO4 OO
# HTHG count (giving the homes not on benefits) and scaled that remainder by the share of homes in council
# tax bands A-D:
#   (n_eco4_oo_hthg_needs_scaling_on_benefits - n_eligibility_1) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION

# Eligibility 5 only counts the GBIS PRS records scaled by the band A-D share. The homes that fall out of
# eligibility 2, i.e.
#   (n_eco4_prs_hthg_needs_scaling_on_benefits - n_eligibility_2) * COUNCIL_TAX_BAND_A_TO_D_PROPORTION
# are not added on.
n_eligibiltiy_5 = np.floor(band_a_to_d_share * n_gbis_prs_ge_needs_scaling_on_council_tax_band)
# Owner occupiers: work out what share of owner-occupied EPC records is eligible (noted as 68% when this
# was written), then apply that share to the homes in Birmingham that do not have an EPC.
owner_occupied_mask = epc_data["TENURE"].isin(owner_occupied_tenures)
n_owner_occupied = int(owner_occupied_mask.sum())
proportion_of_oo_eligible = n_eligibility_1 / n_owner_occupied
oo_eligible_without_an_epc = np.floor(N_HOMES_WITHOUT_AN_EPC * proportion_of_oo_eligible)
oo_eligibility = n_eligibility_1 + oo_eligible_without_an_epc

# Private rentals: all private rentals require an EPC, so the EPC-based counts are used unscaled
prs_eligibility = n_eligibility_2 + n_eligibiltiy_5

# Social housing: most social housing properties will have an EPC, so this is not scaled up either
social_eligibility = n_eligiblity_3

# Overall figure, also expressed as a share of every household in Birmingham. Only the owner-occupied
# component above has been extended to homes without an EPC.
total_eligible = oo_eligibility + prs_eligibility + social_eligibility
proportion_of_homes_eligibile = total_eligible / N_HOUSEHOLDS_IN_BIRMINGHAM
# Approximately 53% of homes in Birmingham (about 227k homes) are eligible for some form of ECO4 or GBIS funding.
# This is broken down as follows:
# - 155k owner occupiers
# - 33k private rentals
# - 39k social housing
# We can't identify the properties owned by the council in the company ownership data, because it is unclear which
# entity owns each property: is it the council itself, or a company that is owned by the council? We can't be sure.
# Instead we work from published totals: BCC owns 54,000 social housing properties (5k supported housing)
# [https://www.birmingham.gov.uk/info/50094/housing_options/2686/apply_for_social_housing#:~:text=We%20manage
# %20around%2054%2C000%20social,a%20member%20of%20your%20household.]
# and there are 78,410 social housing properties in Birmingham, so we assume that the council owns 54,000 of these,
# i.e. 69% of the social housing is owned by the council.
# Since 38,779 of the 78,410 social housing properties look able to benefit from ECO/GBIS funding, we assume
# that 69% of these are owned by the council, which is 26,757 properties.
# So, with these assumptions in mind:
# We can commit to [x] per annum based on your 54k council-owned, of which approximately 27k are likely to be eligible
# for some form of ECO/GBIS funding. We will work directly with Housing associations to address the remaining 12k
# social properties that may be eligible for funding through ECO/GBIS.
# We will market directly to the 33k private rentals and 155k owner occupiers that are eligible for funding,
# and assuming a 5% conversion, will aim to complete work on