analysing wates data

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-11 17:54:48 +00:00
parent b6ef41b21b
commit dc2d108060
4 changed files with 185 additions and 50 deletions

View file

@ -16,6 +16,7 @@ import asset_list.mappings.property_type as property_type_mappings
import asset_list.mappings.walls as walls_mappings
import asset_list.mappings.heating_systems as heating_mappings
import asset_list.mappings.exising_pv as existing_pv_mappings
import asset_list.mappings.built_form as built_form_mappings
from recommendations.recommendation_utils import (
estimate_perimeter,
@ -268,6 +269,7 @@ class AssetList:
STANDARD_UPRN = "ordnance_survey_uprn"
STANDARD_LANDLORD_PROPERTY_ID = "landlord_property_id"
STANDARD_PROPERTY_TYPE = "landlord_property_type"
STANDARD_BUILT_FORM = "landlord_built_form"
STANDARD_WALL_CONSTRUCTION = "landlord_wall_construction"
STANDARD_HEATING_SYSTEM = "landlord_heating_system"
STANDARD_EXISTING_PV = "landlord_existing_pv"
@ -321,6 +323,14 @@ class AssetList:
", ceiling insulated",
]
# List of lower-case substrings we look for in the EPC wall description; a match indicates a cavity wall with no insulation or only partial insulation
UNINSULATED_CAVITY_SUBSTRINGS = [
"cavity wall, as built, no insulation (assumed)",
"cavity wall, as built, no insulation",
"cavity wall, as built, partial insulation (assumed)",
"cavity wall, as built, partial insulation",
]
def __init__(
self,
local_filepath,
@ -335,6 +345,7 @@ class AssetList:
landlord_year_built=None,
landlord_uprn=None,
landlord_property_type=None,
landlord_built_form=None,
landlord_wall_construction=None,
landlord_heating_system=None,
landlord_existing_pv=None,
@ -370,6 +381,7 @@ class AssetList:
self.landlord_year_built = landlord_year_built
self.landlord_uprn = landlord_uprn
self.landlord_property_type = landlord_property_type
self.landlord_built_form = landlord_built_form
self.landlord_wall_construction = landlord_wall_construction
self.landlord_heating_system = landlord_heating_system
self.landlord_existing_pv = landlord_existing_pv
@ -405,6 +417,13 @@ class AssetList:
self.standardised_asset_list[self.address1_colname].copy()
)
# Handle the case where the property type column is the same as the built form column: copy it into a dedicated built form column
if self.landlord_property_type == self.landlord_built_form:
self.landlord_built_form = self.STANDARD_BUILT_FORM
self.standardised_asset_list[self.landlord_built_form] = (
self.standardised_asset_list[self.landlord_property_type].copy()
)
def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"):
if method not in self.ADDRESS_1_CLEANING_METHODS:
@ -557,6 +576,7 @@ class AssetList:
self.full_address_colname,
self.landlord_uprn,
self.landlord_property_type,
self.landlord_built_form,
self.landlord_year_built,
self.landlord_wall_construction,
self.landlord_heating_system,
@ -571,6 +591,7 @@ class AssetList:
self.full_address_colname: self.STANDARD_FULL_ADDRESS,
self.landlord_uprn: self.STANDARD_UPRN,
self.landlord_property_type: self.STANDARD_PROPERTY_TYPE,
self.landlord_built_form: self.STANDARD_BUILT_FORM,
self.landlord_year_built: self.STANDARD_YEAR_BUILT,
self.landlord_wall_construction: self.STANDARD_WALL_CONSTRUCTION,
self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM,
@ -681,6 +702,10 @@ class AssetList:
"standard_values": property_type_mappings.STANDARD_PROPERTY_TYPES,
"standard_map": property_type_mappings.PROPERTY_MAPPING
},
self.landlord_built_form: {
"standard_values": built_form_mappings.STANDARD_BUILT_FORMS,
"standard_map": built_form_mappings.BUILT_FORM_MAPPINGS
},
self.landlord_wall_construction: {
"standard_values": walls_mappings.STANDARD_WALL_CONSTRUCTIONS,
"standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS
@ -861,6 +886,9 @@ class AssetList:
x[self.EPC_API_DATA_NAMES["roof-description"]]) else None,
axis=1
)
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = (
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "")
)
# We produce some additional fields
# 1) Is the SAP rating below C75
@ -990,9 +1018,6 @@ class AssetList:
def identify_worktypes(self, cleaned):
if not self.non_intrusives_present and not self.old_format_non_intrusives_present:
raise NotImplementedError("Need to implement the case for non-intrusives")
# If we have non-intrusives completed, we can use this to identify work types
######################################################
# Empty cavity:
@ -1055,6 +1080,39 @@ class AssetList:
)
)
self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
) & (
self.standardised_asset_list["epc_year_upper_bound"] <= 1995
) & (
~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]
) & (
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]] > self.EMPTY_CAVITY_SAP_THRESHOLD
)
)
self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] <= self.EMPTY_CAVITY_SAP_THRESHOLD
)
)
self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) &
(self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= 2002) &
(
self.standardised_asset_list[
self.EPC_API_DATA_NAMES["current-energy-efficiency"]
] > self.EMPTY_CAVITY_SAP_THRESHOLD
)
)
# If the EPC is estimated, we defer to the non-intrusives
self.standardised_asset_list["epc_indicates_empty_cavity"] = np.where(
(
@ -1066,6 +1124,15 @@ class AssetList:
self.standardised_asset_list["epc_indicates_empty_cavity"]
)
# Finally, we create a flag to indicate that the cavity is empty, based on the criteria above
self.standardised_asset_list["cavity_is_empty"] = (
non_intrusives_wall_filter |
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin(
self.EPC_NO_WALL_INSULATION_DESCRIPTIONS
) |
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"])
)
######################################################
# Extraction
######################################################
@ -1212,7 +1279,9 @@ class AssetList:
print("Should we include cavity properties where they might be uninsulated?")
self.standardised_asset_list["solar_landlord_walls_insulated"] = (
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(
["filled cavity", "insulated solid brick", "insulated timber frame"]
[
"filled cavity", "insulated solid brick", "insulated timber frame",
]
)
)
@ -1264,24 +1333,24 @@ class AssetList:
)
# We merge on the u-value for average thermal transmittance
roof_uvalue_data = pd.DataFrame(cleaned["roof-description"])
roof_uvalue_data = roof_uvalue_data[
~pd.isnull(roof_uvalue_data["thermal_transmittance"])
][["original_description", "thermal_transmittance"]].rename(
roof_roof_data = pd.DataFrame(cleaned["roof-description"])
roof_roof_data = roof_roof_data[
["original_description", "thermal_transmittance", "is_pitched", "is_loft"]
].rename(
columns={
"original_description": self.EPC_API_DATA_NAMES["roof-description"],
"thermal_transmittance": "roof_u_value"
"thermal_transmittance": "roof_u_value",
}
)
self.standardised_asset_list = self.standardised_asset_list.merge(
roof_uvalue_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
roof_roof_data, how="left", on=self.EPC_API_DATA_NAMES["roof-description"]
)
# If the u-value of a roof is less than 0.7 we consider it insulated
self.standardised_asset_list["solar_epc_roof_insulated"] = (
self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains(
"|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), regex=False
"|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS),
) | (
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
lambda x: int(x) >= 200 if str(x).isdigit() else False
@ -1293,9 +1362,19 @@ class AssetList:
)
)
self.standardised_asset_list["solar_epc_loft_needs_topup"] = self.standardised_asset_list[
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
lambda x: int(x) < 200 if str(x).isdigit() else False
self.standardised_asset_list["solar_epc_loft_needs_topup"] = (
self.standardised_asset_list[
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply(
lambda x: int(x) < 200 if str(x).isdigit() else False
) | (
(
self.standardised_asset_list["is_loft"] | self.standardised_asset_list["is_pitched"]
) & (
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].isin(
["below average", "none"]
)
)
)
)
self.standardised_asset_list["epc_has_floor_recommendation"] = (
@ -1357,10 +1436,15 @@ class AssetList:
self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"]
)
walls_are_insulated = (
# Walls meet the solar requirements if either:
# 1) the walls are insulated (per landlord, EPC or non-intrusive data), or
# 2) the wall is a cavity wall (accepted whether or not it is insulated)
walls_meet_solar_requirements = (
self.standardised_asset_list["solar_landlord_walls_insulated"] |
self.standardised_asset_list["solar_epc_walls_insulated"] |
self.standardised_asset_list["solar_non_intrusives_walls_insulated"]
self.standardised_asset_list["solar_non_intrusives_walls_insulated"] |
self.standardised_asset_list["cavity_is_empty"] |
(self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].str.contains("cavity"))
)
not_a_flat = (
@ -1375,7 +1459,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Floor type check
@ -1392,7 +1476,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Floor type check
@ -1409,7 +1493,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Floor type check
@ -1425,7 +1509,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Floor type check
@ -1461,7 +1545,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Check floor
@ -1478,7 +1562,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Check floor
@ -1494,7 +1578,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Floor type
@ -1512,7 +1596,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Floor type
@ -1529,7 +1613,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Floor type
@ -1544,7 +1628,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Floor type - other types
@ -1561,7 +1645,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Other floor types
@ -1577,7 +1661,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof is insulated
self.standardised_asset_list["solar_epc_roof_insulated"] &
# Other floor types
@ -1600,7 +1684,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Other floor types
@ -1616,7 +1700,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Other floor types
@ -1633,7 +1717,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Other floor types
@ -1651,7 +1735,7 @@ class AssetList:
# The property doesn't currently have solar
~self.standardised_asset_list["property_has_solar"] &
# The walls are insulated
walls_are_insulated &
walls_meet_solar_requirements &
# Roof needs a loft top-up
self.standardised_asset_list["solar_epc_loft_needs_topup"] &
# Other floor types
@ -1676,6 +1760,7 @@ class AssetList:
# Finally, we note why each property has been flagged
self.standardised_asset_list["cavity_reason"] = None
self.standardised_asset_list["cavity_reason"] = np.where(
self.standardised_asset_list["non_intrusive_indicates_empty_cavity"],
"Non-Intrusive Data Showed Empty Cavity",
@ -1694,6 +1779,33 @@ class AssetList:
"EPC Data Showed Empty Cavity",
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
(
self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"]
),
"EPC Data Showed Empty Cavity but all SAP scores allowed",
self.standardised_asset_list["cavity_reason"]
)
# Landlord data
self.standardised_asset_list["cavity_reason"] = np.where(
(
self.standardised_asset_list["landlord_data_indicates_empty_cavity"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] &
~self.standardised_asset_list["epc_indicates_empty_cavity"]
),
"Landlord Data Showed Empty Cavity",
self.standardised_asset_list["cavity_reason"]
)
self.standardised_asset_list["cavity_reason"] = np.where(
(
self.standardised_asset_list["landlord_data_indicates_empty_cavity_no_sap_filter"] &
~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_sap_filter"] &
~self.standardised_asset_list["epc_indicates_empty_cavity_no_sap_filter"]
),
"Landlord Data Showed Empty Cavity but all SAP scores allowed",
self.standardised_asset_list["cavity_reason"],
)
# Flag extraction
self.standardised_asset_list["cavity_reason"] = np.where(
(
@ -1802,7 +1914,7 @@ class AssetList:
}
# We prepare outcomes for output
if self.outcomes:
if self.outcomes is not None:
logger.info("Preparing outcomes for output")
identified_work = self.standardised_asset_list[
~pd.isnull(self.standardised_asset_list["cavity_reason"]) |

View file

@ -9,6 +9,7 @@ import msgpack
from utils.s3 import read_from_s3
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
from asset_list.mappings.heating_systems import HEATING_MAPPINGS
from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
@ -40,6 +41,13 @@ def get_data(
"bedsit": "Flat"
}
built_form_map = {
"mid-terrace": "Mid-Terrace",
"end-terrace": "End-Terrace",
"semi-detached": "Semi-Detached",
"detached": "Detached"
}
epc_data = []
errors = []
no_epc = []
@ -65,6 +73,7 @@ def get_data(
uprn = None
property_type = property_type_map.get(home[AssetList.STANDARD_PROPERTY_TYPE], None)
built_form = built_form_map.get(home[AssetList.STANDARD_BUILT_FORM])
searcher = SearchEpc(
address1=str(house_no),
@ -119,6 +128,7 @@ def get_data(
# As a final resort, we estimate the EPC
if property_type is not None and searcher.newest_epc is None:
searcher.ordnance_survey_client.property_type = property_type
searcher.ordnance_survey_client.built_form = built_form
searcher.find_property(skip_os=True)
if searcher.newest_epc is None:
@ -260,6 +270,7 @@ def app():
landlord_year_built = "Build Year"
landlord_os_uprn = None
landlord_property_type = "Archetype"
landlord_built_form = "Archetype"
landlord_wall_construction = "Wall"
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
@ -407,6 +418,7 @@ def app():
landlord_year_built=landlord_year_built,
landlord_uprn=landlord_os_uprn,
landlord_property_type=landlord_property_type,
landlord_built_form=landlord_built_form,
landlord_wall_construction=landlord_wall_construction,
landlord_heating_system=landlord_heating_system,
landlord_existing_pv=landlord_existing_pv
@ -421,6 +433,13 @@ def app():
).items()
if k not in PROPERTY_MAPPING
}
new_built_form_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form] if
asset_list.landlord_built_form else {}
).items()
if k not in BUILT_FORM_MAPPINGS
}
new_wall_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
@ -619,22 +638,6 @@ def app():
pprint(asset_list.work_type_figures)
# TODO: Characterise the properties that didn't qualify
eg = asset_list.standardised_asset_list[
pd.isnull(asset_list.standardised_asset_list["solar_reason"])
]
eg[asset_list.EPC_API_DATA_NAMES["floor-description"]].value_counts()
# TODO: Look into the estimated ones
eg["estimated"].value_counts()
eg = eg[eg[asset_list.STANDARD_HEATING_SYSTEM] == "high heat retention storage heaters"]
eg[asset_list.STANDARD_WALL_CONSTRUCTION].value_counts()
eg = eg[eg[asset_list.STANDARD_WALL_CONSTRUCTION] == "filled cavity"]
eg[asset_list.EPC_API_DATA_NAMES["roof-description"]].value_counts()
eg[asset_list.EPC_API_DATA_NAMES["floor-description"]].value_counts()
eg["epc_has_floor_recommendation"].value_counts()
asset_list.flat_analysis()
asset_list.load_contact_details(

View file

@ -0,0 +1,20 @@
# The set of standard built-form labels that landlord values are mapped onto.
STANDARD_BUILT_FORMS = {
    # Used when the landlord description names no specific built form
    "unknown",
    # Houses
    "end-terrace",
    "semi-detached",
    "detached",
    "mid-terrace",
    # Flats
    "ground floor",
    "mid-floor",
    "top-floor",
}

# Raw landlord description -> standard built form.
# Descriptions that only name a property type (e.g. "Flat", "Bungalow",
# "Maisonette") map to "unknown". Both capitalisations of the mid-terrace
# description appear as separate keys.
BUILT_FORM_MAPPINGS = {
    "House (End Terrace)": "end-terrace",
    "Ground Floor Flat General": "ground floor",
    "House (Semi)": "semi-detached",
    "House (Mid Terrace)": "mid-terrace",
    "Bungalow": "unknown",
    "House (Mid terrace)": "mid-terrace",
    "Maisonette": "unknown",
    "Flat": "unknown",
    "First Floor Flat General": "mid-floor",
    "Bungalow (Semi)": "semi-detached",
}

View file

@ -112,7 +112,7 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Cavity Unknown insulation': 'cavity unknown insulation',
'Timber frame As-built': 'timber frame',
'System build Unknown insulation': 'system built',
'Cavity As-built': 'unknown',
'Cavity As-built': 'uninsulated cavity',
'System build External': 'system built',
'ND (inferred) ND (inferred)': 'unknown',
'Solid brick External': 'insulated solid brick',