preparing data pull for acis

This commit is contained in:
Khalim Conn-Kowlessar 2025-03-06 17:04:01 +00:00
parent e083962881
commit 66e0fdea28
5 changed files with 142 additions and 40 deletions

View file

@ -283,6 +283,8 @@ class AssetList:
"Any further surveyor notes", 'Surveyors Name'
]
OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility']
# This SAP threshold is a key search criterion for properties that may be eligible for extraction
FILLED_CAVITY_SAP_THRESHOLD = 75
# This SAP the
@ -351,7 +353,9 @@ class AssetList:
self.contact_detail_fields = None
# We detect the presence of the non-intrusive columns
self.non_intrusives_present = True if "CIGA Check Required" in self.raw_asset_list.columns else False
self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns
# We detect if we have the old format of non-intrusives
self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns
# Names of columns
self.landlord_property_id = landlord_property_id
@ -562,14 +566,19 @@ class AssetList:
}
self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None}
non_intrusive_columns = []
if self.non_intrusives_present:
self.keep_variables += self.NON_INTRUSIVES_COLNAMES
self.rename_map = {
**self.rename_map,
**dict(
zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in self.NON_INTRUSIVES_COLNAMES])
)
}
non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES
if self.old_format_non_intrusives_present:
non_intrusive_columns = self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES
self.rename_map = {
**self.rename_map,
**dict(
zip(self.NON_INTRUSIVES_COLNAMES, ["non-intrusives: " + c for c in non_intrusive_columns])
)
}
# We identify addresses which are likely to be multi-addresses (e.g. are rooms x-y)
self.standardised_asset_list["is_multi_address"] = self.standardised_asset_list[
@ -616,7 +625,11 @@ class AssetList:
Extracts the year from a date string in the format '01-Jul-YYYY'.
Returns the extracted year as an integer or None if the format is incorrect.
"""
known_errors = ["#MULTIVALUE"]
known_errors = [
"#MULTIVALUE",
"This cell has an external reference that can't be shown or edited. Editing this cell will "
"remove the external reference."
]
if pd.isnull(date_str) or date_str in known_errors:
return None

View file

@ -247,22 +247,22 @@ def app():
# - Or the insulation required is loft/cavity (floors should be solid)
# Ealing
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
sheet_name = "IGNORE - FULL MAIN"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "first_word"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Property Type Code"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Property ref"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
# sheet_name = "IGNORE - FULL MAIN"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_os_uprn = None
# landlord_property_type = "Property Type Code"
# landlord_wall_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# For Westward
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
@ -282,6 +282,24 @@ def app():
# landlord_existing_pv = "PV (Y/N)"
# landlord_property_id = "Place ref"
# For ACIS - programme re-build
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
data_filename = "ACIS asset list.xlsx"
sheet_name = "Assets"
address1_column = "House No"
postcode_column = "Postcode"
landlord_property_id = "UPRN"
fulladdress_column = None
address_cols_to_concat = ["House No", "Street", "Town"]
missing_postcodes_method = None
address1_method = None
landlord_year_built = "YEAR BUILT"
landlord_os_uprn = None
landlord_property_type = "Property type"
landlord_wall_construction = "Wall Constuction"
landlord_heating_system = "Heating"
landlord_existing_pv = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
@ -306,20 +324,33 @@ def app():
asset_list.init_standardise()
# We produce the new maps, which can be saved for future usage
new_property_type_map = PROPERTY_MAPPING.copy().update(
asset_list.variable_mappings[asset_list.landlord_property_type] if asset_list.landlord_property_type else {}
)
new_wall_map = WALL_CONSTRUCTION_MAPPINGS.copy().update(
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
)
new_heating_map = HEATING_MAPPINGS.copy().update(
asset_list.variable_mappings[asset_list.landlord_heating_system] if asset_list.landlord_heating_system else {}
)
new_existing_pv_map = EXISTING_PV_MAPPINGS.copy().update(
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
)
new_property_type_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type] if
asset_list.landlord_property_type else {}
).items()
if k not in PROPERTY_MAPPING
}
new_wall_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
).items()
if k not in WALL_CONSTRUCTION_MAPPINGS
}
new_heating_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system] if
asset_list.landlord_heating_system else {}
).items()
if k not in HEATING_MAPPINGS
}
new_existing_pv_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
).items()
if k not in EXISTING_PV_MAPPINGS
}
asset_list.apply_standardiation()

View file

@ -64,4 +64,10 @@ HEATING_MAPPINGS = {
'SOLIDFUEL': 'boiler - other fuel',
'STORHTR': 'electric storage heaters',
np.nan: 'unknown',
'Oil': 'boiler - other fuel',
'Gas': 'gas condensing boiler',
'Electric': 'electric storage heaters',
'Solid fuel': 'other',
'No Heat': 'unknown',
'GSHP': 'ground source heat pump'
}

View file

@ -1,3 +1,5 @@
import numpy as np
# These are the standard categories for property types
STANDARD_PROPERTY_TYPES = {
"house", "flat", "maisonette", "bungalow", "park home", "block house", "bedsit", "coach house",
@ -22,5 +24,43 @@ PROPERTY_MAPPING = {
'House': 'house',
'Maisonette': 'maisonette',
'Stairwell': 'other',
'MAISON': 'maisonette'
'MAISON': 'maisonette',
'3 Bed Semi Detached House': 'house',
'3 Bed Mid Terrace House': 'house',
'2 Bed Semi Detached House': 'house',
'4 Bed Semi Detached House': 'house',
'2 Bed End Terrace House': 'house',
'1 Bed Sheltered Bungalow': 'bungalow',
'1 Bed 1st Floor Sheltered Flat': 'flat',
'2 Bed Second Floor Flat': 'flat',
'1 Bed Mid Terrace House': 'house',
'1 Bed End Terrace House': 'house',
'7 Bed Detached House': 'house',
'4 Bed End Terrace House': 'house',
'1 Bed Link House': 'house',
'1 Bed Second Floor Flat': 'flat',
'2 Bed Detached House': 'house',
'1 Bed Ground Floor Flat': 'flat',
'2 Bed Sheltered Bungalow': 'bungalow',
'4 Bed Mid Terrace House': 'house',
'2 Bed Mid Terrace House': 'house',
'2 Bed First Floor Flat': 'flat',
'3 Bed Detached House': 'house',
'Ground Floor Bedsit': 'bedsit',
'3 Bed Bungalow': 'bungalow',
np.nan: 'unknown',
'5 Bed End Terrace House': 'house',
'1 Bed Grd Floor Sheltered Flat': 'flat',
'3 Bed End Terrace House': 'house',
'2 Bed Second Floor Maisonette': 'maisonette',
'2 Bed Ground Floor Flat': 'flat',
'2 Bed First Floor Maisonette': 'maisonette',
'4 Bed Detached House': 'house',
'1 Bed Bungalow': 'bungalow',
'2 Bed Bungalow': 'bungalow',
'First Floor Bedsit': 'bedsit',
'3 Bed First Floor Maisonette': 'maisonette',
'2 Bed 1st Floor Sheltered Flat': 'flat',
'1 Bed First Floor Flat': 'flat',
'3 Bed First Floor Flat': 'flat'
}

View file

@ -1,3 +1,5 @@
import numpy as np
STANDARD_WALL_CONSTRUCTIONS = {
"uninsulated cavity", "filled cavity", "partial insulated cavity", "cavity unknown insulation",
"uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation",
@ -89,4 +91,14 @@ WALL_CONSTRUCTION_MAPPINGS = {
'NONE': 'unknown',
'NOTKNOWN': 'unknown',
'SOLID': 'solid brick unknown insulation',
np.nan: 'unknown',
'RENDER/TIMBER FRAME': 'timber frame',
'SYSTEM BUILT': 'system built',
'PCC PANELS': 'other',
'NOT APPLICABLE - FLAT': 'unknown',
'BRICK/TIMBER FRAME': 'timber frame',
'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
'STONE SOLID': 'sandstone or limestone',
'EXT CLADDING SYSTEM': 'system built',
'BRICK/BLOCK SOLID': 'solid brick unknown insulation'
}