debugging incorrect fetching of flat data

This commit is contained in:
Khalim Conn-Kowlessar 2025-05-13 11:53:38 +01:00
parent f1b9ee2920
commit 30847ded90
9 changed files with 355 additions and 471 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -739,6 +739,11 @@ class AssetList:
self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP)
)
no_data_codes = {"No Data": None}
self.standardised_asset_list[self.landlord_year_built] = (
self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes)
)
self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime(
self.standardised_asset_list[self.landlord_year_built]
)
@ -759,7 +764,8 @@ class AssetList:
"This cell has an external reference that can't be shown or edited. Editing this cell will "
"remove the external reference.",
"ND",
'PIMSS EMPTY'
'PIMSS EMPTY',
"UNKNOWN"
]
if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
@ -1229,11 +1235,11 @@ class AssetList:
elif self.old_format_non_intrusives_present:
non_intrusives_wall_filter = (
self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin(
["empty cavity", "partial fill"]
["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial"]
) | (
(
self.standardised_asset_list['non-intrusives: WFT Findings']
.str.lower().str.strip().str.contains("empty cavity|partial fill") &
.str.lower().str.strip().str.contains("empty cavity|partial fill|empty|partial") &
~self.standardised_asset_list['non-intrusives: WFT Findings']
.astype(str).str.lower().str.strip().str.contains("major access issues")
)

View file

@ -62,22 +62,83 @@ def app():
Property UPRN
"""
# Community Housing new list
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
"Reconciliation")
data_filename = "SUB EPC C to Domna.xlsx"
sheet_name = "Sheet1"
postcode_column = 'POSTCODE'
fulladdress_column = None
address1_column = "ADDRESS"
address1_method = None
address_cols_to_concat = ["ADDRESS", "ESTATE", "TOWN"]
# Thurrock
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
data_filename = "THURROCK COUNCIL.xlsx"
sheet_name = "Assets"
postcode_column = 'Postcode'
fulladdress_column = "Full Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "BUILD DATE"
landlord_year_built = "Construction Date"
landlord_os_uprn = None
landlord_property_type = "PROPERTY TYPE"
landlord_built_form = "PROPERTY TYPE"
landlord_wall_construction = "CONSTRUCTION TYPE"
landlord_property_type = "Property Type"
landlord_built_form = "Property Subtype"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Main Heating Type"
landlord_existing_pv = None
landlord_property_id = "Property Reference"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
data_filename = "MEDWAY Asset List.xlsx"
sheet_name = "Asset list"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "House Number"
address1_method = None
address_cols_to_concat = ["House Number", "Street 1"]
missing_postcodes_method = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Property Type - Academy"
landlord_built_form = "Property Type - Academy"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# MHS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
fulladdress_column = "FullAddress"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "BuiltInYear"
landlord_os_uprn = None
landlord_property_type = "AssetType"
landlord_built_form = "PropertyType"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
@ -94,459 +155,33 @@ def app():
phase = False
ecosurv_landlords = None
# Unitas
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas"
data_filename = "unitas_asset_list_for_analysis.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Post Code'
fulladdress_column = "Address Line 1"
address1_column = "Address Line 1"
address1_method = None
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "built year"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Expanded Property Type"
landlord_wall_construction = None
landlord_roof_construction = "loft insulation"
landlord_heating_system = "Bolier Make"
landlord_existing_pv = None
landlord_property_id = "Property Reference"
landlord_sap = "Sap Rating"
outcomes_filename = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/Unitas - All outcomes - 24.04.2025.xlsx",
]
outcomes_sheetname = ["Feedback"]
outcomes_postcode = ["Postcode"]
outcomes_houseno = ["No."]
outcomes_id = [None]
outcomes_address = ["Address"]
master_filepaths = [
os.path.join(data_folder, "Submissions ECO 3.csv"),
os.path.join(data_folder, "Submissions ECO 4 - PHASE 1.csv"),
os.path.join(data_folder, "Submissions ECO 4 - PHASE 2.csv")
]
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = "unitas|everill|baskeyfield"
# LHP:
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP"
data_filename = "LHP.xlsx"
sheet_name = "Decent Homes Stock"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build Date"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "Property ID"
landlord_sap = None
outcomes_filename = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP/LHP Outcomes.xlsx",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP/Lincolnshire Housing Partnership - Outcomes 20th "
"Feb 2024.xlsx",
]
outcomes_sheetname = ["Sheet1", "LHP"]
outcomes_postcode = ["Postcode", "Postcode"]
outcomes_houseno = ["No.", "No."]
outcomes_id = [None, None]
outcomes_address = ["Address", "Address"]
master_filepaths = [os.path.join(data_folder, "LHP Rolling Master for analysis.csv")]
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = "lhp"
# Sovereign
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sovereign"
data_filename = "Warmfront - Quote for CWI.xlsx"
sheet_name = "Sheet2"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "Address Line 1"
address1_method = None
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = None
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "ID"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# NCHA
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
data_filename = "Energy Info Copy.xlsx"
sheet_name = "Data"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build Date (HAR10)"
landlord_os_uprn = None
landlord_property_type = "Property Type (HAR10)"
landlord_built_form = "Build Form (EPC)"
landlord_wall_construction = "Wall Description"
landlord_roof_construction = None
landlord_heating_system = "Heating System"
landlord_existing_pv = None
landlord_property_id = "Place ref"
landlord_sap = "EPC SAP"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
data_filename = "Torus Property Asset List - Phase 1.xlsx"
sheet_name = "TORUS"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = "NatUPRN"
landlord_property_type = "Property Type"
landlord_built_form = "Built Form"
landlord_wall_construction = "Wall Construction"
landlord_roof_construction = "Roof Construction"
landlord_heating_system = "Space Heating Source"
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
landlord_property_id = "UPRN"
landlord_sap = "SAP Score"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = True
# Southern Midlands
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
sheet_name = "Sheet 1"
postcode_column = 'Post Code'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Age_1"
landlord_os_uprn = None
landlord_property_type = "Prop_Type"
landlord_built_form = "Prop_Type"
landlord_wall_construction = "Walls_P"
landlord_heating_system = "Heating System"
landlord_existing_pv = None
landlord_property_id = "AssetID"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP London
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
sheet_name = "PFP SURROUNDING LONDON"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP North-West
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP North-East
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP East
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
sheet_name = "PFP EAST"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# Wates
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
data_filename = "ECO 4 Wates.xlsx"
sheet_name = "Roadmap Homes"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "Address Line 1"
address1_method = None
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
missing_postcodes_method = None
landlord_year_built = "Build Year"
landlord_os_uprn = None
landlord_property_type = "Archetype"
landlord_built_form = "Archetype"
landlord_wall_construction = "Wall"
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "UPRN"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
master_filepaths = []
master_to_asset_list_filepath = None
# Ealing
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
# sheet_name = "IGNORE - FULL MAIN"
# postcode_column = 'Postcode'
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
# data_filename = "Southern Housing Midlands Property List - combined.xlsx"
# sheet_name = "Sheet 1"
# postcode_column = 'Post Code'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "first_word"
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_year_built = "Age_1"
# landlord_os_uprn = None
# landlord_property_type = "Property Type Code"
# landlord_wall_construction = None
# landlord_heating_system = None
# landlord_property_type = "Prop_Type"
# landlord_built_form = "Prop_Type"
# landlord_wall_construction = "Walls_P"
# landlord_heating_system = "Heating System"
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Full Address.1'
# fulladdress_column = "Full Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_wall_construction = "Wallinsul"
# landlord_heating_system = "HeatSorc"
# landlord_existing_pv = None
# landlord_property_id = "Property Reference"
# landlord_property_id = "AssetID"
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# outcomes_id = None
# outcomes_address = None
# master_filepaths = []
# master_to_asset_list_filepath = None
# For Westward
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
sheet_name = "Sheet1"
postcode_column = "WFT EDIT Postcode"
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build date"
landlord_os_uprn = "UPRN"
landlord_property_type = "Location type"
landlord_built_form = None
landlord_wall_construction = "Wall Construction (EPC)"
landlord_heating_system = "Heat Source"
landlord_existing_pv = "PV (Y/N)"
landlord_property_id = "Place ref"
landlord_roof_construction = None
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
master_filepaths = []
master_to_asset_list_filepath = None
outcomes_id = None
outcomes_address = None
phase = False
ecosurv_landlords = None
# For ACIS - programme re-build
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
# data_filename = "ACIS asset list.xlsx"
# sheet_name = "Assets"
# address1_column = "House No"
# postcode_column = "Postcode"
# landlord_property_id = "UPRN"
# fulladdress_column = None
# address_cols_to_concat = ["House No", "Street", "Town"]
# missing_postcodes_method = None
# address1_method = None
# landlord_year_built = "YEAR BUILT"
# landlord_os_uprn = None
# landlord_property_type = "Property type"
# landlord_built_form = None
# landlord_wall_construction = "Wall Constuction"
# landlord_roof_construction = None
# landlord_sap = None
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
# outcomes_sheetname = "Feedback"
# outcomes_postcode = "Postcode"
# outcomes_address = "Address"
# outcomes_houseno = "No"
# outcomes_id = None
# master_filepaths = [
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# phase = False
# ecosurv_landlords = None
# For plus dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
sheet_name = "Asset List"
address1_column = " Address"
postcode_column = " Postcode"
landlord_property_id = "UPRN"
fulladdress_column = " Address"
address_cols_to_concat = []
missing_postcodes_method = None
address1_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_wall_construction = "Landlord Wall Full"
landlord_heating_system = "Landlord Heating"
landlord_existing_pv = None
outcomes_filename = "plus dane outcomes.xlsx"
outcomes_sheetname = "EVERYTHING"
outcomes_postcode = "Post Code"
outcomes_houseno = "Numb."
master_filepaths = [
os.path.join(data_folder, "JJC Rolling Master.csv"),
os.path.join(data_folder, "SCIS Rolling Master.csv"),
]
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}

View file

@ -5,7 +5,7 @@ STANDARD_BUILT_FORMS = {
# Houses
"end-terrace", "semi-detached", "detached", "mid-terrace",
# Flats
"ground floor", "mid-floor", "top-floor", "basement"
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
}
BUILT_FORM_MAPPINGS = {
@ -265,5 +265,70 @@ BUILT_FORM_MAPPINGS = {
'FIRST FLOOR FLAT': 'ground floor',
'GROUND FL MAISONETTE': 'ground floor',
'HOUSE 2 LIVING ROOMS': 'unknown',
'FLAT OVER SHOP': 'unknown'
'FLAT OVER SHOP': 'unknown',
'4 Bed Detached House': 'detached',
'2 Bed Detached House': 'detached',
'3 Bed Detached Bungalow': 'detached',
'1 Bed Semi-Detached House': 'semi-detached',
'2 Bed Semi-Detached House': 'semi-detached',
'2 Bed Detached Bungalow': 'detached',
'1 Bed Mid Terrace Bungalow': 'mid-terrace',
'4 Bed Semi-Detached Bungalow': 'semi-detached',
'3 Bed Mid Terrace Bungalow': 'mid-terrace',
'3 Bed Semi-Detached Bungalow': 'semi-detached',
'3 Bed Mid Terrace House': 'mid-terrace',
'2 Bed Mid Terrace House': 'mid-terrace',
'3 Bed Detached House': 'detached',
'2 Bed Semi-Detached Bungalow': 'semi-detached',
'5 Bed Mid Terrace House': 'mid-terrace',
'2 Bed Mid Terrace Bungalow': 'mid-terrace',
'3 Bed Semi-Detached House': 'semi-detached',
'1 Bed Semi-Detached Bungalow': 'semi-detached',
'4 Bed Mid Terrace House': 'mid-terrace',
'1 Bed Detached Bungalow': 'detached',
'5 Bed Semi-Detached House': 'semi-detached',
'6 Bed Detached House': 'detached',
'1 Bed Mid Terrace House': 'mid-terrace',
'4 Bed Semi-Detached House': 'semi-detached',
'TBA': 'unknown',
'1 Bed EOT House': 'end-terrace',
'3 Bed Flat': 'unknown',
'5 Bed EOT House': 'end-terrace',
'1 Bed EOT Bungalow': 'end-terrace',
'2 Bed EOT House': 'end-terrace',
'1 Bed Studio Flat': 'unknown',
'3 Bed Maison': 'unknown',
'Commercial Letting': 'unknown',
'4 Bed Maison': 'unknown',
'2 Bed Flat': 'unknown',
'3 Bed EOT House': 'end-terrace',
'2 Bed Maison': 'unknown',
'4 Bed EOT House': 'end-terrace',
'1 Bed Flat': 'unknown',
'3 Bed EOT Bungalow': 'end-terrace',
'1 Bed Maison': 'unknown',
'2 Bed EOT Bungalow': 'end-terrace',
'Bungalow detached': 'detached',
'Bungalow semi detached': 'semi-detached',
'Sheltered bungalow semi detached': 'semi-detached',
'Bedsit bungalow semi detached': 'semi-detached',
'Semi detached house': 'semi-detached',
'Bedsit bungalow terraced': 'mid-terrace', 'Terraced house': 'mid-terrace',
'Sheltered flat': 'unknown',
'APD Bungalow': 'unknown',
'Flat with partition': 'unknown',
'APD flat': 'unknown',
'Sheltered warden flat': 'unknown',
'Sheltered bedsit': 'unknown',
'Sheltered bungalow terraced': 'mid-terrace',
'Block': 'unknown',
'Bungalow terraced': 'mid-terrace',
'Maisonette flat': 'unknown',
'Sheltered bedsit disabled': 'unknown',
'Bedsit Flat': 'unknown',
'Low Rise': 'low rise',
'Upper Floor': 'top-floor',
'High Rise': 'high rise',
}

View file

@ -288,5 +288,8 @@ HEATING_MAPPINGS = {
'No Gas Boiler': 'no heating',
'Back Boiler': 'solid fuel',
"This cell has an external reference that can't be shown or edited. Editing this cell will remove the external "
"reference.": 'unknown'
"reference.": 'unknown',
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
}

View file

@ -227,6 +227,30 @@ PROPERTY_MAPPING = {
'FIRST FLOOR FLAT': 'flat',
'GROUND FL MAISONETTE': 'maisonette',
'HOUSE 2 LIVING ROOMS': 'house',
'FLAT OVER SHOP': 'flat'
'FLAT OVER SHOP': 'flat',
'House With Integral Garage': 'house',
'Flat Over Parking/Accessway': 'flat',
'Flat Over Binstore': 'flat',
'Flat Over Garage': 'flat',
'House With Independent Garage': 'house',
'Studio': 'flat',
'Bedsit bungalow terraced': 'bedsit',
'Terraced house': 'house',
'Sheltered flat': 'flat',
'APD Bungalow': 'bungalow',
'Flat with partition': 'flat',
'Bungalow detached': 'bungalow',
'APD flat': 'flat',
'Sheltered warden flat': 'flat',
'Bungalow semi detached': 'bungalow',
'Sheltered bedsit': 'bedsit',
'Sheltered bungalow terraced': 'bungalow',
'Sheltered bungalow semi detached': 'bungalow',
'Bungalow terraced': 'bungalow',
'Maisonette flat': 'maisonette',
'Sheltered bedsit disabled': 'bedsit',
'Bedsit bungalow semi detached': 'bedsit',
'Bedsit Flat': 'bedsit',
'Semi detached house': 'house',
'Unit': 'unknown'
}

View file

@ -172,7 +172,7 @@ class SearchEpc:
self.address1 = address1
self.postcode = postcode
self.full_address = full_address
self.full_address = full_address if full_address is not None else self.address1
self.uprn = uprn
self.house_number = self.get_house_number(self.address1)
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
@ -265,9 +265,7 @@ class SearchEpc:
for retry in range(self.max_retries):
try:
response = self.client.domestic.call(method="get", url=url, params=params)
if response:
self.data = response
return {
@ -368,7 +366,10 @@ class SearchEpc:
unique_property_types = {r["property-type"] for r in rows}
# We allow for variation in property type across flats/maisonettes
if (len(uprns) == 1) and ((len(unique_property_types) == 1) or unique_property_types == {"Flat", "Maisonette"}):
# If we know that we have a flat/maisonette, we allow for both property types
if property_type in ["Flat", "Maisonette"]:
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"}):
return rows
if property_type is not None:
@ -388,11 +389,27 @@ class SearchEpc:
# We check if post town is included in the address
if any([r["posttown"].lower() in address.lower() for r in rows]):
best_match = process.extractOne(
best_match1 = process.extractOne(
address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
)
best_match2 = process.extractOne(
address, [", ".join([r["address"]]) for r in rows], score_cutoff=0
)
# Pick the largest score
if best_match1[1] >= best_match2[1]:
# Get all of the scores
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
else:
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
# If we have multiple, we filter on newest lodgment date
if len(rows_filtered) > 1:
rows_filtered = [
r for r in rows_filtered
if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered])
]
else:
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
# Get the UPRN for the best match

View file

@ -0,0 +1,134 @@
"""
Flag pilot, phase-1 and CIGA-checked assets on the MHS standardised asset list.

The script reads the standardised asset list workbook (asset list + flat data
sheets), the pilot review workbook and the CIGA check workbook, then:

1. tags assets whose ``landlord_property_id`` appears in the pilot workbook's
   "Place Reference" column with ``PILOT_PROJECT_CODE``;
2. tags the remaining assets that have a cavity or solar reason with
   ``MHS_PHASE_1_PROJECT_CODE``;
3. matches every CIGA check row to exactly one asset and merges the CIGA
   guarantee onto the asset list;
4. writes the asset list and the (unchanged) flat data to a new workbook.

Raises:
    Exception: if a CIGA check row cannot be matched to exactly one asset.
"""
import pandas as pd
import os
import numpy as np
from tqdm import tqdm

PILOT_PROJECT_CODE = "MHS-000-PILOT"
MHS_PHASE_1_PROJECT_CODE = "MHS-001"

# Both input sheets live in the same workbook.
STANDARDISED_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build - "
    "Standardised.xlsx"
)

asset_list = pd.read_excel(
    STANDARDISED_WORKBOOK,
    sheet_name="Standardised Asset List",
)

flat_data = pd.read_excel(
    STANDARDISED_WORKBOOK,
    sheet_name="Flat Data",
)

pilot = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS 334 x Pilot reviewed - KB notes end column.xlsx"
)

ciga_checks = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS CIGA Check 03042025_201.xlsx"
)
# Stable key used to join the lookup built below back onto the CIGA rows
ciga_checks["row_id"] = ciga_checks.index

# Flag the pilot assets
asset_list["project_code"] = None
asset_list["project_code"] = np.where(
    asset_list["landlord_property_id"].isin(pilot["Place Reference"]),
    PILOT_PROJECT_CODE,
    asset_list["project_code"],
)

# We now flag the next phase of the programme: anything with a cavity or solar reason that is not already
# assigned to a project
has_measure_reason = asset_list["cavity_reason"].notnull() | asset_list["solar_reason"].notnull()
asset_list["project_code"] = np.where(
    has_measure_reason & asset_list["project_code"].isnull(),
    MHS_PHASE_1_PROJECT_CODE,
    asset_list["project_code"],
)

# We now flag the CIGA checks
# Maps a CIGA "Matched Address" directly to a landlord property id for rows the matching below cannot resolve
manual_fixes = {
    "123 Columbine Close, Rochester": "2213861230"
}

# Comma-free, lower-cased full address. Loop-invariant, so computed once rather than per CIGA row.
normalised_full_address = asset_list["domna_full_address"].str.lower().str.replace(",", "")

ciga_lookup = []
for _, row in tqdm(ciga_checks.iterrows(), total=len(ciga_checks)):
    if manual_fixes.get(row["Matched Address"]):
        ll_pid = manual_fixes[row["Matched Address"]]
        df = asset_list[
            (asset_list["landlord_property_id"].astype(str) == ll_pid)
        ]
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # First attempt: postcode + first address line
    df = asset_list[
        (asset_list["domna_postcode"] == row["Postcode"])
    ]
    df = df[
        (df["domna_address_1"].astype(str) == str(row["Address1"]))
    ]

    if df.empty:
        # Second attempt: the CIGA "Matched Postcode" + first address line
        df = asset_list[
            (asset_list["domna_postcode"] == row["Matched Postcode"])
        ]
        df = df[(df["domna_address_1"].astype(str) == str(row["Address1"]))]

    if df.shape[0] > 1:
        # Ambiguous: disambiguate on the full matched address. regex=False so that characters such as
        # "(", ")" or "+" in an address are compared literally instead of being parsed as a pattern.
        df = asset_list[
            normalised_full_address.str.contains(
                row["Matched Address"].lower().replace(",", ""), na=False, regex=False
            )
        ]

    # NOTE(review): this fallback runs whenever no candidate is left at this point (either nothing matched on
    # postcode, or the full-address search above found nothing) - confirm this is the intended scope.
    if df.empty:
        df = asset_list[
            normalised_full_address.str.contains(
                row["Address2"].lower().replace(",", ""), na=False, regex=False
            )
        ]
        df = df[(df["domna_address_1"].astype(str) == str(row["Address1"]))]

    if df.shape[0] != 1:
        # Covers both "no match" and "several matches"; report the actual count
        raise Exception(
            f"Expected exactly one match for {row['Address1']} in the asset list, found {df.shape[0]}"
        )

    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

ciga_lookup = pd.DataFrame(ciga_lookup)
ciga_lookup = ciga_lookup.merge(
    ciga_checks[["row_id", "Guarantee"]].rename(
        columns={"Guarantee": "ciga_guarantee"}
    ), how="left", on="row_id"
)
# NOTE(review): this flag is set on the lookup but is not part of the merge below, so it never reaches the
# saved asset list - confirm whether it should be carried across.
ciga_lookup["ciga_check_complete"] = True

asset_list = asset_list.merge(
    ciga_lookup[["domna_property_id", "ciga_guarantee"]],
    how="left",
    on="domna_property_id"
)

# Check we matched addresses correctly
# match_check = ciga_lookup.merge(
# ciga_checks, how="left", on="row_id"
# ).merge(
# asset_list[["domna_property_id", "domna_full_address"]], how="left", on="domna_property_id"
# )
# match_check = match_check[["Matched Address", "domna_full_address"]]

# Save
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/12052025 MHS Standardised Asset List - "
            "programme.xlsx")
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
with pd.ExcelWriter(filename) as writer:
    asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    flat_data.to_excel(writer, sheet_name="Flat Data", index=False)