mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging incorrect fetching of flat data
This commit is contained in:
parent
f1b9ee2920
commit
30847ded90
9 changed files with 355 additions and 471 deletions
2
.idea/Model.iml
generated
2
.idea/Model.iml
generated
|
|
@ -7,7 +7,7 @@
|
|||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
<component name="PyNamespacePackagesService">
|
||||
|
|
|
|||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -3,7 +3,7 @@
|
|||
<component name="Black">
|
||||
<option name="sdkName" value="Python 3.10 (backend)" />
|
||||
</component>
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
|
||||
<component name="PyCharmProfessionalAdvertiser">
|
||||
<option name="shown" value="true" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -739,6 +739,11 @@ class AssetList:
|
|||
self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP)
|
||||
)
|
||||
|
||||
no_data_codes = {"No Data": None}
|
||||
self.standardised_asset_list[self.landlord_year_built] = (
|
||||
self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes)
|
||||
)
|
||||
|
||||
self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime(
|
||||
self.standardised_asset_list[self.landlord_year_built]
|
||||
)
|
||||
|
|
@ -759,7 +764,8 @@ class AssetList:
|
|||
"This cell has an external reference that can't be shown or edited. Editing this cell will "
|
||||
"remove the external reference.",
|
||||
"ND",
|
||||
'PIMSS EMPTY'
|
||||
'PIMSS EMPTY',
|
||||
"UNKNOWN"
|
||||
]
|
||||
|
||||
if pd.isnull(date_str) or date_str in known_errors or (date_str == 0):
|
||||
|
|
@ -1229,11 +1235,11 @@ class AssetList:
|
|||
elif self.old_format_non_intrusives_present:
|
||||
non_intrusives_wall_filter = (
|
||||
self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin(
|
||||
["empty cavity", "partial fill"]
|
||||
["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial"]
|
||||
) | (
|
||||
(
|
||||
self.standardised_asset_list['non-intrusives: WFT Findings']
|
||||
.str.lower().str.strip().str.contains("empty cavity|partial fill") &
|
||||
.str.lower().str.strip().str.contains("empty cavity|partial fill|empty|partial") &
|
||||
~self.standardised_asset_list['non-intrusives: WFT Findings']
|
||||
.astype(str).str.lower().str.strip().str.contains("major access issues")
|
||||
)
|
||||
|
|
|
|||
|
|
@ -62,22 +62,83 @@ def app():
|
|||
Property UPRN
|
||||
"""
|
||||
|
||||
# Community Housing new list
|
||||
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Community Housing/Programme "
|
||||
"Reconciliation")
|
||||
data_filename = "SUB EPC C to Domna.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'POSTCODE'
|
||||
fulladdress_column = None
|
||||
address1_column = "ADDRESS"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["ADDRESS", "ESTATE", "TOWN"]
|
||||
# Thurrock
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
|
||||
data_filename = "THURROCK COUNCIL.xlsx"
|
||||
sheet_name = "Assets"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "Full Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "BUILD DATE"
|
||||
landlord_year_built = "Construction Date"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "PROPERTY TYPE"
|
||||
landlord_built_form = "PROPERTY TYPE"
|
||||
landlord_wall_construction = "CONSTRUCTION TYPE"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Property Subtype"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = "Main Heating Type"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Property Reference"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# Medway
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
|
||||
data_filename = "MEDWAY Asset List.xlsx"
|
||||
sheet_name = "Asset list"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "House Number"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["House Number", "Street 1"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Year Built"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type - Academy"
|
||||
landlord_built_form = "Property Type - Academy"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Row ID"
|
||||
landlord_sap = None
|
||||
outcomes_filename = []
|
||||
outcomes_sheetname = []
|
||||
outcomes_postcode = []
|
||||
outcomes_houseno = []
|
||||
outcomes_id = []
|
||||
outcomes_address = []
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# MHS
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
|
||||
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "FullAddress"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "BuiltInYear"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "AssetType"
|
||||
landlord_built_form = "PropertyType"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
|
|
@ -94,459 +155,33 @@ def app():
|
|||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# Unitas
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas"
|
||||
data_filename = "unitas_asset_list_for_analysis.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = 'Post Code'
|
||||
fulladdress_column = "Address Line 1"
|
||||
address1_column = "Address Line 1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "built year"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Expanded Property Type"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = "loft insulation"
|
||||
landlord_heating_system = "Bolier Make"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Property Reference"
|
||||
landlord_sap = "Sap Rating"
|
||||
outcomes_filename = [
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unitas/Unitas - All outcomes - 24.04.2025.xlsx",
|
||||
]
|
||||
outcomes_sheetname = ["Feedback"]
|
||||
outcomes_postcode = ["Postcode"]
|
||||
outcomes_houseno = ["No."]
|
||||
outcomes_id = [None]
|
||||
outcomes_address = ["Address"]
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "Submissions ECO 3.csv"),
|
||||
os.path.join(data_folder, "Submissions ECO 4 - PHASE 1.csv"),
|
||||
os.path.join(data_folder, "Submissions ECO 4 - PHASE 2.csv")
|
||||
]
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = "unitas|everill|baskeyfield"
|
||||
|
||||
# LHP:
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP"
|
||||
data_filename = "LHP.xlsx"
|
||||
sheet_name = "Decent Homes Stock"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build Date"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = "Heating Type"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Property ID"
|
||||
landlord_sap = None
|
||||
outcomes_filename = [
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP/LHP Outcomes.xlsx",
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/LHP/Lincolnshire Housing Partnership - Outcomes 20th "
|
||||
"Feb 2024.xlsx",
|
||||
]
|
||||
outcomes_sheetname = ["Sheet1", "LHP"]
|
||||
outcomes_postcode = ["Postcode", "Postcode"]
|
||||
outcomes_houseno = ["No.", "No."]
|
||||
outcomes_id = [None, None]
|
||||
outcomes_address = ["Address", "Address"]
|
||||
master_filepaths = [os.path.join(data_folder, "LHP Rolling Master for analysis.csv")]
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = "lhp"
|
||||
|
||||
# Sovereign
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sovereign"
|
||||
data_filename = "Warmfront - Quote for CWI.xlsx"
|
||||
sheet_name = "Sheet2"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "Address Line 1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "ID"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# NCHA
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
|
||||
data_filename = "Energy Info Copy.xlsx"
|
||||
sheet_name = "Data"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = "Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build Date (HAR10)"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type (HAR10)"
|
||||
landlord_built_form = "Build Form (EPC)"
|
||||
landlord_wall_construction = "Wall Description"
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = "Heating System"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Place ref"
|
||||
landlord_sap = "EPC SAP"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# Torus
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
|
||||
data_filename = "Torus Property Asset List - Phase 1.xlsx"
|
||||
sheet_name = "TORUS"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = "NatUPRN"
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_built_form = "Built Form"
|
||||
landlord_wall_construction = "Wall Construction"
|
||||
landlord_roof_construction = "Roof Construction"
|
||||
landlord_heating_system = "Space Heating Source"
|
||||
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = "SAP Score"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
phase = True
|
||||
|
||||
# Southern Midlands
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
||||
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
||||
sheet_name = "Sheet 1"
|
||||
postcode_column = 'Post Code'
|
||||
fulladdress_column = "Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Age_1"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Prop_Type"
|
||||
landlord_built_form = "Prop_Type"
|
||||
landlord_wall_construction = "Walls_P"
|
||||
landlord_heating_system = "Heating System"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "AssetID"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# PFP London
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
|
||||
data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
|
||||
sheet_name = "PFP SURROUNDING LONDON"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# PFP North-West
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
|
||||
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
|
||||
sheet_name = "CHECKED"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# PFP North-East
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
|
||||
data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
|
||||
sheet_name = "CHECKED"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# PFP East
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
|
||||
data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
|
||||
sheet_name = "PFP EAST"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "AddressLine1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype (PFP)"
|
||||
landlord_built_form = "Archetype (PFP)"
|
||||
landlord_wall_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "Uprn"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
outcomes_id = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# Wates
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
|
||||
data_filename = "ECO 4 Wates.xlsx"
|
||||
sheet_name = "Roadmap Homes"
|
||||
postcode_column = 'Postcode'
|
||||
fulladdress_column = None
|
||||
address1_column = "Address Line 1"
|
||||
address1_method = None
|
||||
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build Year"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Archetype"
|
||||
landlord_built_form = "Archetype"
|
||||
landlord_wall_construction = "Wall"
|
||||
landlord_heating_system = "Heating Type"
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "UPRN"
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
|
||||
# Ealing
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
|
||||
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
|
||||
# sheet_name = "IGNORE - FULL MAIN"
|
||||
# postcode_column = 'Postcode'
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
||||
# data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
||||
# sheet_name = "Sheet 1"
|
||||
# postcode_column = 'Post Code'
|
||||
# fulladdress_column = "Address"
|
||||
# address1_column = None
|
||||
# address1_method = "first_word"
|
||||
# address1_method = "house_number_extraction"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = "Year Built"
|
||||
# landlord_year_built = "Age_1"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property Type Code"
|
||||
# landlord_wall_construction = None
|
||||
# landlord_heating_system = None
|
||||
# landlord_property_type = "Prop_Type"
|
||||
# landlord_built_form = "Prop_Type"
|
||||
# landlord_wall_construction = "Walls_P"
|
||||
# landlord_heating_system = "Heating System"
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Property ref"
|
||||
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||||
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||||
# sheet_name = "Sheet1"
|
||||
# postcode_column = 'Full Address.1'
|
||||
# fulladdress_column = "Full Address"
|
||||
# address1_column = None
|
||||
# address1_method = "first_word"
|
||||
# address_cols_to_concat = []
|
||||
# missing_postcodes_method = None
|
||||
# landlord_year_built = "Build Date"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property Type"
|
||||
# landlord_wall_construction = "Wallinsul"
|
||||
# landlord_heating_system = "HeatSorc"
|
||||
# landlord_existing_pv = None
|
||||
# landlord_property_id = "Property Reference"
|
||||
# landlord_property_id = "AssetID"
|
||||
# outcomes_filename = None
|
||||
# outcomes_sheetname = None
|
||||
# outcomes_postcode = None
|
||||
# outcomes_houseno = None
|
||||
# outcomes_id = None
|
||||
# outcomes_address = None
|
||||
# master_filepaths = []
|
||||
# master_to_asset_list_filepath = None
|
||||
|
||||
# For Westward
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
|
||||
data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "WFT EDIT Postcode"
|
||||
fulladdress_column = "Address"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = "Build date"
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = "Location type"
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = "Wall Construction (EPC)"
|
||||
landlord_heating_system = "Heat Source"
|
||||
landlord_existing_pv = "PV (Y/N)"
|
||||
landlord_property_id = "Place ref"
|
||||
landlord_roof_construction = None
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
outcomes_postcode = None
|
||||
outcomes_houseno = None
|
||||
master_filepaths = []
|
||||
master_to_asset_list_filepath = None
|
||||
outcomes_id = None
|
||||
outcomes_address = None
|
||||
phase = False
|
||||
ecosurv_landlords = None
|
||||
|
||||
# For ACIS - programme re-build
|
||||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
|
||||
# data_filename = "ACIS asset list.xlsx"
|
||||
# sheet_name = "Assets"
|
||||
# address1_column = "House No"
|
||||
# postcode_column = "Postcode"
|
||||
# landlord_property_id = "UPRN"
|
||||
# fulladdress_column = None
|
||||
# address_cols_to_concat = ["House No", "Street", "Town"]
|
||||
# missing_postcodes_method = None
|
||||
# address1_method = None
|
||||
# landlord_year_built = "YEAR BUILT"
|
||||
# landlord_os_uprn = None
|
||||
# landlord_property_type = "Property type"
|
||||
# landlord_built_form = None
|
||||
# landlord_wall_construction = "Wall Constuction"
|
||||
# landlord_roof_construction = None
|
||||
# landlord_sap = None
|
||||
# landlord_heating_system = "Heating"
|
||||
# landlord_existing_pv = None
|
||||
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
|
||||
# outcomes_sheetname = "Feedback"
|
||||
# outcomes_postcode = "Postcode"
|
||||
# outcomes_address = "Address"
|
||||
# outcomes_houseno = "No"
|
||||
# outcomes_id = None
|
||||
# master_filepaths = [
|
||||
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
|
||||
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
|
||||
# ]
|
||||
# master_to_asset_list_filepath = None
|
||||
# phase = False
|
||||
# ecosurv_landlords = None
|
||||
|
||||
# For plus dane
|
||||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
|
||||
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
|
||||
sheet_name = "Asset List"
|
||||
address1_column = " Address"
|
||||
postcode_column = " Postcode"
|
||||
landlord_property_id = "UPRN"
|
||||
fulladdress_column = " Address"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
address1_method = None
|
||||
landlord_year_built = "Property Age"
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = "Property Type"
|
||||
landlord_wall_construction = "Landlord Wall Full"
|
||||
landlord_heating_system = "Landlord Heating"
|
||||
landlord_existing_pv = None
|
||||
outcomes_filename = "plus dane outcomes.xlsx"
|
||||
outcomes_sheetname = "EVERYTHING"
|
||||
outcomes_postcode = "Post Code"
|
||||
outcomes_houseno = "Numb."
|
||||
master_filepaths = [
|
||||
os.path.join(data_folder, "JJC Rolling Master.csv"),
|
||||
os.path.join(data_folder, "SCIS Rolling Master.csv"),
|
||||
]
|
||||
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
|
||||
|
||||
# Maps addresses to uprn in problematic cases
|
||||
manual_uprn_map = {}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ STANDARD_BUILT_FORMS = {
|
|||
# Houses
|
||||
"end-terrace", "semi-detached", "detached", "mid-terrace",
|
||||
# Flats
|
||||
"ground floor", "mid-floor", "top-floor", "basement"
|
||||
"ground floor", "mid-floor", "top-floor", "basement", "low rise", "high rise",
|
||||
}
|
||||
|
||||
BUILT_FORM_MAPPINGS = {
|
||||
|
|
@ -265,5 +265,70 @@ BUILT_FORM_MAPPINGS = {
|
|||
'FIRST FLOOR FLAT': 'ground floor',
|
||||
'GROUND FL MAISONETTE': 'ground floor',
|
||||
'HOUSE 2 LIVING ROOMS': 'unknown',
|
||||
'FLAT OVER SHOP': 'unknown'
|
||||
'FLAT OVER SHOP': 'unknown',
|
||||
|
||||
'4 Bed Detached House': 'detached',
|
||||
'2 Bed Detached House': 'detached',
|
||||
'3 Bed Detached Bungalow': 'detached',
|
||||
'1 Bed Semi-Detached House': 'semi-detached',
|
||||
'2 Bed Semi-Detached House': 'semi-detached',
|
||||
'2 Bed Detached Bungalow': 'detached',
|
||||
'1 Bed Mid Terrace Bungalow': 'mid-terrace',
|
||||
'4 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'3 Bed Mid Terrace Bungalow': 'mid-terrace',
|
||||
'3 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'3 Bed Mid Terrace House': 'mid-terrace',
|
||||
'2 Bed Mid Terrace House': 'mid-terrace',
|
||||
'3 Bed Detached House': 'detached',
|
||||
'2 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'5 Bed Mid Terrace House': 'mid-terrace',
|
||||
'2 Bed Mid Terrace Bungalow': 'mid-terrace',
|
||||
'3 Bed Semi-Detached House': 'semi-detached',
|
||||
'1 Bed Semi-Detached Bungalow': 'semi-detached',
|
||||
'4 Bed Mid Terrace House': 'mid-terrace',
|
||||
'1 Bed Detached Bungalow': 'detached',
|
||||
'5 Bed Semi-Detached House': 'semi-detached',
|
||||
'6 Bed Detached House': 'detached',
|
||||
'1 Bed Mid Terrace House': 'mid-terrace',
|
||||
'4 Bed Semi-Detached House': 'semi-detached',
|
||||
'TBA': 'unknown',
|
||||
'1 Bed EOT House': 'end-terrace',
|
||||
'3 Bed Flat': 'unknown',
|
||||
'5 Bed EOT House': 'end-terrace',
|
||||
'1 Bed EOT Bungalow': 'end-terrace',
|
||||
'2 Bed EOT House': 'end-terrace',
|
||||
'1 Bed Studio Flat': 'unknown',
|
||||
'3 Bed Maison': 'unknown',
|
||||
'Commercial Letting': 'unknown',
|
||||
'4 Bed Maison': 'unknown',
|
||||
'2 Bed Flat': 'unknown',
|
||||
'3 Bed EOT House': 'end-terrace',
|
||||
'2 Bed Maison': 'unknown',
|
||||
'4 Bed EOT House': 'end-terrace',
|
||||
'1 Bed Flat': 'unknown',
|
||||
'3 Bed EOT Bungalow': 'end-terrace',
|
||||
'1 Bed Maison': 'unknown',
|
||||
'2 Bed EOT Bungalow': 'end-terrace',
|
||||
|
||||
'Bungalow detached': 'detached',
|
||||
'Bungalow semi detached': 'semi-detached',
|
||||
'Sheltered bungalow semi detached': 'semi-detached',
|
||||
'Bedsit bungalow semi detached': 'semi-detached',
|
||||
'Semi detached house': 'semi-detached',
|
||||
'Bedsit bungalow terraced': 'mid-terrace', 'Terraced house': 'mid-terrace',
|
||||
'Sheltered flat': 'unknown',
|
||||
'APD Bungalow': 'unknown',
|
||||
'Flat with partition': 'unknown',
|
||||
'APD flat': 'unknown',
|
||||
'Sheltered warden flat': 'unknown',
|
||||
'Sheltered bedsit': 'unknown',
|
||||
'Sheltered bungalow terraced': 'mid-terrace',
|
||||
'Block': 'unknown',
|
||||
'Bungalow terraced': 'mid-terrace',
|
||||
'Maisonette flat': 'unknown',
|
||||
'Sheltered bedsit disabled': 'unknown',
|
||||
'Bedsit Flat': 'unknown',
|
||||
'Low Rise': 'low rise',
|
||||
'Upper Floor': 'top-floor',
|
||||
'High Rise': 'high rise',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -288,5 +288,8 @@ HEATING_MAPPINGS = {
|
|||
'No Gas Boiler': 'no heating',
|
||||
'Back Boiler': 'solid fuel',
|
||||
"This cell has an external reference that can't be shown or edited. Editing this cell will remove the external "
|
||||
"reference.": 'unknown'
|
||||
"reference.": 'unknown',
|
||||
'Communal Heating': 'communal heating',
|
||||
'No Data': 'unknown',
|
||||
'Boiler System': 'gas condensing boiler',
|
||||
}
|
||||
|
|
|
|||
|
|
@ -227,6 +227,30 @@ PROPERTY_MAPPING = {
|
|||
'FIRST FLOOR FLAT': 'flat',
|
||||
'GROUND FL MAISONETTE': 'maisonette',
|
||||
'HOUSE 2 LIVING ROOMS': 'house',
|
||||
'FLAT OVER SHOP': 'flat'
|
||||
|
||||
'FLAT OVER SHOP': 'flat',
|
||||
'House With Integral Garage': 'house',
|
||||
'Flat Over Parking/Accessway': 'flat',
|
||||
'Flat Over Binstore': 'flat',
|
||||
'Flat Over Garage': 'flat',
|
||||
'House With Independent Garage': 'house',
|
||||
'Studio': 'flat',
|
||||
'Bedsit bungalow terraced': 'bedsit',
|
||||
'Terraced house': 'house',
|
||||
'Sheltered flat': 'flat',
|
||||
'APD Bungalow': 'bungalow',
|
||||
'Flat with partition': 'flat',
|
||||
'Bungalow detached': 'bungalow',
|
||||
'APD flat': 'flat',
|
||||
'Sheltered warden flat': 'flat',
|
||||
'Bungalow semi detached': 'bungalow',
|
||||
'Sheltered bedsit': 'bedsit',
|
||||
'Sheltered bungalow terraced': 'bungalow',
|
||||
'Sheltered bungalow semi detached': 'bungalow',
|
||||
'Bungalow terraced': 'bungalow',
|
||||
'Maisonette flat': 'maisonette',
|
||||
'Sheltered bedsit disabled': 'bedsit',
|
||||
'Bedsit bungalow semi detached': 'bedsit',
|
||||
'Bedsit Flat': 'bedsit',
|
||||
'Semi detached house': 'house',
|
||||
'Unit': 'unknown'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -172,7 +172,7 @@ class SearchEpc:
|
|||
|
||||
self.address1 = address1
|
||||
self.postcode = postcode
|
||||
self.full_address = full_address
|
||||
self.full_address = full_address if full_address is not None else self.address1
|
||||
self.uprn = uprn
|
||||
self.house_number = self.get_house_number(self.address1)
|
||||
self.numeric_house_number = self.extract_numeric_housenumber_part(self.house_number)
|
||||
|
|
@ -265,9 +265,7 @@ class SearchEpc:
|
|||
|
||||
for retry in range(self.max_retries):
|
||||
try:
|
||||
|
||||
response = self.client.domestic.call(method="get", url=url, params=params)
|
||||
|
||||
if response:
|
||||
self.data = response
|
||||
return {
|
||||
|
|
@ -368,8 +366,11 @@ class SearchEpc:
|
|||
unique_property_types = {r["property-type"] for r in rows}
|
||||
|
||||
# We allow for variation in property type across flats/maisonettes
|
||||
if (len(uprns) == 1) and ((len(unique_property_types) == 1) or unique_property_types == {"Flat", "Maisonette"}):
|
||||
return rows
|
||||
# If we know that we have a flat/maisonette, we allow for both property types
|
||||
if property_type in ["Flat", "Maisonette"]:
|
||||
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
|
||||
) or unique_property_types == {"Flat", "Maisonette"}):
|
||||
return rows
|
||||
|
||||
if property_type is not None:
|
||||
# We can do a filter on the property type
|
||||
|
|
@ -388,11 +389,27 @@ class SearchEpc:
|
|||
|
||||
# We check if post town is included in the address
|
||||
if any([r["posttown"].lower() in address.lower() for r in rows]):
|
||||
best_match = process.extractOne(
|
||||
best_match1 = process.extractOne(
|
||||
address, [", ".join([r["address"], r["posttown"]]) for r in rows], score_cutoff=0
|
||||
)
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
|
||||
best_match2 = process.extractOne(
|
||||
address, [", ".join([r["address"]]) for r in rows], score_cutoff=0
|
||||
)
|
||||
# Pick the largest score
|
||||
if best_match1[1] >= best_match2[1]:
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match1[0]]
|
||||
else:
|
||||
# Get all of the scores
|
||||
rows_filtered = [r for r in rows if r["address"] == best_match2[0]]
|
||||
|
||||
# If we have multiple, we filter on newest lodgment date
|
||||
if len(rows_filtered) > 1:
|
||||
rows_filtered = [
|
||||
r for r in rows_filtered
|
||||
if r["lodgement-datetime"] == max([x["lodgement-datetime"] for x in rows_filtered])
|
||||
]
|
||||
|
||||
else:
|
||||
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
|
||||
# Get the UPRN for the best match
|
||||
|
|
|
|||
134
etl/customers/mhs/flag_pilot.py
Normal file
134
etl/customers/mhs/flag_pilot.py
Normal file
|
|
@ -0,0 +1,134 @@
|
|||
"""
|
||||
On the standardised asset list, this script will flag the pilot assets.
|
||||
"""
|
||||
import pandas as pd
|
||||
import os
|
||||
import numpy as np
|
||||
from tqdm import tqdm
|
||||
|
||||
# Project codes written to the ``project_code`` column of the asset list.
PILOT_PROJECT_CODE = "MHS-000-PILOT"
MHS_PHASE_1_PROJECT_CODE = "MHS-001"

# NOTE: the paths below are absolute and machine-specific; the script only runs where
# these workbooks exist at exactly these locations.
STANDARDISED_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS HOMES (Full Asset List) - for programme build - "
    "Standardised.xlsx"
)
PILOT_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS 334 x Pilot reviewed - KB notes end column.xlsx"
)
CIGA_WORKBOOK = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/MHS CIGA Check 03042025_201.xlsx"
OUTPUT_WORKBOOK = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS/12052025 MHS Standardised Asset List - "
    "programme.xlsx"
)

asset_list = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="Standardised Asset List")
# The flat data is not modified here; it is loaded so it can be written back alongside
# the updated asset list.
flat_data = pd.read_excel(STANDARDISED_WORKBOOK, sheet_name="Flat Data")

pilot = pd.read_excel(PILOT_WORKBOOK)
ciga_checks = pd.read_excel(CIGA_WORKBOOK)
# Stable key used to join the matched property ids back onto the CIGA rows.
ciga_checks["row_id"] = ciga_checks.index

# Flag the pilot assets: any property whose landlord id appears in the pilot workbook.
asset_list["project_code"] = None
asset_list["project_code"] = np.where(
    asset_list["landlord_property_id"].isin(pilot["Place Reference"]),
    PILOT_PROJECT_CODE,
    asset_list["project_code"],
)

# We now flag the next phase of the programme: anything with a cavity or solar reason
# that has not already been assigned to the pilot.
has_measure_reason = ~pd.isnull(asset_list["cavity_reason"]) | ~pd.isnull(asset_list["solar_reason"])
asset_list["project_code"] = np.where(
    has_measure_reason & pd.isnull(asset_list["project_code"]),
    MHS_PHASE_1_PROJECT_CODE,
    asset_list["project_code"],
)

# We now flag the CIGA checks.
# Addresses that cannot be matched automatically, mapped to their landlord property id.
manual_fixes = {
    "123 Columbine Close, Rochester": "2213861230"
}

# Loop-invariant: lower-cased, comma-free full address used for substring matching.
normalised_full_address = asset_list["domna_full_address"].str.lower().str.replace(",", "", regex=False)

ciga_lookup = []
for _, row in tqdm(ciga_checks.iterrows(), total=len(ciga_checks)):

    if manual_fixes.get(row["Matched Address"]):
        ll_pid = manual_fixes[row["Matched Address"]]
        df = asset_list[asset_list["landlord_property_id"].astype(str) == ll_pid]
        ciga_lookup.append(
            {
                "domna_property_id": df["domna_property_id"].values[0],
                "row_id": row["row_id"],
            }
        )
        continue

    # First attempt: exact postcode, then exact first address line.
    df = asset_list[asset_list["domna_postcode"] == row["Postcode"]]
    df = df[df["domna_address_1"].astype(str) == str(row["Address1"])]

    # Second attempt: the same match, using the postcode CIGA matched against.
    if df.empty:
        df = asset_list[asset_list["domna_postcode"] == row["Matched Postcode"]]
        df = df[df["domna_address_1"].astype(str) == str(row["Address1"])]

    # Still ambiguous: disambiguate on the full address text instead of the postcode.
    # NOTE(review): the nesting of this fallback was reconstructed from a listing with
    # its indentation stripped - confirm against the original file.
    if df.shape[0] > 1:
        # regex=False: addresses are literal text, not patterns ('.', '(' etc. must not
        # be interpreted as regular-expression syntax).
        matched_address = row["Matched Address"].lower().replace(",", "")
        df = asset_list[normalised_full_address.str.contains(matched_address, na=False, regex=False)]
        if df.empty:
            address_2 = row["Address2"].lower().replace(",", "")
            df = asset_list[normalised_full_address.str.contains(address_2, na=False, regex=False)]

        df = df[df["domna_address_1"].astype(str) == str(row["Address1"])]

    # Anything other than exactly one candidate (none, or several) is a matching failure.
    if df.shape[0] != 1:
        raise ValueError(
            f"Expected exactly one match for {row['Address1']} in the asset list, found {df.shape[0]}"
        )

    ciga_lookup.append(
        {
            "domna_property_id": df["domna_property_id"].values[0],
            "row_id": row["row_id"],
        }
    )

ciga_lookup = pd.DataFrame(ciga_lookup)

# Attach the guarantee outcome of each CIGA check to the property it was matched to.
ciga_lookup = ciga_lookup.merge(
    ciga_checks[["row_id", "Guarantee"]].rename(
        columns={"Guarantee": "ciga_guarantee"}
    ), how="left", on="row_id"
)
# NOTE(review): this column is not included in the merge below, so it never reaches the
# saved asset list - confirm whether it should be.
ciga_lookup["ciga_check_complete"] = True

asset_list = asset_list.merge(
    ciga_lookup[["domna_property_id", "ciga_guarantee"]],
    how="left",
    on="domna_property_id"
)

# Check we matched addresses correctly
# match_check = ciga_lookup.merge(
#     ciga_checks, how="left", on="row_id"
# ).merge(
#     asset_list[["domna_property_id", "domna_full_address"]], how="left", on="domna_property_id"
# )
# match_check = match_check[["Matched Address", "domna_full_address"]]

# Save
filename = OUTPUT_WORKBOOK
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data

with pd.ExcelWriter(filename) as writer:
    asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
|
||||
Loading…
Add table
Reference in a new issue