mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
911 lines
36 KiB
Python
911 lines
36 KiB
Python
import os
|
||
import json
|
||
import pandas as pd
|
||
from pprint import pprint
|
||
import msgpack
|
||
from utils.s3 import read_from_s3
|
||
from asset_list.AssetList import AssetList
|
||
from asset_list.mappings.property_type import PROPERTY_MAPPING
|
||
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
|
||
from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
|
||
from asset_list.mappings.heating_systems import HEATING_MAPPINGS
|
||
from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
|
||
from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS
|
||
from asset_list.utils import get_data
|
||
|
||
from dotenv import load_dotenv
|
||
from backend.SearchEpc import SearchEpc
|
||
|
||
load_dotenv(dotenv_path="backend/.env")
|
||
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
|
||
|
||
|
||
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
    """
    Add an ``address1_extracted`` column derived from the full address column.

    The DataFrame is modified in place and the same object is returned.

    :param asset_list: pandas DataFrame to add the column to
    :param full_address_col: name of the column holding the full address string
    :param postcode_col: name of the postcode column; only read when ``method`` is
        "house_number_extraction"
    :param method: how the first address line is derived:
        - "first_two_words": first two space-separated tokens, re-joined with a space
        - "first_word": first space-separated token
        - "house_number_extraction": ``SearchEpc.get_house_number`` applied to each row
    :return: ``asset_list`` with the ``address1_extracted`` column set
    :raises ValueError: if ``method`` is not one of the values above
    """
    if method == "first_two_words":
        tokens = asset_list[full_address_col].str.split(" ")
        asset_list["address1_extracted"] = tokens.str[:2].str.join(" ")
        return asset_list

    if method == "first_word":
        asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
        return asset_list

    if method == "house_number_extraction":
        # Build one value per row directly from the two columns. DataFrame.apply(axis=1)
        # on an empty frame probes the callback with an all-NaN row and may hand back a
        # DataFrame, which cannot be assigned to a single column.
        asset_list["address1_extracted"] = [
            SearchEpc.get_house_number(address=address, postcode=postcode)
            for address, postcode in zip(asset_list[full_address_col], asset_list[postcode_col])
        ]
        return asset_list

    raise ValueError(f"Method {method} not recognized")
|
||
|
||
|
||
def app():
|
||
"""
|
||
This app is pulling EPC data for some properties owned by Livewest
|
||
|
||
Data request contents:
|
||
Date of last EPC
|
||
Reason for EPC
|
||
SAP score on register
|
||
Property Type
|
||
Property Area
|
||
Property Age
|
||
Any Dimensions (HLP,PW,RH)
|
||
Property Wall Construction
|
||
Heating Type
|
||
Secondary Heating
|
||
Loft Insulation Depth
|
||
|
||
Additional if possible:
|
||
Heat loss calculations
|
||
EPC recommendations
|
||
Property UPRN
|
||
"""
|
||
|
||
# TODO:
|
||
# For cavity work:
|
||
# - Flag any entries that have a different wall type between non-intrusive data against EPC
|
||
# - Worth double checking entries that have a difference in wall construction
|
||
# - Look at anything that is flagged as an empty cavity but the EPC data says it’s a filled cavity
|
||
# - Look at the current EPC scores - Anything that is C75 or above, especially if it’s assumed no insulation
|
||
# - By postcode, we can try and deduce if all of the addresses are flats and then estimate if 50% of the flats
|
||
# are less than C75
|
||
# - Flag anything pre SAP2012
|
||
# - Flag anything over 5 years old
|
||
# - Look at year built vs age band
|
||
#
|
||
# For Solar:
|
||
# - Discount any that have solar PV - based on non-intrusives and from the inspections team
|
||
# - In the heating, discount anything that isn’t ashp, ghsp, hhrs, electric storage - possibly homes with
|
||
# electric room heaters but it might need to be an EPC E
|
||
# - Fabric - check the floor, wall and roof:
|
||
# - Filled or empty cavity is good
|
||
# - Insulated solid/timber/system built is good
|
||
# - SCIS/CEG needs solid floors
|
||
# - JJC don’t care
|
||
# - Anything with a loft 200 or below
|
||
# - Anything C75 and above won’t qualify
|
||
# - Insulated loft = 200mm
|
||
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
|
||
# - Or the insulation required is loft/cavity (floors should be solid)
|
||
|
||
# Torus
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
|
||
data_filename = "Torus Property Asset List - Phase 1.xlsx"
|
||
sheet_name = "TORUS"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = None
|
||
address1_column = "AddressLine1"
|
||
address1_method = None
|
||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Property Age"
|
||
landlord_os_uprn = "NatUPRN"
|
||
landlord_property_type = "Property Type"
|
||
landlord_built_form = "Built Form"
|
||
landlord_wall_construction = "Wall Construction"
|
||
landlord_roof_construction = "Roof Construction"
|
||
landlord_heating_system = "Space Heating Source"
|
||
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
|
||
landlord_property_id = "UPRN"
|
||
landlord_sap = "SAP Score"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
outcomes_address = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
phase = True
|
||
|
||
# Ealing - houses
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing"
|
||
data_filename = "Ealing_rechecked_cleaned_05042025.csv"
|
||
sheet_name = None
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = "Address"
|
||
address1_column = None
|
||
address1_method = "house_number_extraction"
|
||
address_cols_to_concat = []
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Year Built"
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Property Type Code"
|
||
landlord_built_form = None
|
||
landlord_wall_construction = None
|
||
landlord_heating_system = None
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "Property ref"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
outcomes_address = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# Southern Midlands
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
|
||
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
|
||
sheet_name = "Sheet 1"
|
||
postcode_column = 'Post Code'
|
||
fulladdress_column = "Address"
|
||
address1_column = None
|
||
address1_method = "house_number_extraction"
|
||
address_cols_to_concat = []
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Age_1"
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Prop_Type"
|
||
landlord_built_form = "Prop_Type"
|
||
landlord_wall_construction = "Walls_P"
|
||
landlord_heating_system = "Heating System"
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "AssetID"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
outcomes_address = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# Live West (2018 Asset list)
|
||
data_folder = (
|
||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset List"
|
||
)
|
||
data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
|
||
sheet_name = "Assets"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = "Address"
|
||
address1_column = None
|
||
address1_method = "house_number_extraction"
|
||
address_cols_to_concat = []
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Build Year"
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Property Archetype"
|
||
landlord_built_form = None
|
||
landlord_wall_construction = None
|
||
landlord_heating_system = "Heating Fuel Type"
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "Uprn - DO NOT DELETE"
|
||
outcomes_filename = "RT - LiveWest.xlsx"
|
||
outcomes_sheetname = "Feedback"
|
||
outcomes_postcode = "Poscode"
|
||
outcomes_houseno = "No."
|
||
outcomes_id = "UPRN"
|
||
master_filepaths = [
|
||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
|
||
"- redacted for analysis/CAVITY-Table 1.csv"
|
||
]
|
||
master_to_asset_list_filepath = None
|
||
|
||
# Live West (South West asset list)
|
||
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
|
||
"2025/Livewest Asset List (Original) - csv")
|
||
data_filename = "Report-Table 1.csv"
|
||
sheet_name = None
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = "T1_Address"
|
||
address1_column = None
|
||
address1_method = "house_number_extraction"
|
||
address_cols_to_concat = []
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Build Yr"
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "T1_AssetType"
|
||
landlord_built_form = "T1_AssetType"
|
||
landlord_wall_construction = "Wall Type Cavity"
|
||
landlord_heating_system = "Heating Fuel"
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "T1_UPRN"
|
||
outcomes_filename = "RT - LiveWest.xlsx"
|
||
outcomes_sheetname = "Feedback"
|
||
outcomes_postcode = "Poscode"
|
||
outcomes_houseno = "No."
|
||
outcomes_id = "UPRN"
|
||
master_filepaths = [
|
||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
|
||
"- redacted for analysis/CAVITY-Table 1.csv"
|
||
]
|
||
master_to_asset_list_filepath = None
|
||
|
||
# PFP London
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
|
||
data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
|
||
sheet_name = "PFP SURROUNDING LONDON"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = None
|
||
address1_column = "AddressLine1"
|
||
address1_method = None
|
||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||
missing_postcodes_method = None
|
||
landlord_year_built = None
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Archetype (PFP)"
|
||
landlord_built_form = "Archetype (PFP)"
|
||
landlord_wall_construction = None
|
||
landlord_heating_system = None
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "Uprn"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# PFP North-West
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
|
||
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
|
||
sheet_name = "CHECKED"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = None
|
||
address1_column = "AddressLine1"
|
||
address1_method = None
|
||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||
missing_postcodes_method = None
|
||
landlord_year_built = None
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Archetype (PFP)"
|
||
landlord_built_form = "Archetype (PFP)"
|
||
landlord_wall_construction = None
|
||
landlord_heating_system = None
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "Uprn"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# PFP North-East
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
|
||
data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
|
||
sheet_name = "CHECKED"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = None
|
||
address1_column = "AddressLine1"
|
||
address1_method = None
|
||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||
missing_postcodes_method = None
|
||
landlord_year_built = None
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Archetype (PFP)"
|
||
landlord_built_form = "Archetype (PFP)"
|
||
landlord_wall_construction = None
|
||
landlord_heating_system = None
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "Uprn"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# PFP East
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
|
||
data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
|
||
sheet_name = "PFP EAST"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = None
|
||
address1_column = "AddressLine1"
|
||
address1_method = None
|
||
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
|
||
missing_postcodes_method = None
|
||
landlord_year_built = None
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Archetype (PFP)"
|
||
landlord_built_form = "Archetype (PFP)"
|
||
landlord_wall_construction = None
|
||
landlord_heating_system = None
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "Uprn"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
outcomes_id = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# Wates
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
|
||
data_filename = "ECO 4 Wates.xlsx"
|
||
sheet_name = "Roadmap Homes"
|
||
postcode_column = 'Postcode'
|
||
fulladdress_column = None
|
||
address1_column = "Address Line 1"
|
||
address1_method = None
|
||
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Build Year"
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Archetype"
|
||
landlord_built_form = "Archetype"
|
||
landlord_wall_construction = "Wall"
|
||
landlord_heating_system = "Heating Type"
|
||
landlord_existing_pv = None
|
||
landlord_property_id = "UPRN"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
|
||
# Ealing
|
||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
|
||
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
|
||
# sheet_name = "IGNORE - FULL MAIN"
|
||
# postcode_column = 'Postcode'
|
||
# fulladdress_column = "Address"
|
||
# address1_column = None
|
||
# address1_method = "first_word"
|
||
# address_cols_to_concat = []
|
||
# missing_postcodes_method = None
|
||
# landlord_year_built = "Year Built"
|
||
# landlord_os_uprn = None
|
||
# landlord_property_type = "Property Type Code"
|
||
# landlord_wall_construction = None
|
||
# landlord_heating_system = None
|
||
# landlord_existing_pv = None
|
||
# landlord_property_id = "Property ref"
|
||
|
||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
|
||
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
|
||
# sheet_name = "Sheet1"
|
||
# postcode_column = 'Full Address.1'
|
||
# fulladdress_column = "Full Address"
|
||
# address1_column = None
|
||
# address1_method = "first_word"
|
||
# address_cols_to_concat = []
|
||
# missing_postcodes_method = None
|
||
# landlord_year_built = "Build Date"
|
||
# landlord_os_uprn = None
|
||
# landlord_property_type = "Property Type"
|
||
# landlord_wall_construction = "Wallinsul"
|
||
# landlord_heating_system = "HeatSorc"
|
||
# landlord_existing_pv = None
|
||
# landlord_property_id = "Property Reference"
|
||
# outcomes_filename = None
|
||
# outcomes_sheetname = None
|
||
# outcomes_postcode = None
|
||
# outcomes_houseno = None
|
||
# master_filepaths = []
|
||
# master_to_asset_list_filepath = None
|
||
|
||
# For Westward
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
|
||
data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
|
||
sheet_name = "Sheet1"
|
||
postcode_column = "WFT EDIT Postcode"
|
||
fulladdress_column = "Address"
|
||
address1_column = None
|
||
address1_method = "house_number_extraction"
|
||
address_cols_to_concat = []
|
||
missing_postcodes_method = None
|
||
landlord_year_built = "Build date"
|
||
landlord_os_uprn = "UPRN"
|
||
landlord_property_type = "Location type"
|
||
landlord_built_form = None
|
||
landlord_wall_construction = "Wall Construction (EPC)"
|
||
landlord_heating_system = "Heat Source"
|
||
landlord_existing_pv = "PV (Y/N)"
|
||
landlord_property_id = "Place ref"
|
||
outcomes_filename = None
|
||
outcomes_sheetname = None
|
||
outcomes_postcode = None
|
||
outcomes_houseno = None
|
||
master_filepaths = []
|
||
master_to_asset_list_filepath = None
|
||
outcomes_id = None
|
||
|
||
# For ACIS - programme re-build
|
||
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
|
||
# data_filename = "ACIS asset list.xlsx"
|
||
# sheet_name = "Assets"
|
||
# address1_column = "House No"
|
||
# postcode_column = "Postcode"
|
||
# landlord_property_id = "UPRN"
|
||
# fulladdress_column = None
|
||
# address_cols_to_concat = ["House No", "Street", "Town"]
|
||
# missing_postcodes_method = None
|
||
# address1_method = None
|
||
# landlord_year_built = "YEAR BUILT"
|
||
# landlord_os_uprn = None
|
||
# landlord_property_type = "Property type"
|
||
# landlord_built_form = None
|
||
# landlord_wall_construction = "Wall Constuction"
|
||
# landlord_heating_system = "Heating"
|
||
# landlord_existing_pv = None
|
||
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
|
||
# outcomes_sheetname = "Feedback"
|
||
# outcomes_postcode = "Postcode"
|
||
# outcomes_houseno = "No"
|
||
# master_filepaths = [
|
||
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
|
||
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
|
||
# ]
|
||
# master_to_asset_list_filepath = None
|
||
|
||
# For plus dane
|
||
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
|
||
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
|
||
sheet_name = "Asset List"
|
||
address1_column = " Address"
|
||
postcode_column = " Postcode"
|
||
landlord_property_id = "UPRN"
|
||
fulladdress_column = " Address"
|
||
address_cols_to_concat = []
|
||
missing_postcodes_method = None
|
||
address1_method = None
|
||
landlord_year_built = "Property Age"
|
||
landlord_os_uprn = None
|
||
landlord_property_type = "Property Type"
|
||
landlord_wall_construction = "Landlord Wall Full"
|
||
landlord_heating_system = "Landlord Heating"
|
||
landlord_existing_pv = None
|
||
outcomes_filename = "plus dane outcomes.xlsx"
|
||
outcomes_sheetname = "EVERYTHING"
|
||
outcomes_postcode = "Post Code"
|
||
outcomes_houseno = "Numb."
|
||
master_filepaths = [
|
||
os.path.join(data_folder, "JJC Rolling Master.csv"),
|
||
os.path.join(data_folder, "SCIS Rolling Master.csv"),
|
||
]
|
||
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
|
||
|
||
# Maps addresses to uprn in problematic cases
|
||
manual_uprn_map = {}
|
||
|
||
asset_list = AssetList(
|
||
local_filepath=os.path.join(data_folder, data_filename),
|
||
header=0,
|
||
sheet_name=sheet_name,
|
||
address1_colname=address1_column,
|
||
postcode_colname=postcode_column,
|
||
landlord_property_id=landlord_property_id,
|
||
full_address_colname=fulladdress_column,
|
||
full_address_cols_to_concat=address_cols_to_concat,
|
||
missing_postcodes_method=missing_postcodes_method,
|
||
address1_extraction_method=address1_method,
|
||
landlord_year_built=landlord_year_built,
|
||
landlord_uprn=landlord_os_uprn,
|
||
landlord_property_type=landlord_property_type,
|
||
landlord_built_form=landlord_built_form,
|
||
landlord_wall_construction=landlord_wall_construction,
|
||
landlord_roof_construction=landlord_roof_construction,
|
||
landlord_heating_system=landlord_heating_system,
|
||
landlord_existing_pv=landlord_existing_pv,
|
||
landlord_sap=landlord_sap,
|
||
phase=phase
|
||
)
|
||
asset_list.init_standardise()
|
||
|
||
# We produce the new maps, which can be saved for future usage
|
||
new_property_type_map = {
|
||
k: v for k, v in (
|
||
asset_list.variable_mappings[asset_list.landlord_property_type] if
|
||
asset_list.landlord_property_type else {}
|
||
).items()
|
||
if k not in PROPERTY_MAPPING
|
||
}
|
||
new_built_form_map = {
|
||
k: v for k, v in (
|
||
asset_list.variable_mappings[asset_list.landlord_built_form] if
|
||
asset_list.landlord_built_form else {}
|
||
).items()
|
||
if k not in BUILT_FORM_MAPPINGS
|
||
}
|
||
new_wall_map = {
|
||
k: v for k, v in (
|
||
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
|
||
asset_list.landlord_wall_construction else {}
|
||
).items()
|
||
if k not in WALL_CONSTRUCTION_MAPPINGS
|
||
}
|
||
new_heating_map = {
|
||
k: v for k, v in (
|
||
asset_list.variable_mappings[asset_list.landlord_heating_system] if
|
||
asset_list.landlord_heating_system else {}
|
||
).items()
|
||
if k not in HEATING_MAPPINGS
|
||
}
|
||
new_existing_pv_map = {
|
||
k: v for k, v in (
|
||
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
|
||
).items()
|
||
if k not in EXISTING_PV_MAPPINGS
|
||
}
|
||
new_roof_construction_map = {
|
||
k: v for k, v in (
|
||
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
|
||
asset_list.landlord_roof_construction else {}
|
||
).items()
|
||
if k not in ROOF_CONSTRUCTION_MAPPINGS
|
||
}
|
||
|
||
asset_list.apply_standardiation()
|
||
|
||
# We now flag properties that have been treated under existing programmes
|
||
asset_list.flag_outcomes(
|
||
outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None,
|
||
outcomes_sheetname=outcomes_sheetname,
|
||
outcomes_address=outcomes_address,
|
||
outcomes_postcode=outcomes_postcode,
|
||
outcomes_houseno=outcomes_houseno,
|
||
outcomes_id=outcomes_id
|
||
)
|
||
|
||
asset_list.flag_survey_master(
|
||
master_filepaths=master_filepaths,
|
||
master_to_asset_list_filepath=master_to_asset_list_filepath
|
||
)
|
||
|
||
### We retrieve the EPC data
|
||
|
||
# We chunk up this data into chunk_size rows at a time
|
||
# Create the chunks directory
|
||
epc_api_only = False
|
||
force_retrieve_data = False
|
||
skip = None # Used to skip already completed chunks
|
||
chunk_size = 1000
|
||
filename = "Chunk {i}.csv"
|
||
download_folder = os.path.join(data_folder, "Chunks")
|
||
if not os.path.exists(download_folder):
|
||
os.makedirs(download_folder)
|
||
|
||
chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size))
|
||
downloaded_files = {filename.format(i=i) for i in chunk_indexes}
|
||
|
||
# We check if we have files associated to these files already and if we do, and we do not want to force the
|
||
# fetching of the data, we skip
|
||
folder_contents = os.listdir(download_folder)
|
||
if all(x in folder_contents for x in downloaded_files):
|
||
skip = max(chunk_indexes)
|
||
|
||
if any(x in folder_contents for x in downloaded_files):
|
||
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
|
||
|
||
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
|
||
print(f"Processing chunk {i} to {i + chunk_size}")
|
||
if skip is not None and not force_retrieve_data:
|
||
if i <= skip:
|
||
continue
|
||
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
|
||
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
|
||
df=chunk,
|
||
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
||
uprn_column=AssetList.STANDARD_UPRN,
|
||
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
|
||
address1_column=AssetList.STANDARD_ADDRESS_1,
|
||
postcode_column=AssetList.STANDARD_POSTCODE,
|
||
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
|
||
built_form_column=AssetList.STANDARD_BUILT_FORM,
|
||
manual_uprn_map=manual_uprn_map,
|
||
epc_api_only=epc_api_only,
|
||
epc_auth_token=EPC_AUTH_TOKEN
|
||
)
|
||
|
||
# We now retrieve any failed properties
|
||
chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
|
||
epc_data_failed, _, _ = get_data(
|
||
df=chunk_failed,
|
||
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
||
uprn_column=AssetList.STANDARD_UPRN,
|
||
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
|
||
address1_column=AssetList.STANDARD_ADDRESS_1,
|
||
postcode_column=AssetList.STANDARD_POSTCODE,
|
||
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
|
||
built_form_column=AssetList.STANDARD_BUILT_FORM,
|
||
manual_uprn_map=manual_uprn_map,
|
||
epc_api_only=epc_api_only,
|
||
epc_auth_token=EPC_AUTH_TOKEN
|
||
)
|
||
|
||
epc_data_chunk.extend(epc_data_failed)
|
||
|
||
# Append the failed data to the main data
|
||
# Store the chunk locally as a csv
|
||
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
|
||
# Store the errors and no-data locally
|
||
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
|
||
json.dump(errors_chunk, f)
|
||
|
||
with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
|
||
json.dump(no_epc_chunk, f)
|
||
|
||
# We read in and concatenate the created chunks
|
||
# List the contents
|
||
epc_data = []
|
||
for file in downloaded_files:
|
||
csv_data = pd.read_csv(os.path.join(download_folder, file))
|
||
# We need to convert the recommendations back to a list
|
||
csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
|
||
# We don't have this if we didn't run the pulling from find my epc
|
||
if "find_my_epc_data" in csv_data.columns:
|
||
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
|
||
epc_data.append(csv_data)
|
||
|
||
epc_df = pd.concat(epc_data)
|
||
epc_df["estimated"] = epc_df["estimated"].fillna(False)
|
||
|
||
# We expand out the recommendations
|
||
recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
|
||
|
||
unique_recommendations = set()
|
||
for _, row in recommendations_df.iterrows():
|
||
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
|
||
|
||
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
|
||
transformed_data = []
|
||
for _, row in recommendations_df.iterrows():
|
||
# Initialize a dictionary for this row with False for all recommendations
|
||
row_data = {col: False for col in columns}
|
||
row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
|
||
|
||
# Set True for each recommendation present in this row
|
||
for rec in row["recommendations"]:
|
||
recommendation_text = rec["improvement-summary-text"]
|
||
row_data[recommendation_text] = True
|
||
|
||
# Append the row data to transformed_data
|
||
transformed_data.append(row_data)
|
||
|
||
transformed_df = pd.DataFrame(transformed_data)
|
||
transformed_df = transformed_df[
|
||
[
|
||
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
|
||
"Floor insulation", "Floor insulation (suspended floor)"
|
||
]
|
||
]
|
||
|
||
transformed_df["epc_has_floor_recommendation"] = (
|
||
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
|
||
transformed_df["Floor insulation (suspended floor)"]
|
||
)
|
||
|
||
# Get the find my epc data
|
||
if "find_my_epc_data" not in epc_df.columns:
|
||
epc_df["find_my_epc_data"] = None
|
||
|
||
find_my_epc_data = []
|
||
for _, x in epc_df.iterrows():
|
||
if x["find_my_epc_data"]:
|
||
find_my_epc_data.append(
|
||
{
|
||
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
|
||
**x["find_my_epc_data"]
|
||
}
|
||
)
|
||
else:
|
||
find_my_epc_data.append(
|
||
{
|
||
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
|
||
}
|
||
)
|
||
|
||
find_my_epc_data = pd.DataFrame(find_my_epc_data)
|
||
|
||
find_my_epc_data = find_my_epc_data.merge(
|
||
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
|
||
how="left", on=asset_list.DOMNA_PROPERTY_ID
|
||
)
|
||
|
||
# We check if we get the solar pv column:
|
||
if "Solar photovoltaics" not in find_my_epc_data.columns:
|
||
find_my_epc_data["Solar photovoltaics"] = False
|
||
|
||
# Retrieve just the data we need
|
||
epc_df = epc_df[
|
||
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
||
].rename(
|
||
columns=asset_list.EPC_API_DATA_NAMES
|
||
)
|
||
|
||
# Look for columns not in the find my EPC data, which will have happened if we didn't
|
||
# retrieve it in the first place
|
||
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
|
||
if missed_find_epc_cols:
|
||
for c in missed_find_epc_cols:
|
||
find_my_epc_data[c] = None
|
||
|
||
epc_df = epc_df.merge(
|
||
find_my_epc_data[
|
||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||
]
|
||
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||
how="left",
|
||
on=asset_list.DOMNA_PROPERTY_ID
|
||
)
|
||
|
||
asset_list.merge_data(epc_df)
|
||
|
||
asset_list.extract_attributes()
|
||
|
||
cleaned = read_from_s3(
|
||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||
bucket_name="retrofit-data-dev"
|
||
)
|
||
cleaned = msgpack.unpackb(cleaned, raw=False)
|
||
|
||
asset_list.identify_worktypes(cleaned)
|
||
|
||
pprint(asset_list.work_type_figures)
|
||
|
||
asset_list.flat_analysis()
|
||
|
||
################################################################
|
||
# WESTWARD - comparison between Kieran's method & automated
|
||
################################################################
|
||
|
||
# Check 1)
|
||
cavity_fills = pd.read_excel(
|
||
os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
|
||
sheet_name="Straight Fill"
|
||
)
|
||
cavity_fills = cavity_fills.merge(
|
||
asset_list.standardised_asset_list[
|
||
[asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
|
||
],
|
||
how="left",
|
||
left_on=asset_list.landlord_property_id,
|
||
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
|
||
)
|
||
cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified")
|
||
print(cavity_fills["cavity_reason"].value_counts())
|
||
# Didn't identify 3 properties because they're bedsits
|
||
# 4 properties were identified, not based on the non-intrusives but instead because
|
||
# Westward said they were built in 2003/2007. Have adjusted this to use the age from the
|
||
# epc as well, as EPC says 1975 and they look like 1975 properties
|
||
# 37 properties flagged as already having solar - these are all because the landlord said they have solar
|
||
# e.g.
|
||
# https://earth.google.com/web/search/11+Winsland+Avenue+TOTNES+TQ9+5FT/@50.43354465,-3.71318276,46.57468503a,
|
||
# 59.14004365d,35y,0h,0t,
|
||
# 0r/data=CpABGmISXAolMHg0ODZkMWQxOGE4NWRiZjdkOjB4YjBhM2E5M2Q3YWVlMWEwYhlZYgp7fzdJQCHFfC9027QNwCohMTEgV2luc2xhbmQgQXZlbnVlIFRPVE5FUyBUUTkgNUZUGAIgASImCiQJbxsQEoo3SUARXQcp_HE3SUAZBmiZGJ6yDcAhCA0fqq63DcBCAggBOgMKATBCAggASg0I____________ARAA
|
||
# https://earth.google.com/web/search/15+St+Anne%27s+Ct,+Newton+Abbot+TQ12+1TL/@50.53068337,-3.61611128,
|
||
# 11.74908956a,135.73212429d,35y,0h,0t,
|
||
# 0r/data=CpUBGmcSYQolMHg0ODZkMDVkMjFhODhjZjgxOjB4MjBmMzE2Zjc3MGI2NGMwYxlCxHLw8UNJQCFZqyzALe4MwComMTUgU3QgQW5uZSdzIEN0LCBOZXd0b24gQWJib3QgVFExMiAxVEwYAiABIiYKJAm-r6U2iDdJQBHS5ICRdDdJQBmYGVpmiLINwCG8wcrtqbYNwEICCAE6AwoBMEICCABKDQj___________8BEAA
|
||
|
||
# Check 2)
|
||
cavity_fills_with_solar = pd.read_excel(
|
||
os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
|
||
sheet_name="Solar PV - Straight Fill"
|
||
)
|
||
cavity_fills_with_solar = cavity_fills_with_solar.merge(
|
||
asset_list.standardised_asset_list[
|
||
[asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
|
||
],
|
||
how="left",
|
||
left_on=asset_list.landlord_property_id,
|
||
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
|
||
)
|
||
cavity_fills_with_solar["cavity_reason"] = cavity_fills_with_solar["cavity_reason"].fillna("Not identified")
|
||
print(cavity_fills_with_solar["cavity_reason"].value_counts())
|
||
# 203 properties total
|
||
# 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity)
|
||
# 63 properties already have solar
|
||
|
||
# Check 3) RDF
|
||
rdf = pd.read_excel(
|
||
os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
|
||
sheet_name="RDF CIGA checks"
|
||
)
|
||
rdf = rdf.merge(
|
||
asset_list.standardised_asset_list[
|
||
[asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
|
||
],
|
||
how="left",
|
||
left_on=asset_list.landlord_property_id,
|
||
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
|
||
)
|
||
rdf["cavity_reason"] = rdf["cavity_reason"].fillna("Not identified")
|
||
print(rdf["cavity_reason"].value_counts())
|
||
# 264 properties are not identified, 261 of which are due to the fact they contain materials
|
||
# The other 3 were determined to be eligible for solar instead
|
||
# Many of these units that were identified for rdf works could be solar jobs
|
||
|
||
rdf_with_solar = pd.read_excel(
|
||
os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
|
||
sheet_name="Solar PV - RDF CIGA Checks"
|
||
)
|
||
rdf_with_solar = rdf_with_solar.merge(
|
||
asset_list.standardised_asset_list[
|
||
[asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
|
||
],
|
||
how="left",
|
||
left_on=asset_list.landlord_property_id,
|
||
right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
|
||
)
|
||
rdf_with_solar["cavity_reason"] = rdf_with_solar["cavity_reason"].fillna("Not identified")
|
||
rdf_with_solar["cavity_reason"].value_counts()
|
||
|
||
# All others identified - some flagged as empties due to EPC or landlord data suggesting as much
|
||
# 5 not identified due to containing COMPACTED BEAD
|
||
|
||
asset_list.standardised_asset_list = asset_list.standardised_asset_list[
|
||
asset_list.standardised_asset_list[asset_list.landlord_property_id]
|
||
]
|
||
|
||
asset_list.load_contact_details(
|
||
local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
|
||
sheet_name="Report 1",
|
||
landlord_property_id=asset_list.landlord_property_id,
|
||
phone_number_column='Property Current Tel. Number',
|
||
fullname_column='Proeprty Current Occupant',
|
||
firstname_column=None,
|
||
lastname_column=None,
|
||
email_column=None, # TODO - we need this
|
||
)
|
||
|
||
# Convert to a format suitable for CRM
|
||
# TODO: TEMP
|
||
assigned_surveyors = pd.DataFrame(
|
||
[
|
||
{
|
||
asset_list.landlord_property_id: "02610001",
|
||
"week_commencing": "10/10/2025",
|
||
"surveyor_name": "Khalim Conn-Kowlessar",
|
||
"surveyor_email": "khalim@domna.homes",
|
||
}
|
||
]
|
||
)
|
||
|
||
# TODO: Sort the output by postcode
|
||
|
||
company_domain = "ealing.gov.uk"
|
||
crm_pipeline_name = "Survey Management"
|
||
first_dealstage = "READY TO BEGIN SCHEDULING"
|
||
# TODO - temp, upload to either SharePoint or AWS
|
||
|
||
asset_list.prepare_for_crm(
|
||
assigned_surveyors=assigned_surveyors,
|
||
company_domain=company_domain,
|
||
crm_pipeline_name=crm_pipeline_name,
|
||
first_dealstage=first_dealstage
|
||
)
|
||
hubspot_data = asset_list.hubspot_data
|
||
|
||
# Store as an Excel workbook
|
||
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
|
||
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
|
||
|
||
with pd.ExcelWriter(filename) as writer:
|
||
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||
asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
|
||
# If we have outcomes, we add a tab with the outcomes
|
||
if not asset_list.outcomes_for_output.empty:
|
||
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
|
||
|
||
# Store the Hubspot export as a csv
|
||
hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
|