Merge pull request #444 from Hestia-Homes/remote-assessment-api

Remote assessment api
This commit is contained in:
KhalimCK 2025-06-22 15:37:56 +01:00 committed by GitHub
commit b81e2a4eba
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 2567 additions and 451 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="AssetList" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="AssetList" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

File diff suppressed because it is too large Load diff

View file

@ -2,8 +2,6 @@ import os
import json
import pandas as pd
from pprint import pprint
import msgpack
from utils.s3 import read_from_s3
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
@ -62,98 +60,227 @@ def app():
Property UPRN
"""
# Thurrock
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thurrock"
data_filename = "THURROCK COUNCIL - For analysis.xlsx"
sheet_name = "Assets"
# NCHA
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/NCHA"
data_filename = "Energy Information MASTER June 2025.xlsx"
sheet_name = "Data"
postcode_column = 'Postcode'
fulladdress_column = "Full Address"
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Construction Date"
landlord_year_built = "Build Date (HAR10)"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = "Property Subtype"
landlord_wall_construction = None
landlord_property_type = "Property Type (HAR10)"
landlord_built_form = "Build Form (EPC)"
landlord_wall_construction = "Wall Description"
landlord_roof_construction = None
landlord_heating_system = "Main Heating Type"
landlord_heating_system = "HEAT Code"
landlord_existing_pv = None
landlord_property_id = "Property Reference"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
landlord_property_id = "Place ref"
landlord_sap = "EPC SAP"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
master_id_colnames = []
# Medway
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Medway"
data_filename = "MEDWAY Asset List.xlsx"
sheet_name = "Asset list"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "House Number"
address1_method = None
address_cols_to_concat = ["House Number", "Street 1"]
missing_postcodes_method = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Property Type - Academy"
landlord_built_form = "Property Type - Academy"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Row ID"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Calico"
# data_filename = "07.04 CALICO - Final List.xlsx"
# asset_list_header = 2
# sheet_name = "Final List"
# postcode_column = 'Postcode'
# fulladdress_column = None
# address1_column = "Property Number / Name"
# address1_method = None
# address_cols_to_concat = [
# "Property Number / Name",
# "Street",
# "Town"
# ]
# missing_postcodes_method = None
# landlord_year_built = "NROSH Estimated Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Asset Type"
# landlord_built_form = None
# landlord_wall_construction = "Wall Type"
# landlord_heating_system = "Boiler Type"
# landlord_existing_pv = None
# landlord_property_id = "Asset Reference"
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_id_colnames = []
# master_to_asset_list_filepath = None
# landlord_roof_construction = None
# landlord_block_reference = None
# landlord_sap = "Current Efficiency Rating - Score"
# phase = None
# ecosurv_landlords = None
# MHS
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MHS"
data_filename = "MHS HOMES (Full Asset List) - for programme build.xlsx"
sheet_name = "Sheet1"
postcode_column = 'Postcode'
fulladdress_column = "FullAddress"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "BuiltInYear"
landlord_os_uprn = None
landlord_property_type = "AssetType"
landlord_built_form = "PropertyType"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "UPRN"
landlord_sap = None
outcomes_filename = []
outcomes_sheetname = []
outcomes_postcode = []
outcomes_houseno = []
outcomes_id = []
outcomes_address = []
master_filepaths = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
# data_folder = (
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset
# List"
# )
# data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
# sheet_name = "Assets"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Year"
# landlord_os_uprn = None
# landlord_property_type = "Property Archetype"
# landlord_built_form = None
# landlord_wall_construction = None
# landlord_heating_system = "Heating Fuel Type"
# landlord_existing_pv = None
# landlord_property_id = "Uprn - DO NOT DELETE"
# outcomes_filename = [
# os.path.join(data_folder, "RT - LiveWest.xlsx")
# ]
# outcomes_sheetname = ["Feedback"]
# outcomes_postcode = ["Poscode"]
# outcomes_houseno = ["No."]
# outcomes_id = ["UPRN"]
# outcomes_address = ["Address"]
# master_filepaths = [
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
# Master "
# "- redacted for analysis/CAVITY-Table 1.csv"
# ]
# master_id_colnames = [None]
# master_to_asset_list_filepath = None
# landlord_roof_construction = None
# landlord_block_reference = None
# landlord_sap = None
# phase = None
# ecosurv_landlords = "livewest|live west"
# data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
# "2025/Livewest Asset List (Original) - csv")
# data_filename = "Report-Table 1.csv"
# sheet_name = None
# postcode_column = 'Postcode'
# fulladdress_column = "T1_Address"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Yr"
# landlord_os_uprn = None
# landlord_property_type = "T1_AssetType"
# landlord_built_form = "T1_AssetType"
# landlord_wall_construction = "Wall Type Cavity"
# landlord_heating_system = "Heating Fuel"
# landlord_existing_pv = None
# landlord_property_id = "T1_UPRN"
# outcomes_filename = [
# os.path.join(data_folder, "RT - LiveWest.xlsx")
# ]
# outcomes_address = ["Address"]
# outcomes_sheetname = ["Feedback"]
# outcomes_postcode = ["Poscode"]
# outcomes_houseno = ["No."]
# outcomes_id = ["UPRN"]
# master_filepaths = [
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling
# Master "
# "- redacted for analysis/CAVITY-Table 1.csv"
# ]
# master_id_colnames = [None]
# master_to_asset_list_filepath = None
# landlord_roof_construction = None
# landlord_block_reference = None
# landlord_sap = None
# phase = None
# ecosurv_landlords = "livewest|live west"
# Stori
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Storicymru"
# data_filename = "Asset list - for analysis.xlsx"
# sheet_name = "SAP and Costs Calculations"
# postcode_column = 'Postcode'
# fulladdress_column = "Address1"
# address1_column = None
# address1_method = "house_number_extraction"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Age"
# landlord_os_uprn = None
# landlord_property_type = "TYPE"
# landlord_built_form = "AGE / DETACHMENT"
# landlord_wall_construction = "WALL"
# landlord_roof_construction = "LOFT INSULATION"
# landlord_heating_system = "BOILER"
# landlord_existing_pv = "SOLAR PV"
# landlord_property_id = "UPRN"
# landlord_sap = "Current SAP Rating"
# landlord_block_reference = None
# outcomes_filename = []
# outcomes_sheetname = []
# outcomes_postcode = []
# outcomes_houseno = []
# outcomes_id = []
# outcomes_address = []
# master_filepaths = []
# master_to_asset_list_filepath = None
# master_id_colnames = []
# phase = False
# ecosurv_landlords = None
# Thrive - reconciliation
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation"
# data_filename = "Thrive Asset List - Complete - Updated May 2025.xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'postcode'
# fulladdress_column = "full_address"
# address1_column = "address_line_1"
# address1_method = None
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "age_band_calculated"
# landlord_os_uprn = None
# landlord_property_type = "property_type"
# landlord_built_form = "build_form"
# landlord_wall_construction = None
# landlord_roof_construction = "assumed_loft_insulation_thickness_updated"
# landlord_heating_system = "heating_type_updated"
# landlord_existing_pv = None
# landlord_property_id = "thrive_property_id"
# landlord_sap = "sap_rating_updated"
# landlord_block_reference = "block_reference"
# outcomes_filename = [
# os.path.join(data_folder, "Thrive - Outcomes - April 24-March25 - Corrected.xlsx")
# ]
# outcomes_sheetname = ["Sheet1"]
# outcomes_postcode = ["postcode"]
# outcomes_houseno = ["No."]
# outcomes_id = ["thrive_property_id"]
# outcomes_address = ["address"]
# master_filepaths = [
# os.path.join(data_folder, "Thrive Submissions ECO3 - with IDS.csv"),
# os.path.join(data_folder, "Thrive Submissions ECO4 - with IDS.csv"),
# ]
# master_to_asset_list_filepath = None
# master_id_colnames = ["thrive_property_id", "thrive_property_id"]
# phase = False
# ecosurv_landlords = "thrive"
# Southern Midlands
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
@ -182,40 +309,12 @@ def app():
# master_filepaths = []
# master_to_asset_list_filepath = None
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
landlord_sap = None
phase = None
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
asset_list = AssetList(
local_filepath=os.path.join(data_folder, data_filename),
header=0,
header=asset_list_header,
sheet_name=sheet_name,
address1_colname=address1_column,
postcode_colname=postcode_column,
@ -233,6 +332,7 @@ def app():
landlord_heating_system=landlord_heating_system,
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase
)
asset_list.init_standardise()
@ -294,7 +394,8 @@ def app():
asset_list.flag_survey_master(
master_filepaths=master_filepaths,
master_to_asset_list_filepath=master_to_asset_list_filepath
master_to_asset_list_filepath=master_to_asset_list_filepath,
master_id_colnames=master_id_colnames,
)
asset_list.flag_ecosurv(ecosurv_landlords)
@ -306,7 +407,7 @@ def app():
epc_api_only = False
force_retrieve_data = False
skip = None # Used to skip already completed chunks
chunk_size = 5000
chunk_size = 2000
filename = "Chunk {i}.csv"
download_folder = os.path.join(data_folder, "Chunks")
if not os.path.exists(download_folder):
@ -486,59 +587,12 @@ def app():
)
asset_list.merge_data(epc_df)
asset_list.extract_attributes()
asset_list.identify_worktypes()
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-dev"
)
cleaned = msgpack.unpackb(cleaned, raw=False)
asset_list.identify_worktypes(cleaned)
pprint(asset_list.work_type_figures)
asset_list.flat_analysis()
asset_list.load_contact_details(
local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
sheet_name="Report 1",
landlord_property_id=asset_list.landlord_property_id,
phone_number_column='Property Current Tel. Number',
fullname_column='Proeprty Current Occupant',
firstname_column=None,
lastname_column=None,
email_column=None, # TODO - we need this
)
# Convert to a format suitable for CRM
# TODO: TEMP
assigned_surveyors = pd.DataFrame(
[
{
asset_list.landlord_property_id: "02610001",
"week_commencing": "10/10/2025",
"surveyor_name": "Khalim Conn-Kowlessar",
"surveyor_email": "khalim@domna.homes",
}
]
)
# TODO: Sort the output by postcode
company_domain = "ealing.gov.uk"
crm_pipeline_name = "Survey Management"
first_dealstage = "READY TO BEGIN SCHEDULING"
# TODO - temp, upload to either SharePoint or AWS
asset_list.prepare_for_crm(
assigned_surveyors=assigned_surveyors,
company_domain=company_domain,
crm_pipeline_name=crm_pipeline_name,
first_dealstage=first_dealstage
)
hubspot_data = asset_list.hubspot_data
# We now flag the status of the property
asset_list.label_property_status()
asset_list.analyse_geographies()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
@ -546,7 +600,8 @@ def app():
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)
if asset_list.block_analysis_df is not None:
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
@ -560,5 +615,5 @@ def app():
if not asset_list.ecosurv_no_match.empty:
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
# Store the Hubspot export as a csv
hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)
if not asset_list.geographical_areas.empty:
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)

View file

@ -0,0 +1,85 @@
from enum import IntEnum, Enum
# Name of the CRM pipeline used by this module.
# NOTE(review): presumably the value written to the 'Pipeline <DEAL pipeline>' upload column - confirm against
# the code that builds the upload.
CRM_PIPELINE_NAME = 'Operations - Housing Associations'
class HubspotProcessStatus(IntEnum):
    """
    Ordered process stages, each paired with a display label.

    Every member is declared as a ``(value, label)`` tuple: the integer becomes the member's value and the
    string is exposed as ``member.label``. The integers carry no meaning of their own; they only define
    the order of operations.
    """

    def __new__(cls, value, label):
        # Create the int-backed member first, then hang the label off it
        member = int.__new__(cls, value)
        member._value_ = value
        member.label = label
        return member

    # First stage: a survey is ready to go
    READY_TO_BE_SCHEDULED = (1, "READY TO BE SCHEDULED")

    # The surveyor could not get access to the property and this needs sign off
    SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF = (2, "SURVEYED - NO ACCESS - NEED SIGN OFF")

    # The survey is complete. There is no update yet on whether the property has been installed
    SURVEYED_COMPLETED_SIGNED_OFF = (3, "SURVEYED - COMPLETED - SIGNED OFF")

    # The property turned out to be ineligible
    NOT_VIABLE = (4, "NOT VIABLE")

    # The property is with the installer. This will likely be the default for historic programmes
    SUBMITTED_TO_INSTALLER = (5, "SUBMITTED TO INSTALLER")

    # The property has been installed
    INSTALL_COMPLETE = (6, "INSTALL COMPLETE")

    # The install has completed and lodgement is complete
    LODGEMENT_COMPLETE = (7, "LODGEMENT COMPLETE")

    # The property has been cancelled
    INSTALLER_CANCELLED_FINALIZED = (8, "INSTALLER CANCELLED - FINALIZED")
class Installer(Enum):
    """Closed set of installer names; each member's value is the installer name as a string."""

    SCIS = "SCIS"
    JJ_CRUMP = "J & J CRUMP"
    SGEC = "SGEC"

    @classmethod
    def is_valid_value(cls, value):
        """
        Check if the value is a valid installer.

        :param value: candidate installer name; compared exactly (case-sensitive) against the member values
        :return: True if ``value`` is the value of one of the members, otherwise False. Unhashable input
            (e.g. a list) can never be a member value, so it returns False instead of raising
        """
        try:
            return value in cls._value2member_map_
        except TypeError:
            # The lookup is a dict membership test, which raises for unhashable values
            return False
# Column headers for the CRM upload file. Most headers are written as
# "<display label> <OBJECT internal_property_name>", where OBJECT is one of LISTING, COMPANY, CONTACT, DEAL or
# LINE_ITEM; 'Associations: Listing' and 'Deal Owner' carry no such suffix.
# NOTE(review): presumably this is the header format the HubSpot importer expects and the list order is the
# column order of the export - confirm against the code that builds the upload.
CRM_UPLOAD_COLUMNS = [
'Name <LISTING hs_name>', 'Associations: Listing', 'Company Domain Name <COMPANY domain>',
'Email <CONTACT email>', 'First Name <CONTACT firstname>', 'Last Name <CONTACT lastname>',
'Phone <CONTACT phone>', 'Secondary Phone <CONTACT secondary_phone_number>',
'Secondary Contact Full Name <CONTACT secondary_contact_full_name>',
'Listing Owner Email <LISTING hubspot_owner_id>',
'Full Address <LISTING full_address>', 'Address 1 <LISTING hs_address_1>',
'Address 2 <LISTING hs_address_2>', 'Postcode <LISTING hs_zip>',
'Property Type <LISTING property_type>', 'Property Sub Type <LISTING property_sub_type>',
'Bedroom(s) <LISTING hs_bedrooms>', 'Domna Property ID <LISTING domna_property_id>',
'National UPRN <LISTING national_uprn>', 'Owner Property ID <LISTING owner_property_id>',
'Wall Construction <LISTING wall_construction>', 'Heating System <LISTING heating_system>',
'Year Built <LISTING hs_year_built>', 'Boiler Make <LISTING boiler_make>',
'Boiler Model <LISTING boiler_model>',
'Non-Intrusives: Date Checked <LISTING non_intrusives__date_checked>',
'Non-Intrusives: Wall Type <LISTING non_intrusives__wall_type>',
'Non-intrusives: Insulation <LISTING non_intrusives__insulation>',
'Non-intrusives: Insulation Material <LISTING non_intrusives__insulation_material>',
'Non-Intrusives: CIGA Check Required <LISTING non_intrusives__ciga_check_required>',
'Non-Intrusives: PV Access Issues <LISTING non_intrusives__access_issues>',
'Non-Intrusives: Roof Orientation <LISTING non_intrusives__roof_orientation>',
'Non-Intrusives: Surveyor Notes <LISTING non_intrusives__surveyor_notes>',
'Non-Intrusives: Surveyor Name <LISTING non_intrusives__surveyor_name>',
'CIGA: Date Requested <LISTING ciga__date_requested>',
'CIGA: Cavity Guarantee Found <LISTING ciga__cavity_guarantee_found>',
'Last EPC: Is Estimated <LISTING last_epc__is_estimated>',
'Last EPC: EPC Rating <LISTING last_epc__epc_rating>',
'Last EPC: SAP Rating <LISTING last_epc__sap_rating>',
'Last EPC: Main Heating Description <LISTING last_epc__main_heating_description>',
'Last EPC: Heating Controls <LISTING last_epc__heating_controls>',
'Last EPC: Lodgement Date <LISTING last_epc__lodgement_date>',
'Last EPC: Floor Area <LISTING last_epc__floor_area>', 'Last EPC: Wall <LISTING last_epc__wall>',
'Last EPC: Roof <LISTING last_epc__roof>', 'Last EPC: Floor <LISTING last_epc__floor>',
'Last EPC: Room Height <LISTING last_epc__room_height>',
'Last EPC: Age Band <LISTING last_epc__age_band>', 'Deal Stage <DEAL dealstage>',
'Pipeline <DEAL pipeline>', 'Expected Commencement Date <DEAL expected_commencement_date>',
'Deal Name <DEAL dealname>', 'Project Code <DEAL project_code>',
'Product ID <LINE_ITEM hs_product_id>', 'Name <LINE_ITEM name>', 'Unit price <LINE_ITEM price>',
'Quantity <LINE_ITEM quantity>', 'Deal Owner', 'Amount <DEAL amount>', 'Installer <DEAL installer>'
]

View file

@ -0,0 +1,91 @@
import os
import pandas as pd
from asset_list.AssetList import AssetList
def app():
    """
    Build the HubSpot upload csv from a standardised asset list.

    Loads the standardised asset list and the contact details, prepares the CRM data via
    ``AssetList.prepare_for_crm``, drops every row whose Domna Property ID is already present in the existing
    programme export, validates the result and writes it next to the asset list as
    "<asset list name> - Hubspot Upload.csv".

    TODO: Operations may have removed some cavity_reason/solar_reason values from the standardised asset list after
        review. So, we will need to update the hubspot status for these entries and set them to None, if they
        were previously being set to ready for scheduling. We don't want to just filter on rows where
        cavity_reason and solar_reason are populated, as if we want to include historical surveys, this will remove
        them
    TODO: If we wish to upload deals in batches

    :raises ValueError: if any row of the upload is missing a project code or a deal stage
    :return:
    """
    # inputs:
    reconcile_programme = False  # If True, the hubspot upload will include all properties with a project code
    # NOTE(review): this value includes the "https://" scheme and is passed as company_domain - confirm whether
    # prepare_for_crm expects a bare domain instead
    customer_domain = "https://sandwell.gov.uk"
    installer_name = "J & J CRUMP"

    asset_list_filepath = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell BC - Full Asset List MAIN - "
        "Standardised.xlsx"
    )
    asset_list_sheet_name = "Proposed Program"
    asset_list_header = 1

    contact_details_filepath = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/Sandwell Contact Details.xlsx"
    )
    contacts_sheet_name = "Sheet1"
    contacts_landlord_property_id = "landlord_property_id"
    contacts_phone_number_column = "phone_number"
    contacts_secondary_phone_number_column = "secondary_phone_number"
    contacts_secondary_contact_full_name = "secondary_contact_full_name"
    contacts_email_column = "email"
    contacts_fullname_column = "fullname"
    contacts_firstname_column = "firstname"
    contacts_lastname_column = "lastname"

    existing_programme_filepath = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Sandwell/Hubspot/property-status.csv"
    )

    asset_list = AssetList.load_standardised_asset_list(
        asset_list_filepath, asset_list_sheet_name, asset_list_header
    )

    asset_list.load_contact_details(
        local_filepath=contact_details_filepath,
        sheet_name=contacts_sheet_name,
        landlord_property_id=contacts_landlord_property_id,
        phone_number_column=contacts_phone_number_column,
        secondary_phone_number_column=contacts_secondary_phone_number_column,
        secondary_contact_full_name=contacts_secondary_contact_full_name,
        email_column=contacts_email_column,
        fullname_column=contacts_fullname_column,
        firstname_column=contacts_firstname_column,
        lastname_column=contacts_lastname_column
    )

    asset_list.prepare_for_crm(
        company_domain=customer_domain,
        installer_name=installer_name,
        reconcile_programme=reconcile_programme
    )

    # Remove the existing programme
    existing_programme = pd.read_csv(existing_programme_filepath, encoding="utf-8-sig")
    asset_list.hubspot_data = asset_list.hubspot_data[
        ~asset_list.hubspot_data["Domna Property ID <LISTING domna_property_id>"].isin(
            existing_programme['Domna Property ID'].values
        )
    ]

    # Get the filepath and the filename. Append hubspot upload to the filename. We also change the file type to csv
    directory, filename = os.path.split(asset_list_filepath)
    name, _ = os.path.splitext(filename)
    output_filename = f"{name} - Hubspot Upload.csv"
    output_filepath = os.path.join(directory, output_filename)

    # Refuse to write an upload with incomplete deals; each check names the column that is actually missing
    if pd.isnull(asset_list.hubspot_data['Project Code <DEAL project_code>']).any():
        raise ValueError("Some rows have missing project codes. Fix these before uploading to HubSpot.")

    if pd.isnull(asset_list.hubspot_data['Deal Stage <DEAL dealstage>']).any():
        raise ValueError("Some rows have missing deal stages. Fix these before uploading to HubSpot.")

    # Just store locally
    asset_list.hubspot_data.to_csv(output_filepath, index=False, encoding="utf-8-sig")

View file

@ -331,4 +331,33 @@ BUILT_FORM_MAPPINGS = {
'Low Rise': 'low rise',
'Upper Floor': 'top-floor',
'High Rise': 'high rise',
'2012 ONWARDS DETACHED': 'detached',
'1950-66 END TERRACE': 'end-terrace',
'1976-82 MID TERRACED': 'mid-terrace',
'1950-66 MID TERRACE': 'mid-terrace',
'1991-95 DETACHED': 'detached',
'1976-82 END TERRACED': 'end-terrace',
'1967-75 DETACHED': 'detached',
'PRE 1900 DETACHED': 'detached',
'PRE 1900 MID TERRACE': 'mid-terrace',
'1900 DET': 'detached',
'1967-75 MID TERR': 'mid-terrace',
'1930-49 SEMI DET': 'semi-detached',
'1900-29 SEMI DET': 'semi-detached',
'1900-29 MID TERR': 'mid-terrace',
'1983- 90 MID TERR': 'mid-terrace',
'1976-82 MID TERR': 'mid-terrace',
'1983-90 END TERR': 'end-terrace',
'1991-95 SEMI DET': 'semi-detached',
'1983-90 SEMI DET': 'semi-detached',
'1991-95 MID TERR': 'mid-terrace',
'1950-66 SEMI DET': 'semi-detached',
'1900 MID TERR': 'mid-terrace',
'1967-75 SEMI DET': 'semi-detached',
'1983- 90 SEMI DET': 'semi-detached',
'1983-90 MID TERR': 'mid-terrace',
'1976-82 SEMI DET': 'semi-detached',
'PRE 1900 MID TERR': 'mid-terrace'
}

View file

@ -16,5 +16,6 @@ EXISTING_PV_MAPPINGS = {
'PV: 25% roof area, PV: 3.6kWp array': 'already has PV',
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
'PV: 50% roof area': 'already has PV',
'Solar PV': 'already has PV'
'Solar PV': 'already has PV',
'SOLAR PV': 'already has PV'
}

View file

@ -27,7 +27,7 @@ STANDARD_HEATING_SYSTEMS = {
"electric ceiling",
"electric underfloor",
"no heating",
"non-electric underfloor"
"non-electric underfloor",
}
HEATING_MAPPINGS = {
@ -292,4 +292,39 @@ HEATING_MAPPINGS = {
'Communal Heating': 'communal heating',
'No Data': 'unknown',
'Boiler System': 'gas condensing boiler',
'Storage heating': 'electric storage heaters',
'Storage heating (HHRSH)': 'high heat retention storage heaters',
'ELECTRIC BOILER': 'electric boiler',
'STORAGE HEATERS': 'electric storage heaters',
'GREENSTAR 24I JUNIOR': 'gas combi boiler',
'generic cond combi post98': 'gas condensing combi',
'SAP TABLE REG COND +98 NO PICTURE OF BOILER': 'gas condensing boiler',
'ECO TEC PRO 28 H COMBI A': 'gas combi boiler',
'GREENSTAR 25I ErP': 'gas combi boiler',
'IDEAL LOGIC MAX COMBI C30': 'gas combi boiler',
'ECO TEC PRO 28 (286/5-3)': 'gas combi boiler',
'IDEAL LOGIC HEAT 30': 'gas boiler, radiators',
'WORCESTER 240': 'gas boiler, radiators',
'ECO TEC PRO 24 (246/5-3)': 'gas combi boiler',
'ECO TEC PRO 28 (OLD)': 'gas combi boiler',
'LOGIC COMBI2 C30': 'gas combi boiler',
'GREENSTAR 28I JUNIOR': 'gas combi boiler',
'WORCESTER 24i': 'gas combi boiler',
'GREENSTAR 30I ErP': 'gas combi boiler',
'25 CDI': 'gas combi boiler',
'GREENSTAR 28CDI COMPACT ErP': 'gas combi boiler',
'GREENSTAR 24 RI': 'gas boiler, radiators',
'BAXI COMBI 105 HE': 'gas combi boiler',
'ECO TEC PRO 28 (OLD TYPE)': 'gas combi boiler',
'WORCESTER 28 SI ll RSF': 'gas combi boiler',
'GREENSTAR 30SI COMPACT ErP': 'gas combi boiler',
'SAP TABLE REG COND +98 NO PICTURE OF CYLINDER': 'gas condensing boiler',
'WORCESTER 24 SI ll RSF': 'gas combi boiler',
'GREENSTAR 4000': 'gas combi boiler',
'GREENSTAR 24i JUNIOR': 'gas combi boiler',
'ECO TEC PRO 24 (OLD TYPE)': 'gas combi boiler',
'GREENSTAR 30SI COMPACT': 'gas combi boiler',
'BAXI DUO TEC 28 COMBI ErP': 'gas combi boiler',
'Not applicable for this asset type': 'unknown'
}

View file

@ -252,5 +252,19 @@ PROPERTY_MAPPING = {
'Bedsit bungalow semi detached': 'bedsit',
'Bedsit Flat': 'bedsit',
'Semi detached house': 'house',
'Unit': 'unknown'
'Unit': 'unknown',
'HOUSE (3 STOREY)': 'house',
'FLAT GROUND FLOOR': 'flat',
'FLAT TOP FLOOR': 'flat',
'SHARED HOUSE': 'house',
'MAISONETTE': 'maisonette',
'DIRECT ACCESS HOSTEL': 'other',
'Day centre': 'other',
'Care home': 'other',
'BLOCK (Communal)': 'block of flats',
'SHOP': 'other',
'Office Block': 'other',
'BLOCK (Non-Communal)': 'block of flats',
'Refuge': 'other'
}

View file

@ -6,7 +6,7 @@ STANDARD_ROOF_CONSTRUCTIONS = {
"pitched unknown access to loft",
"piched unknown insulation",
"pitched insulated",
"pitched less than 100mm insulation"
"pitched less than 100mm insulation",
"another dwelling above",
"flat unknown insulation",
"unknown insulated",
@ -38,4 +38,18 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'200mm': 'pitched insulated',
'0-49mm': 'pitched less than 100mm insulation',
'50mm': 'pitched less than 100mm insulation',
'': 'unknown',
'NR': 'unknown',
'Non-joist': 'unknown',
'25mm': 'pitched less than 100mm insulation',
'400mm+': 'pitched insulated',
'12mm': 'pitched less than 100mm insulation',
'150MM': 'pitched insulated',
'200MM': 'pitched insulated',
'250MM': 'pitched insulated',
'100MM': 'pitched less than 100mm insulation',
'U/K': 'unknown',
'U/K - 250MM RIR FLAT CEILING': 'flat unknown insulation',
'U/K - 200MM RIR FLAT CEILING': 'flat unknown insulation'
}

View file

@ -224,5 +224,31 @@ WALL_CONSTRUCTION_MAPPINGS = {
'Traditional Cavity Brickwork': 'cavity unknown insulation',
'System build (undefined)': 'system built',
'Non Trad Wimpey': 'system built',
'Non Trad Wates': 'system built'
'Non Trad Wates': 'system built',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 270MM': 'filled cavity',
'CAVITY FILLED 250MM': 'filled cavity',
'CAVITY FILLED 260MM': 'filled cavity',
'CAVITY FILLED 260MM': 'filled cavity',
'SOLID A/B 220MM': 'solid brick unknown insulation',
'CAVITY A/B 300MM': "uninsulated cavity",
'CAVITY A/B 250MM': "uninsulated cavity",
'CAVITY A/B 260MM': "uninsulated cavity",
'CAVITY A/B 270MM': "uninsulated cavity",
'SOLID BRICK/CAVITY EXT': 'solid brick unknown insulation',
'CAVITY EWI': 'filled cavity',
'SANDSTONE/CAVITY EXT': 'sandstone or limestone',
'SYSTEM BUILD 100MM EWI': 'system built',
'CAVITY A/B 260MM': "uninsulated cavity",
'CAVITY A/B 270MM': "uninsulated cavity",
'CAVITY A/B 250MM': "uninsulated cavity",
'System': 'system built',
'Sandstone/Limestone': 'sandstone or limestone',
'No Fines': 'system built',
'Granite/Whinstone': 'granite or whinstone',
'Not applicable to this asset type': 'unknown',
'Steel Frame': 'system built',
'Solid Wall As Built': 'uninsulated solid brick',
'Solid As Built': 'uninsulated solid brick'
}

View file

@ -79,7 +79,13 @@ def get_data(
uprn=uprn
)
# Force the skipping of estimating the EPC
searcher.ordnance_survey_client.property_type = None
# We check if the property was split
if home.get("is_expended_block"):
searcher.ordnance_survey_client.property_type = "Flat"
searcher.property_type = "Flat"
searcher.set_strict_property_type_search()
else:
searcher.ordnance_survey_client.property_type = None
searcher.ordnance_survey_client.built_form = None
searcher.find_property(skip_os=True)

View file

@ -5,7 +5,7 @@ from typing import List
from backend.app.plan.schemas import HousingType
class Funding:
class FundingOld:
"""
Given a property, this class identifies if the home is possibly eligible for funding under
the various funding schemes. It will also calculate the expected amount of funding available
@ -411,3 +411,190 @@ class Funding:
self.gbis()
# self.eco4()
self.whlg()
class Funding:
"""
New class to handle funding calculation
"""
def __init__(
        self,
        tenure: HousingType,
        social_cavity_abs_rate: float,
        social_solid_abs_rate: float,
        private_cavity_abs_rate: float,
        private_solid_abs_rate: float,
        project_scores_matrix,
        whlg_eligible_postcodes
):
    """
    Store the funding inputs on the instance; nothing is calculated here.

    :param tenure: housing type of the property
    :param social_cavity_abs_rate: rate stored for social housing, cavity wall
    :param social_solid_abs_rate: rate stored for social housing, solid wall
    :param private_cavity_abs_rate: rate stored for private housing, cavity wall
    :param private_solid_abs_rate: rate stored for private housing, solid wall
    :param project_scores_matrix: project scores lookup, kept as given
    :param whlg_eligible_postcodes: WHLG eligible postcodes, kept as given
    """
    self.tenure = tenure

    # Lookup data, kept exactly as passed in
    self.project_scores_matrix = project_scores_matrix
    self.whlg_eligible_postcodes = whlg_eligible_postcodes

    # Rates by tenure and wall type
    self.social_cavity_abs_rate = social_cavity_abs_rate
    self.social_solid_abs_rate = social_solid_abs_rate
    self.private_cavity_abs_rate = private_cavity_abs_rate
    self.private_solid_abs_rate = private_solid_abs_rate

    # Bands are unknown at construction time
    self.starting_sap_band = None
    self.ending_sap_band = None
    self.floor_area_band = None
@staticmethod
def get_sap_band(sap_score_number):
bands = [
("High_A", 96, float("inf")),
("Low_A", 92, 96),
("High_B", 86, 92),
("Low_B", 81, 86),
("High_C", 74.5, 81),
("Low_C", 69, 74.5),
("High_D", 61.5, 69),
("Low_D", 55, 61.5),
("High_E", 46.5, 55),
("Low_E", 39, 46.5),
("High_F", 29.5, 39),
("Low_F", 21, 29.5),
("High_G", 10.5, 21),
("Low_G", 1, 10.5),
]
for band, lower, upper in bands:
if lower <= sap_score_number < upper:
return band
return None
@staticmethod
def get_floor_area_band(floor_area):
if floor_area <= 72:
return "0-72"
if floor_area <= 97:
return "73-97"
if floor_area <= 199:
return "98-199"
return "200"
@staticmethod
def eco4_prs_eligibility(
starting_sap: int, measures: List, mainheat_description: str, heating_control_description: str
):
"""
Handles the eligibility criteria for private rental properties under eco
:return:
"""
# Help to heat group
# 1) EPC E - G
# 2) Must receive one of SWI, FTCH, renewable heating or DHC
# 3) Tenant must be on benefits
# We don't consider the tenant being on benefits - we just notify the end user that this is a requirement
meets_epc = starting_sap <= 54
has_solid_wall = "internal_wall_insulation" in measures or "external_wall_insulation" in measures
# We check if the property has a heating system that means solar pv counts as a renewable heating system
has_eligible_electric_heating = any(x in mainheat_description for x in [
"air source heat pump", "ground source heat pump", "boiler and radiators, electric"
]) | (("electric storage heaters" in mainheat_description) and
(heating_control_description.lower() == "controls for high heat retention storage heaters")
)
# Counts as renewable heating
solar_renweable_heating = has_eligible_electric_heating & ("solar_pv" in measures)
# Is a renewable heating
ashp = "air_source_heat_pump" in measures
if meets_epc & (solar_renweable_heating or ashp or has_solid_wall):
return True
return False
def calculate_full_project_abs(self):
# Filter the project scores matrix
data = self.project_scores_matrix[
(self.project_scores_matrix["Floor Area Segment"] == self.floor_area_band) &
(self.project_scores_matrix["Starting Band"] == self.starting_sap_band) &
(self.project_scores_matrix["Finishing Band"] == self.ending_sap_band)
]
if data.emtpy:
raise ValueError("Missing abs rate, check the project scores matrix")
return data["Cost Savings"].values[0]
def check_funding(
self, measures: List,
starting_sap: int,
ending_sap: int,
floor_area: float,
mainheat_description: str,
heating_control_description: str,
is_cavity: bool
):
"""
Given a list of measures, this function will check if the package of measures is fundable
:param measures:
:param starting_sap:
:param ending_sap:
:param floor_area:
:param mainheat_description:
:param heating_control_description:
:param is_cavity: Indicates if the property has cavity wall insulation
:return:
"""
# If it's an E or D, should get to an EPC C
if starting_sap >= 55 and ending_sap < 69:
raise NotImplementedError("This property doesn't have sufficient SAP movement")
if starting_sap <= 38 & ending_sap <= 55:
# F or G should get to D
raise NotImplementedError("Implement F or G to D eligibility")
self.starting_sap_band = self.get_sap_band(starting_sap)
self.ending_sap_band = self.get_sap_band(ending_sap)
self.floor_area_band = self.get_floor_area_band(floor_area)
########################
# Private
########################
# 1) ECO4
# 2) GBIS
if self.tenure == "Private":
is_eco4_eligible = self.eco4_prs_eligibility(
starting_sap=starting_sap,
measures=measures,
mainheat_description=mainheat_description,
heating_control_description=heating_control_description
)
# Need to implement
# 1) Package has to include an insulation measure
# 2) We should use the funding for the measure that has the largest partial project score
is_gbis_eligible = ()
if not is_eco4_eligible:
return
eco4_abs = self.calculate_full_project_abs()
# We estimate rates now
eco4_funding = (
eco4_abs * self.private_cavity_abs_rate if is_cavity else eco4_abs & self.private_solid_abs_rate
)
########################
# Social
########################
# 1) ECO4
# 2) GBIS
if self.tenure == "Social":
pass
raise NotImplementedError("Only implemented for Private or Social housing")

View file

@ -160,6 +160,9 @@ class SearchEpc:
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
but can be used to find the epc for the home, if address1 and postcode are insufficient
If you wish to run a strict property type search, please run set_strict_property_type_search()
:param address1: string, property's address line 1
:param postcode: string, property's postcode
:param full_address: string, optional parameter, the full address of the property
@ -189,6 +192,7 @@ class SearchEpc:
self.older_epcs = None
self.full_sap_epc = None
self.metadata = None
self.strict_property_type_search = False
# These are the address and postcode values, which we store in the database
self.address_clean = None
@ -199,6 +203,14 @@ class SearchEpc:
self.property_type = property_type
self.fast = fast
def set_strict_property_type_search(self):
    """
    Switches the searcher into strict property type mode by setting the strict_property_type_search
    flag to True. With the flag set, the search is meant to only return results whose property type
    matches the one specified on the searcher.
    :return: None
    """
    self.strict_property_type_search = True
@staticmethod
def get_house_number(address: str, postcode=None) -> str | None:
"""
@ -315,6 +327,8 @@ class SearchEpc:
address_params["address"] = self.address1
if self.postcode:
address_params["postcode"] = self.postcode
if self.strict_property_type_search and self.property_type:
address_params["property-type"] = self.property_type.lower()
# We attempt the search with uprn params
@ -365,11 +379,16 @@ class SearchEpc:
unique_property_types = {r["property-type"] for r in rows}
is_just_a_house = (len(unique_property_types) == 1) & (
("House" in unique_property_types) | ("Bungalow" in unique_property_types)
)
# We allow for variation in property type across flats/maisonettes
# If we know that we have a flat/maisonette, we allow for both property types
if property_type in ["Flat", "Maisonette"]:
if ((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"}):
# Make sure we have not JUST a house, or not JUST a flat/maisonette
if property_type in ["Flat", "Maisonette"] and not is_just_a_house:
if (((len(uprns) == 1) and ((len(unique_property_types) == 1)
) or unique_property_types == {"Flat", "Maisonette"})):
return rows
if property_type is not None:
@ -424,6 +443,8 @@ class SearchEpc:
return rows
raise ValueError("property type and address cannot both be None, at least one must be provided")
@staticmethod
def format_address(newest_epc):
"""
@ -702,6 +723,18 @@ class SearchEpc:
exclude_old=exclude_old
)
# Check if it's a new build EPC. A property that doesn't have an EPC is not going to be a new build
# so we avoid comparing it to new builds
# TODO - this is experimental
newer_age_bands = [
"England and Wales: 1996-2002", "England and Wales: 2003-2006", "England and Wales: 2007-2011",
"England and Wales: 2012 onwards"
]
if (~epc_data["construction-age-band"].isin(newer_age_bands)).sum():
# We have some older age bands, so we need to filter them out
epc_data = epc_data[~epc_data["construction-age-band"].isin(newer_age_bands)].copy()
# If we have missing lodgment date, we fill it with inspection-date
epc_data["lodgement-datetime"] = epc_data["lodgement-datetime"].fillna(epc_data["inspection-date"])
# If we still have missing dates, we set it to the mean of the non NA dates

View file

@ -507,7 +507,7 @@ async def model_engine(body: PlanTriggerRequest):
)
# if we have a remote assessment event type, we pull the additional data and include it
if body.event_type == "remote_assessment":
if (body.event_type == "remote_assessment") and not (epc_searcher.newest_epc["estimated"]):
logger.info("Retrieving find my epc data")
try:
property_non_invasive_recommendations, patch = RetrieveFindMyEpc.get_from_epc(

View file

@ -0,0 +1,52 @@
import pytest
import pandas as pd
from utils.s3 import read_csv_from_s3
from backend.Funding import Funding
def get_funding_data():
    """
    Reads the two funding reference files from S3: the eco project scores matrix and the warm homes
    local grant eligible postcodes.
    :return: tuple of (project scores matrix, whlg eligible postcodes), both as DataFrames
    """
    bucket = "retrofit-data-dev"

    scores = pd.DataFrame(
        read_csv_from_s3(bucket_name=bucket, filepath="funding/ECO4 Full Project Scores Matrix.csv")
    )
    # Standardise the column names and make sure the scores are numeric
    scores.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
    scores["Cost Savings"] = scores["Cost Savings"].astype(float)

    postcodes = pd.DataFrame(
        read_csv_from_s3(bucket_name=bucket, filepath="funding/whlg eligible postcodes.csv")
    )

    return scores, postcodes
class TestFunding:
    """Tests for the Funding class."""

    def test_prs(self):
        """Runs the funding check for a private tenure property; the call itself is the only check."""
        scores_matrix, eligible_postcodes = get_funding_data()

        abs_rates = {
            "social_cavity_abs_rate": 13.5,
            "social_solid_abs_rate": 17,
            "private_cavity_abs_rate": 13.5,
            "private_solid_abs_rate": 17,
        }
        funding = Funding(
            tenure="Private",
            project_scores_matrix=scores_matrix,
            whlg_eligible_postcodes=eligible_postcodes,
            **abs_rates,
        )

        # Solid wall insulation and solar on a gas heated property
        funding.check_funding(
            measures=["internal_wall_insulation", "solar_pv"],
            starting_sap=54,
            ending_sap=69,
            floor_area=73,
            mainheat_description="Boiler and radiators, mains gas",
            heating_control_description="Programmer, room thermostat and TRVs",
            is_cavity=True,
        )

View file

@ -0,0 +1,167 @@
import pandas as pd
def get_band(sap_score_number):
    """
    Returns the SAP half-band label for a SAP score, or None when the score sits outside every band.
    """
    # (label, inclusive lower bound, exclusive upper bound)
    sap_ranges = (
        ("High_A", 96, float("inf")),
        ("Low_A", 92, 96),
        ("High_B", 86, 92),
        ("Low_B", 81, 86),
        ("High_C", 74.5, 81),
        ("Low_C", 69, 74.5),
        ("High_D", 61.5, 69),
        ("Low_D", 55, 61.5),
        ("High_E", 46.5, 55),
        ("Low_E", 39, 46.5),
        ("High_F", 29.5, 39),
        ("Low_F", 21, 29.5),
        ("High_G", 10.5, 21),
        ("Low_G", 1, 10.5),
    )
    return next(
        (label for label, low, high in sap_ranges if low <= sap_score_number < high),
        None,
    )
def classify_floor_area(floor_area):
    """
    Returns the floor area segment label for a floor area. Anything that is not at or below one of
    the upper limits ends up in the "200+" segment.
    """
    upper_limits = ((72, "0-72"), (97, "73-97"), (199, "98-199"))
    for limit, segment in upper_limits:
        if floor_area <= limit:
            return segment
    return "200+"
asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/ECO 4 Wates - Standardised.xlsx",
    sheet_name="Standardised Asset List"
)

asset_list["starting_sap_band"] = asset_list["epc_sap_score_on_register"].apply(get_band)
asset_list["floor_area_band"] = asset_list["epc_total_floor_area"].apply(classify_floor_area)

# Objective:
# We need to get a reasonable estimate for the cost of works for properties that are EPC D or below
#
# Therefore:
# 1) We know that some properties that are currently EPC C may* qualify for ECO4 funding. Right now, we aren't trying
#    to determine which EPC C properties or above will qualify, just how much works will cost for properties that do
#    qualify
# 2) We cannot survey everything, so before we undertake too much risk we should produce some costings for each of the
#    archetypes
#
# Driving Factors:
# 1) Floor area band & starting SAP band - this will determine how much funding is produced
# 2) Heating system - this will determine if the property needs a heating upgrade or not

archetypes = asset_list[asset_list["epc_sap_score_on_register"] <= 68].groupby(
    ["floor_area_band", "starting_sap_band", "landlord_heating_system"]
)["landlord_property_id"].nunique().reset_index()
archetypes = archetypes.rename(columns={"landlord_property_id": "n_properties"})
archetypes = archetypes.sort_values("n_properties", ascending=False)
archetypes["running_total"] = archetypes["n_properties"].cumsum()
archetypes["cumulative_percentage"] = archetypes["running_total"] / archetypes["n_properties"].sum() * 100
archetypes["is_electric"] = archetypes["landlord_heating_system"] != "boiler - other fuel"
archetypes["needs_heating_upgrade"] = archetypes["landlord_heating_system"].isin(
    ["boiler - other fuel", "electric storage heaters"]
)
archetypes = archetypes.reset_index(drop=True)
# The id has to exist before we derive the electric / oil subsets, otherwise they don't carry it
archetypes["archetype_id"] = archetypes.index

# Right now, they don't want to treat the oil properties so we'll exclude them for the moment
electric_heated_archetypes = (
    archetypes[archetypes["landlord_heating_system"] != "boiler - other fuel"].copy().reset_index(drop=True)
)
electric_heated_archetypes["running_total"] = electric_heated_archetypes["n_properties"].cumsum()
electric_heated_archetypes["cumulative_percentage"] = (
    electric_heated_archetypes["running_total"] / electric_heated_archetypes["n_properties"].sum() * 100
)

# The main properties that need validation surveys are properties that require a heating upgrade.
# archetype_id is inherited from archetypes, so no merge is needed to bring it in
electric_heated_archetypes = electric_heated_archetypes[
    electric_heated_archetypes["needs_heating_upgrade"]
].reset_index(drop=True)

oil_archetypes = archetypes[
    archetypes["landlord_heating_system"] == "boiler - other fuel"
].copy().reset_index(drop=True)

asset_list = asset_list.merge(
    archetypes[["starting_sap_band", "floor_area_band", "landlord_heating_system", "archetype_id"]],
    how="left", on=["starting_sap_band", "floor_area_band", "landlord_heating_system"]
)

properties_for_verification = asset_list[
    asset_list["archetype_id"].isin(electric_heated_archetypes["archetype_id"].values)
].copy()

properties_for_verification["postal_region"] = (
    properties_for_verification["domna_postcode"].str.split(" ").str[0].str.strip()
)
properties_for_verification["epc_age"] = (
    pd.Timestamp.now() - pd.to_datetime(properties_for_verification["epc_inspection_date"])
).dt.days

# We also survey 2 oil heater properties, so we take the 2 most prevalent archetypes
archetypes_for_survey = pd.concat(
    [electric_heated_archetypes, oil_archetypes.head(2)]
)

# Take one property per archetype. Prioritise properties without an inspection date, otherwise take the
# property with the oldest EPC
sample = []
for _, config in archetypes_for_survey.iterrows():
    properties = asset_list[
        (asset_list["archetype_id"] == config["archetype_id"]) &
        (asset_list["floor_area_band"] == config["floor_area_band"]) &
        (asset_list["starting_sap_band"] == config["starting_sap_band"])
    ]

    if pd.isnull(properties["epc_inspection_date"]).sum():
        sample_property = properties[pd.isnull(properties["epc_inspection_date"])].head(1).to_dict("records")
    else:
        # Take the property with the oldest EPC
        sample_property = properties.sort_values("epc_inspection_date", ascending=True).head(1).to_dict("records")

    sample.extend(sample_property)

sample = pd.DataFrame(sample)
sample = sample[
    [
        "landlord_property_id", "epc_inspection_date", "epc_sap_score_on_register", "starting_sap_band",
        "floor_area_band", "landlord_heating_system", "domna_postcode", "domna_full_address", "archetype_id"
    ]
]

archetypes = asset_list[["landlord_property_id", "archetype_id"]].copy()
archetypes["archetype_id"] = archetypes["archetype_id"].astype(str)

filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - Futures Housing/archetypes.xlsx"
# Store the data in two tabs. One with the archetype of every property and the second with the survey sample
with pd.ExcelWriter(filename) as writer:
    archetypes.to_excel(writer, sheet_name="Archetypes", index=False)
    sample.to_excel(writer, sheet_name="Survey Sample", index=False)
# We store this
# Questions:
# 1) If futures are considering changing properties that have oil heating systems, we could include them and
# we have 39 total archetypes. Otherwise, we have 25 archetypes
# 2) Can futures provide us with any information on the model of air source heat pumps and associated controls they're
# using
# Recommendations:
# 1) If they are willing to upgrade the heating systems of the oil properties, surveying 18 properties will cover
#

View file

@ -0,0 +1,24 @@
import pandas as pd
from backend.ml_models.Valuation import PropertyValuation
from backend.app.utils import sap_to_epc
# Load the survey results
surveys = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cambridge/Survey Data.xlsx",
    sheet_name="Survey data",
)

# For every surveyed property, estimate the average valuation increase from moving between the
# pre-works EPC and the scenario 1 post-works EPC, rounded to the nearest whole number
increases = [
    round(
        PropertyValuation.estimate_valuation_improvement(
            survey["Valuation"],
            sap_to_epc(survey["Pre SAP"]),
            sap_to_epc(survey["Scenario 1 Post SAP"]),
            total_cost=None
        )["average_increase"]
    )
    for _, survey in surveys.iterrows()
]

View file

@ -81,6 +81,7 @@ def app():
# We need to calculate the costs
cost_data = []
for _, row in epr_data.iterrows():
epc = row["EPC"][0]
sap = int(row["EPC"][1:])

View file

@ -0,0 +1,199 @@
"""
This script is to calculate the ABS for the Places for People London project
"""
import os
import pandas as pd
# Folder holding the reviewed, standardised route march workbooks
_PFP_ABS_DIR = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates"


def _load_route(workbook, sheet_name, location):
    """
    Reads one route sheet from a workbook, renames "Route" to "Route March" and tags it with its location.
    :param workbook: file name of the workbook inside the abs rates folder
    :param sheet_name: "Cav Route" or "PV Route"
    :param location: label stored in the "location" column
    :return: the route as a DataFrame
    """
    # The column names are on the second row of each sheet
    route = pd.read_excel(os.path.join(_PFP_ABS_DIR, workbook), sheet_name=sheet_name, header=1)
    route = route.rename(columns={"Route": "Route March"})
    route["location"] = location
    return route


# London
_LONDON_WORKBOOK = "PFP_areas_surrounding_london_reviewed_standardised_15052025.xlsx"
pfp_london_cav = _load_route(_LONDON_WORKBOOK, "Cav Route", "London")
pfp_london_pv = _load_route(_LONDON_WORKBOOK, "PV Route", "London")

# East
_EAST_WORKBOOK = "PFP_east_reviewed_standarised_15052025.xlsx"
pfp_east_cav = _load_route(_EAST_WORKBOOK, "Cav Route", "East")
pfp_east_pv = _load_route(_EAST_WORKBOOK, "PV Route", "East")

# North east
_NORTH_EAST_WORKBOOK = "PFP_north_east_reviewed_standardised_15052025.xlsx"
pfp_north_east_cav = _load_route(_NORTH_EAST_WORKBOOK, "Cav Route", "North East")
pfp_north_east_pv = _load_route(_NORTH_EAST_WORKBOOK, "PV Route", "North East")

# North West
_NORTH_WEST_WORKBOOK = "PFP_north_west_reviewed_standardised_15052025.xlsx"
pfp_north_west_cav = _load_route(_NORTH_WEST_WORKBOOK, "Cav Route", "North West")
pfp_north_west_pv = _load_route(_NORTH_WEST_WORKBOOK, "PV Route", "North West")

cav_route = pd.concat(
    [
        pfp_london_cav,
        pfp_east_cav,
        pfp_north_east_cav,
        pfp_north_west_cav
    ]
)

solar_route = pd.concat(
    [
        pfp_london_pv,
        pfp_east_pv,
        pfp_north_east_pv,
        pfp_north_west_pv
    ]
)
def get_band(sap_score_number):
    """
    Looks up the SAP half-band label for a SAP score. Scores outside every band give None.
    """
    # (label, inclusive lower bound, exclusive upper bound)
    band_limits = (
        ("High_A", 96, float("inf")),
        ("Low_A", 92, 96),
        ("High_B", 86, 92),
        ("Low_B", 81, 86),
        ("High_C", 74.5, 81),
        ("Low_C", 69, 74.5),
        ("High_D", 61.5, 69),
        ("Low_D", 55, 61.5),
        ("High_E", 46.5, 55),
        ("Low_E", 39, 46.5),
        ("High_F", 29.5, 39),
        ("Low_F", 21, 29.5),
        ("High_G", 10.5, 21),
        ("Low_G", 1, 10.5),
    )
    matches = (name for name, floor, ceiling in band_limits if floor <= sap_score_number < ceiling)
    return next(matches, None)
def classify_floor_area(floor_area):
    """
    Looks up the floor area segment label for a floor area; values above every limit (and values
    that fail the comparisons) land in "200+".
    """
    for ceiling, label in ((72, "0-72"), (97, "73-97"), (199, "98-199")):
        if floor_area <= ceiling:
            return label
    return "200+"
# Floor area assumed when the EPC has none recorded
_DEFAULT_FLOOR_AREA = 90


def _aggregate_by_route(route):
    """
    Sums the ABS rate and counts the properties for each route march, ordered by week number.
    :param route: route DataFrame with "Route March", "ABS Rate" and "landlord_property_id" columns
    :return: one row per route march
    """
    agg = (
        route.groupby("Route March").agg(
            {
                "ABS Rate": "sum",
                "landlord_property_id": "count",
            }
        ).reset_index()
    )
    # The week number is the first run of digits in the route march label
    agg["Week Number"] = agg["Route March"].str.extract(r"(\d+)", expand=False).astype(int)
    agg = agg.sort_values("Week Number", ascending=True)
    return agg.rename(columns={"landlord_property_id": "Number of Properties"})


# We classify the abs bounds
solar_route["starting_abs_band"] = solar_route["epc_sap_score_on_register"].apply(get_band)
solar_route["ending_abs_band_scenario1"] = "High_C"
solar_route["ending_abs_band_scenario2"] = "Low_B"
solar_route["epc_total_floor_area"] = solar_route["epc_total_floor_area"].fillna(_DEFAULT_FLOOR_AREA)
solar_route["floor_area_band"] = solar_route["epc_total_floor_area"].apply(classify_floor_area)

# We classify the abs bounds
cav_route["epc_sap_score_on_register"] = cav_route["epc_sap_score_on_register"].fillna(68)
cav_route["starting_abs_band"] = cav_route["epc_sap_score_on_register"].apply(get_band)
# A missing floor area fails every comparison in classify_floor_area and would be banded "200+", so we
# fill it with the same default as the solar route first
# NOTE(review): confirm the default floor area is acceptable for the cavity route
cav_route["epc_total_floor_area"] = cav_route["epc_total_floor_area"].fillna(_DEFAULT_FLOOR_AREA)
cav_route["floor_area_band"] = cav_route["epc_total_floor_area"].apply(classify_floor_area)
cav_route["ending_abs_band"] = "Low_C"

abs_matrix = pd.read_csv(
    "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
)

cav_route = cav_route.merge(
    abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
    how="left",
    left_on=["starting_abs_band", "ending_abs_band", "floor_area_band"],
    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
)

# The solar route is scored against scenario 1 only
solar_route = solar_route.merge(
    abs_matrix.rename(columns={"Cost Savings": "ABS Rate"}),
    how="left",
    left_on=["starting_abs_band", "ending_abs_band_scenario1", "floor_area_band"],
    right_on=["Starting Band", "Finishing Band", "Floor Area Segment"],
)

# Properties with no matching row in the matrix contribute nothing
cav_route["ABS Rate"] = cav_route["ABS Rate"].fillna(0)
solar_route["ABS Rate"] = solar_route["ABS Rate"].fillna(0)

cav_abs_agg = _aggregate_by_route(cav_route)
solar_abs_agg = _aggregate_by_route(solar_route)

# We store the data
# Store as an excel
filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/abs rates/pfp programme rates.xlsx"
# Store the data in four tabs. Two with the aggregated ABS per route march and two with the property level data
with pd.ExcelWriter(filename) as writer:
    solar_abs_agg.to_excel(writer, sheet_name="Solar ABS", index=False)
    cav_abs_agg.to_excel(writer, sheet_name="Cav ABS", index=False)
    cav_route.to_excel(writer, sheet_name="Cavity data", index=False)
    solar_route.to_excel(writer, sheet_name="Solar data", index=False)

View file

@ -8,6 +8,8 @@ address the following concerns:
"""
import pandas as pd
from tqdm import tqdm
from backend.SearchEpc import SearchEpc
# This is Thrive's list of properties and when they should have been surveyed
thrive_tracker = pd.read_excel(
@ -51,27 +53,10 @@ original_columns = {
}
original_asset_list = original_asset_list[original_columns.keys()].rename(columns=original_columns)
original_asset_list["Data Source"] = "Thrive Tracker"
original_asset_list["Data Source"] = "Original Asset List"
original_asset_list = original_asset_list.drop_duplicates()
# We append on the missed properties, with the information we have
# 'Unnamed: 0', 'Thrive Notes', 'Priority', 'UPRN', 'Short Address', '#',
# 'Adress Line 1', 'Postcode', 'Property Type', 'Build Form',
# 'Build year', 'Assumed mm ', 'SAP', 'Name', 'Primary Number',
# 'Secondary Number', 'Email', 'Thrive use: Tenancy Number',
# 'Special Requirements ', 'CIGA', 'Date CIGA check received',
# 'Proposed Progamme', 'New Proposed Programme',
# 'Missing from Route March?', 'Date Letters Sent (w.c)', 'Work Type',
# 'Warmfront Survey Notes', 'Notes Reply (Thrive)', 'Loft Insulation',
# 'Cost for Vents', 'Cavity Depth', 'Cavity Condition',
# 'Date Submitted to installer', 'PRRN Number',
# 'Loft insulation required? (Thrive)', 'Date booked ',
# 'Completed\n(yes/no)', 'Date Completed',
# 'Vents installed?\n(number and location)',
# 'Loft Top Up\n(amount of insulation) ', 'CIGA Warranty Provided ',
# 'Notes', 'Works Number', 'CW KGI Uploaded', 'Keystone Fan Added',
# 'SA Cavity Condition Updated', 'SA Loft & Energy Updated',
# 'PRRN Submitted '
missed_properties["Full Address"] = (
missed_properties["#"].astype(str) + ", " +
missed_properties["Adress Line 1"].astype(str) + ", " +
@ -94,6 +79,19 @@ missed_properties["WFT Findings"] = "Property Not Inspected"
missed_properties["ECO Eligibility"] = "Property Not Inspected"
missed_properties["Data Source"] = "Thrive Tracker"
# We collect the duplicated ids in original_asset_list so they can be inspected
dupe_ids = original_asset_list[original_asset_list["thrive_property_id"].duplicated()]["thrive_property_id"].unique()
dupes = original_asset_list[
original_asset_list["thrive_property_id"].isin(dupe_ids)
].copy()
dupes = dupes.sort_values("thrive_property_id")
original_asset_list = original_asset_list.rename(
columns={
"detailed_property_type": "build_form"
}
)
master_list = pd.concat([missed_properties, original_asset_list], ignore_index=True)
# We were provided with a data update for a sample of properties. We update the data with this information
@ -103,12 +101,339 @@ data_update = pd.read_excel(
header=0
)
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)]
new_properties = data_update[~data_update["UPRN"].isin(master_list["thrive_property_id"].astype(str).values)].copy()
new_properties["Full Address"] = (
new_properties["#"].astype(str) + ", " +
new_properties["Adress Line 1"].astype(str) + ", " +
new_properties["Postcode"].astype(str)
)
new_properties = new_properties[missed_columns.keys()].rename(columns=missed_columns)
new_properties["WFT Findings"] = "Property Not Inspected"
new_properties["ECO Eligibility"] = "Property Not Inspected"
new_properties["Data Source"] = "13.05.2025 Data Update"
master_list = pd.concat([new_properties, master_list])
# We append any new data on heating system, heating type, and insulation type, based on the data update
master_list = master_list.merge(
data_update[["UPRN", "Heating Type", "Assumed mm ", "SAP"]].rename(
columns={
"Heating Type": "heating_type_updated",
"Assumed mm ": "assumed_loft_insulation_thickness_updated",
"SAP": "sap_rating_updated"
}
),
how="left",
left_on="thrive_property_id",
right_on="UPRN"
)
# We fill the missings
master_list["heating_type_updated"] = master_list["heating_type_updated"].fillna(master_list["heating_type"])
master_list["assumed_loft_insulation_thickness_updated"] = master_list[
"assumed_loft_insulation_thickness_updated"
].fillna(master_list["assumed_loft_insulation_thickness"])
master_list["sap_rating_updated"] = master_list["sap_rating_updated"].fillna(master_list["sap_rating"])
assert not master_list["thrive_property_id"].duplicated().sum(), "Duplicate thrive_property_id found in master_list"
master_list["Address in tracker"] = master_list["thrive_property_id"].astype(str).isin(
thrive_tracker["UPRN"].astype(str).values
)
# Store the asset list - call it master asset list updated May 2025
master_list = master_list.drop(columns=["UPRN"])
master_list["thrive_property_id"] = master_list["thrive_property_id"].astype(str)
# master_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
# "Complete - Updated May 2025.xlsx",
# )
master_list["house_number_TEMP"] = master_list.apply(
lambda x: SearchEpc.get_house_number(address=x["full_address"], postcode=x["postcode"]),
axis=1
)
# We add in the status of the property
# TODO: Add the status of the property from the Thrive tracker
outcomes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes - April "
"24-March25.xlsx",
header=0
)
outcomes["row_id"] = outcomes.index
# We have ids which share the same phone number but have different UPRNs, so we don't match to the tracker for these
tracker_for_matching = thrive_tracker[
~thrive_tracker["UPRN"].isin(['OAKGRE0065ABBLDW1', 'OAKGRE0066ABBLDW1', 'JACKET0102ABBLDW1', 'BELLCL0008BEDMDW1'])
].copy()
tracker_for_matching["Full Address"] = (
tracker_for_matching["#"].astype(str) + ", " +
tracker_for_matching["Adress Line 1"].astype(str) + ", " +
tracker_for_matching["Postcode"].astype(str)
)
# Outcome rows where the house number is embedded in the address rather than in the "No." column
house_number_overrides = {
    "103a Norfolk Road Rickmansworth Hertfordshire WD3 1JY": "103a",
    "105a Norfolk Road Rickmansworth Hertfordshire WD3 1JY": "105a",
    "107a Norfolk Road Rickmansworth Hertfordshire WD3 1JY": "107a",
}

# For every outcome row we find the property id, first in the tracker and then in the master list
outcomes_id_lookup = []
for _, x in tqdm(outcomes.iterrows(), total=len(outcomes)):
    hn = str(x["No."])
    address = x["Address"]
    postcode = x["Postcode"]
    # Only used to give context in the error messages
    contact_no = str(x["Contact No"]) if not pd.isnull(x["Contact No"]) else str(x["Contact No.1"])
    contact_no = None if contact_no == "nan" else contact_no

    # Manual corrections to the house number
    if address == "292 Micklefield Road":
        hn = "292"
    if address == "Micklefield Road" and hn == "302":
        hn = "292"
    hn = house_number_overrides.get(address, hn)

    # Matching on the phone number was tried and dropped, so we match on house number and postcode
    m1 = tracker_for_matching[
        (tracker_for_matching["#"].astype(str) == hn) &
        (tracker_for_matching["Postcode"] == postcode)
    ]
    if m1.empty:
        # Some properties aren't in the tracker, we match to the master list
        m1 = master_list[
            (master_list["house_number_TEMP"].astype(str) == hn) &
            (master_list["postcode"] == postcode)
        ]
        if m1.empty:
            # Fail with the details of the row instead of an IndexError from .values[0]
            raise ValueError(
                f"Error for {hn} - {address} - {postcode} - {contact_no} in the master list"
            )
        # If several master list rows match, the first one is used
        outcomes_id_lookup.append(
            {
                "row_id": x["row_id"],
                "thrive_property_id": m1["thrive_property_id"].values[0],
                "address": m1["full_address"].values[0],
                "postcode": m1["postcode"].values[0],
            }
        )
        continue

    if m1.shape[0] != 1:
        raise ValueError(
            f"Error for {hn} - {address} - {postcode} - {contact_no} in the tracker"
        )

    # We record the tracker match for this outcome row
    outcomes_id_lookup.append(
        {
            "row_id": x["row_id"],
            "thrive_property_id": m1["UPRN"].values[0],
            "address": m1["Full Address"].values[0],
            "postcode": m1["Postcode"].values[0],
        }
    )

outcomes_id_lookup = pd.DataFrame(outcomes_id_lookup)
outcomes = outcomes.merge(
outcomes_id_lookup,
how="left",
left_on="row_id",
right_on="row_id"
)
outcomes = outcomes.drop(columns=["row_id"])
outcomes = outcomes.rename(
columns={
"Outcomes": "Outcome",
"Notes (If 'no "
"answer' under outcomes, have you checked around the property for access issues where possible?)": "Notes",
}
)
# Store the corrected outcomes
# outcomes.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive - Outcomes -
# April 24-March25 - Corrected.xlsx",
# index=False
# )
# FIXME: this unfinished statement was a syntax error (`= =`) and selected an empty column name, which
# stopped the whole script from parsing. Fill in the column that is needed alongside UPRN and re-enable
# data_update = data_update[["UPRN", "<column name>"]]
# TODO: Flag the Thrive priorities and create a separate project code for these
# TODO: Add the general project code
# TODO: Add the thrive
def parse_date(value):
    """Parse a free-text date cell into a pandas Timestamp.

    Strings may carry a "week commencing" prefix written as ``W.C`` or ``w/c``
    (any casing); the prefix is removed before parsing. Dates are read
    day-first.

    Args:
        value: The raw cell value. Usually a string, but blank cells (NaN /
            None / NaT) and already-parsed datetimes are accepted too.

    Returns:
        A ``pd.Timestamp``, or ``pd.NaT`` when the value is blank or cannot be
        parsed as a date.
    """
    if not isinstance(value, str):
        # Blank cells reach us as NaN/None, which have no .strip(); previously
        # this raised AttributeError and aborted the whole .apply().
        if pd.isna(value):
            return pd.NaT
        try:
            # Non-string values (e.g. an existing Timestamp) are handed to
            # pandas unchanged.
            return pd.to_datetime(value, dayfirst=True, errors='coerce')
        except (ValueError, TypeError, OverflowError):
            return pd.NaT

    # Strip any 'W.C' or 'w/c' prefix and clean whitespace
    cleaned = value.strip().lower().replace('w.c', '').replace('w/c', '').strip()
    try:
        # errors='coerce' already maps unparseable text to NaT; the except is a
        # last-resort guard for parser errors that escape coercion.
        return pd.to_datetime(cleaned, dayfirst=True, errors='coerce')
    except (ValueError, TypeError, OverflowError):
        return pd.NaT
# Convert the free-text "Date letters sent" values into timestamps.
outcomes['Parsed Date'] = outcomes['Date letters sent'].apply(parse_date)
# Next step - match the submissions master to the asset list. We will append on the UPRN.
# Each sheet gets a row_id column copied from its index, to be used as a join key.
eco3_submissions = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
    "ECO3.csv",
    header=0,
).assign(row_id=lambda frame: frame.index)
eco4_submissions = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
    "ECO4.csv",
    header=0,
).assign(row_id=lambda frame: frame.index)
# Properties that were never on the asset list. Entries use the
# "<house number>+<street>+<postcode>" key format and are grouped by street;
# only the house number varies within a group.
not_on_master = (
    [f"{number}+FOXGROVE PATH+WD19 6YL" for number in (7, 9, 11)]
    + [f"{number}+LINCOLN DRIVE+WD19 7BA" for number in (20, 22, 24, 26)]
    + [
        f"{number}+Ryman Court, Stag Lane+WD3 5HN"
        for number in (
            1, 6, 9, 10, 11, 12, 14, 15, 20, 21, 22, 25, 26, 31, 33, 34,
            37, 38, 39, 41, 43, 45, 46, 48, 49, 50, 52,
        )
    ]
)
# Manual corrections applied to submission rows before they are matched to the master list.
# Keys use the same "<house number>+<street>+<postcode>" format as the `key` string built from
# each submission row (note that some keys deliberately keep a trailing space after the street
# or omit the space inside the postcode -- they must equal the raw sheet values exactly).
# Values are the (house number, street, postcode) tuple that replaces the row's own values.
# The mapping is consulted by both the ECO3 and the ECO4 matching loops.
# NOTE(review): these look like typo fixes for the submission sheets -- confirm against source data.
eco3_remap = {
"19+OAKHILL ROAD+WD5 8RE": ('19', 'OAKHILL ROAD', 'WD3 9RE'),
"29+OAKHILL ROAD+WD5 8RE": ('29', 'OAKHILL ROAD', 'WD3 9RE'),
"31+OAKHILL ROAD+WD5 8RE": ('31', 'OAKHILL ROAD', 'WD3 9RE'),
"44+OAKHILL ROAD+WD5 8RE": ('44', 'OAKHILL ROAD', 'WD3 9RF'),
"64+OAKHILL ROAD+WD4 8RF": ('64', 'OAKHILL ROAD', 'WD3 9RF'),
"11+LANCASTER WAY+WD3 PRE": ('11', 'LANCASTER WAY', 'WD5 0PQ'),
"16+LANCASTER WAY+WD3 PRE": ('16', 'LANCASTER WAY', 'WD5 0PQ'),
"58+TALBOT ROAD +WD31HE": ('58', 'TALBOT ROAD', 'WD3 1HE'),
"10+PEARTREE COURT/WELWYN GARDEN CITY+AL73XN": ('10', 'PEARTREE COURT/WELWYN GARDEN CITY', 'AL7 3XN'),
"25+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('25', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
"32+GOBLINS GREEN/WELWYN GARDEN CITY+AL73ST": ('32', 'GOBLINS GREEN/WELWYN GARDEN CITY', 'AL7 3ST'),
"94+BAKER ST/POTTERS BAR+EN62EP": ('94', 'BAKER ST/POTTERS BAR', 'EN6 2EP'),
'33+Tudor Way+WD3JA': ('33', 'Tudor Way', 'WD3 8JA'),
'120+Hazlewood lane +WD5 0HF': ('120', 'Hazlewood lane', 'WD5 0HE'),
'35+Rosehill gardens +WD5 0HE': ('35', 'Rosehill gardens', 'WD5 0HF'),
'18+Rosehill gardens +WD5 0HE': ('18', 'Rosehill gardens', 'WD5 0HF'),
'34+Rosehill gardens +WD5 0HE': ('34', 'Rosehill gardens', 'WD5 0HF'),
'58+Rosehill gardens +WD5 0HE': ('58', 'Rosehill gardens', 'WD5 0HF'),
'48+Rosehill gardens +WD5 0HE': ('48', 'Rosehill gardens', 'WD5 0HF'),
'45+Rosehill gardens +WD5 0HE': ('45', 'Rosehill gardens', 'WD5 0HF'),
'6+Rosehill gardens +WD5 0HE': ('6', 'Rosehill gardens', 'WD5 0HF'),
'2+Rosehill gardens +WD5 0HE': ('2', 'Rosehill gardens', 'WD5 0HF'),
'29+Rosehill gardens +WD5 0HE': ('29', 'Rosehill gardens', 'WD5 0HF'),
'61+GOLDEN DELL+AL8 4EE': ('61', 'GOLDEN DELL', 'AL7 4EE'),
# The house number in this key is typed with a capital letter O rather than a zero.
'2O+EDINBURGH AVENUE+WD3 8LB': ('20', 'EDINBURGH AVENUE', 'WD3 8LB'),
}
# Resolve every ECO3 submission row to exactly one master-list property.
eco3_lookup = []
for _, submission in tqdm(eco3_submissions.iterrows(), total=len(eco3_submissions)):
    house_no = submission["NO "]
    post_code = submission["Post Code"]
    street_name = submission["Street / Block Name"]
    lookup_key = f"{house_no}+{street_name}+{post_code}"

    # Rows known to be absent from the master list get no lookup entry.
    if lookup_key in not_on_master:
        continue

    # Where the sheet's details differ from the master list, swap in the corrected
    # values; otherwise keep the row's own values.
    house_no, street_name, post_code = eco3_remap.get(
        lookup_key, (house_no, street_name, post_code)
    )

    # Match on house number and postcode only; the street text is not used for matching.
    same_number = master_list["house_number_TEMP"].astype(str) == house_no
    same_postcode = master_list["postcode"] == post_code
    candidates = master_list[same_number & same_postcode]
    if len(candidates) != 1:
        raise ValueError(
            f"Error for {lookup_key} in the tracker"
        )

    # Record the id next to the address exactly as it appears on the submission sheet.
    eco3_lookup.append(
        dict(
            row_id=submission["row_id"],
            thrive_property_id=candidates["thrive_property_id"].values[0],
            submission_house_number=submission["NO "],
            submission_address1=submission["Street / Block Name"],
            submission_postcode=submission["Post Code"],
        )
    )
# Resolve every ECO4 submission row to exactly one master-list property.
# Rows listed in not_on_master are skipped; any other row that does not match exactly
# one master-list record raises ValueError so it can be corrected by hand.
eco4_lookup = []
# These comparison columns are the same for every row, so build them once
# rather than re-deriving them on each iteration.
master_house_numbers = master_list["house_number_TEMP"].astype(str)
master_postcodes_lower = master_list["postcode"].str.lower()
for _, row in tqdm(eco4_submissions.iterrows(), total=len(eco4_submissions)):
    hn = row["NO."]
    pc = row["Post Code"]
    street = row["Street / Block Name"]
    key = f"{hn}+{street}+{pc}"
    if key in not_on_master:
        continue
    if key in eco3_remap:
        # The submission sheet's details differ from the master list for this row,
        # so use the corrected values (the ECO3 corrections are shared with ECO4).
        hn, street, pc = eco3_remap[key]
    if not isinstance(pc, str):
        # A blank postcode is read as NaN, which has no .lower(); fail with the
        # row key instead of an opaque AttributeError.
        raise ValueError(
            f"Error for {key} in the tracker: postcode is missing or not text"
        )
    # Match on house number and postcode only (postcode compared case-insensitively).
    m1 = master_list[
        (master_house_numbers == hn) &
        (master_postcodes_lower == pc.lower())
    ]
    if m1.shape[0] != 1:
        raise ValueError(
            f"Error for {key} in the tracker"
        )
    # Record the id next to the address exactly as it appears on the submission sheet.
    eco4_lookup.append(
        {
            "row_id": row["row_id"],
            "thrive_property_id": m1["thrive_property_id"].values[0],
            "submission_house_number": row["NO."],
            "submission_address1": row["Street / Block Name"],
            "submission_postcode": row["Post Code"],
        }
    )
# Join the resolved ids back onto each submission sheet via row_id. Rows without
# a lookup entry keep empty id columns because the join is a left join.
eco3_lookup = pd.DataFrame(eco3_lookup)
eco3_submissions = pd.merge(eco3_submissions, eco3_lookup, on="row_id", how="left")
eco4_lookup = pd.DataFrame(eco4_lookup)
eco4_submissions = pd.merge(eco4_submissions, eco4_lookup, on="row_id", how="left")

# Write the enriched sheets out next to the originals.
eco3_submissions.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
    "ECO3 - with IDS.csv",
    index=False,
)
eco4_submissions.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Submissions "
    "ECO4 - with IDS.csv",
    index=False,
)

View file

@ -0,0 +1,130 @@
"""
This script takes the standardised asset list and appends the project codes.
We also review the existing install status, in case anything is wrong.
"""
import pandas as pd
import numpy as np
# Input 1: the "Standardised Asset List" tab of the standardised asset-list workbook.
standardised_asset_list = pd.read_excel(
    io="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
       "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Standardised Asset List",
)
# Input 2: the "Master Tracker" tab of the Warmfront tracker; header=1 takes the
# column names from the second row of the sheet.
project_code_allocations = pd.read_excel(
    io="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
       "Warmfront).xlsx",
    sheet_name="Master Tracker",
    header=1,
)
# Work on a copy of just the programme columns from the tracker.
programme_codes = project_code_allocations.loc[
    :,
    ["UPRN", "Proposed Progamme", "New Proposed Programme", "Warmfront Survey Notes"],
].copy()
# programme_reference is the new proposed programme where one is recorded,
# otherwise it falls back to the originally proposed programme.
programme_codes["programme_reference"] = np.where(
    programme_codes["New Proposed Programme"].isna(),
    programme_codes["Proposed Progamme"],
    programme_codes["New Proposed Programme"],
)
# Programme label -> project code. Phases 2-10 map to "THRIVE-0NN"; only the
# weekly programmes listed below (1, 2, 4 and 7) have a "THRIVE-WEEK-0NN" code.
PROJECT_CODE_MAP = {
    f"Phase {phase}": f"THRIVE-{phase:03d}" for phase in range(2, 11)
}
PROJECT_CODE_MAP.update(
    {f"Week {week}": f"THRIVE-WEEK-{week:03d}" for week in (1, 2, 4, 7)}
)
# Translate each programme reference into its project code; references with no
# entry in PROJECT_CODE_MAP come out as NaN.
programme_codes["project_code"] = programme_codes["programme_reference"].map(PROJECT_CODE_MAP)
thrive_notes = project_code_allocations[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]].copy()

# First join: bring the project codes onto the asset list. The tracker's UPRN
# column is matched against the asset list's landlord_property_id.
standardised_asset_list = pd.merge(
    standardised_asset_list,
    programme_codes[["UPRN", "project_code", "programme_reference"]],
    how="left",
    left_on="landlord_property_id",
    right_on="UPRN",
)
# Second join: bring across the Thrive notes using the UPRN column added by the first join.
standardised_asset_list = pd.merge(
    standardised_asset_list,
    thrive_notes[["UPRN", "Thrive Notes", "Priority", "Notes Reply (Thrive)"]],
    how="left",
    on="UPRN",
)
# UPRN was only needed as a join key.
standardised_asset_list = standardised_asset_list.drop("UPRN", axis=1)
# Historical completions: a property with no project code, whose hubspot status is
# present and is anything other than "READY TO BE SCHEDULED", gets the historical code.
standardised_asset_list["project_code"] = np.where(
    standardised_asset_list["project_code"].isna()
    & standardised_asset_list["hubspot_status"].ne("READY TO BE SCHEDULED")
    & standardised_asset_list["hubspot_status"].notna(),
    "THRIVE-HISTORICAL",
    standardised_asset_list["project_code"],
)
# Store as an excel
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Programme - "
            "reconciled.xlsx")
# The remaining tabs are copied across unchanged from the standardised asset-list workbook.
# Passing a list of sheet names makes read_excel return a {sheet name: DataFrame} dict from
# a single call, instead of re-reading the same workbook once per tab.
carried_over_tabs = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
    "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name=["Block Analysis", "Outcomes", "Unmatched Submissions", "Unmatched Ecosurv"],
)
block_analysis = carried_over_tabs["Block Analysis"]
outcomes = carried_over_tabs["Outcomes"]
unmatched_submissions = carried_over_tabs["Unmatched Submissions"]
unmatched_ecosurv = carried_over_tabs["Unmatched Ecosurv"]

# Write the updated asset list plus the carried-over tabs into the reconciled workbook.
with pd.ExcelWriter(filename) as writer:
    standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    block_analysis.to_excel(writer, sheet_name="Block Analysis", index=False)
    outcomes.to_excel(writer, sheet_name="Outcomes", index=False)
    unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
    unmatched_ecosurv.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
# Sanity check: compare against the master tracker to confirm every completed
# install is present on the asset list.
asset_list = pd.read_excel(
    io="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Thrive Asset List - "
       "Complete - Updated May 2025 - Standardised.xlsx",
    sheet_name="Standardised Asset List",
)
master_tracker = pd.read_excel(
    io="/Users/khalimconn-kowlessar/Documents/hestia/Customers/Thrive/Programme Reconciliation/Master Tracker (Thrive - "
       "Warmfront).xlsx",
    sheet_name="Master Tracker",
    header=1,
)
# Keep only tracker rows that have a completion date, then pair each one with the
# hubspot status of the matching asset-list property.
df = pd.merge(
    asset_list[["landlord_property_id", "hubspot_status"]],
    master_tracker.loc[master_tracker['Date Completed'].notna(), ["UPRN", "Date Completed"]],
    how="inner",
    left_on="landlord_property_id",
    right_on="UPRN",
)
# The two expressions below are evaluated but their results are not stored or printed.
# NOTE(review): presumably meant for inspection in an interactive session -- confirm.
df["hubspot_status"].value_counts()
df.loc[df["hubspot_status"] == "SUBMITTED TO INSTALLER"]

View file

@ -49,6 +49,6 @@ class TestLightingRecommendations:
'lighting in all '
'fixed outlets',
'low-energy-lighting': 100},
'total': 240.24, 'subtotal': 200.20000000000002, 'vat': 40.040000000000006, 'contingency': 14.3,
'preliminaries': 14.3, 'material': 80.0, 'profit': 28.6, 'labour_hours': 3.2, 'labour_days': 0.4,
'labour_cost': 63.0, 'survey': False}]
'total': 188.76000000000002, 'subtotal': 157.3, 'vat': 31.460000000000004, 'contingency': 14.3,
'material': 80.0, 'labour_hours': 3.2, 'labour_days': 0.4, 'labour_cost': 63.0, 'survey': False}
]