Merge pull request #380 from Hestia-Homes/main

Major deployment
This commit is contained in:
KhalimCK 2025-04-14 12:03:40 +01:00 committed by GitHub
commit 4b9ebb008f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
101 changed files with 19989 additions and 568 deletions

9
.gitignore vendored
View file

@ -268,4 +268,11 @@ adhoc
adhoc/*
etl-router-venv/
refactor_datasets/
refactor_datasets/
etl/eligibility/ha_15_32/
cache/
*/.idea
*.png
*.pptx

6
.idea/terraform.xml generated Normal file
View file

@ -0,0 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="TerraformProjectSettings">
<option name="toolPath" value="/opt/homebrew/bin/terraform" />
</component>
</project>

2436
asset_list/AssetList.py Normal file

File diff suppressed because it is too large Load diff

178
asset_list/DataMapper.py Normal file
View file

@ -0,0 +1,178 @@
# The OpenAI API key is read from the process environment so the secret never
# has to live in source control; it is None when the variable is not set.
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")
class DataRemapper:
    """Map raw categorical values onto a fixed set of standard values.

    Values are resolved in order of increasing cost: predefined mapping,
    exact match against the standard values, fuzzy match, and finally a single
    batched OpenAI request for whatever is still unmapped. Token usage and an
    estimated cost are accumulated on the instance.
    """

    def __init__(self, standard_values, standard_map=None, max_tokens=1000):
        """
        Initialize the remapper with standard values and a predefined mapping.
        :param standard_values: Set of allowed standardized values.
        :param standard_map: Dictionary of common remappings {raw_value: standard_value}.
            Defaults to an empty mapping when omitted.
        :param max_tokens: Upper bound on prompt tokens; also passed as the completion
            token limit of the OpenAI request.
        """
        self.standard_values = standard_values
        # A missing map must behave like an empty one: standardize_list does
        # membership tests against it, which would raise TypeError on None.
        self.standard_map = standard_map if standard_map is not None else {}
        self.fuzzy_threshold = 90  # Adjust fuzzy matching sensitivity
        self.ai_model = "gpt-4-turbo"  # Use gpt-3.5-turbo for cheaper processing
        # Tokenizer for counting tokens
        self.tokenizer = tiktoken.encoding_for_model(self.ai_model)
        # Track token usage and remap dictionary
        self.total_tokens_used = 0
        self.total_cost = 0
        self.remap_dict = {}  # {original_value: standardized_value}
        self.max_tokens = max_tokens  # Limit for OpenAI API
        # Memoization for AI calls
        self.ai_cache = {}  # {tuple(unmapped_values): {original_value: standardized_value}}
        # Capture the response for debugging
        self.ai_response = None
        # OpenAI pricing (as of Feb 2024)
        self.pricing = {
            "gpt-4-turbo": {"input": 0.01 / 1000, "output": 0.03 / 1000},
            "gpt-3.5-turbo": {"input": 0.0015 / 1000, "output": 0.002 / 1000},
        }
        self.openai_client = OpenAI(api_key=OPENAI_API_KEY)

    @staticmethod
    def clean_string(text):
        """Basic text cleaning: remove extra spaces, punctuation, and normalize case.

        :return: The cleaned string, or None when ``text`` is not a string.
        """
        if not isinstance(text, str):
            return None
        text = text.strip().lower()
        text = re.sub(r'[^\w\s]', '', text)  # Remove punctuation
        # Collapse runs of whitespace into a single space
        text = re.sub(r'\s+', ' ', text)
        return text

    @staticmethod
    def _parse_mapping(output_text):
        """Safely parse the model output into a dictionary.

        The text is parsed as JSON first and as a Python literal second (for
        single-quoted dictionaries). Nothing is ever executed, unlike ``eval``.

        :param output_text: Raw text returned by the model.
        :return: The parsed dictionary, or None if the text is not a dictionary.
        """
        import ast
        import json

        if not isinstance(output_text, str):
            return None
        text = output_text.strip()
        if text.startswith("```"):
            # The model sometimes wraps its answer in a markdown fence despite
            # the instructions; drop the opening (with language tag) and closing fence.
            text = re.sub(r"^```[a-zA-Z]*\s*|\s*```$", "", text).strip()
        for parser in (json.loads, ast.literal_eval):
            try:
                parsed = parser(text)
            except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
                continue
            if isinstance(parsed, dict):
                return parsed
        return None

    def fuzzy_match(self, text):
        """Use fuzzy matching to find the closest standard value.

        :return: The best matching standard value if its score reaches
            ``self.fuzzy_threshold``, otherwise None.
        """
        if not text:
            return None
        best = process.extractOne(text, self.standard_values)
        if best is None:
            # No candidate at all (e.g. no standard values to compare against)
            return None
        # Index access instead of two-name unpacking so that a result carrying
        # extra trailing fields (match, score, key/index) is also accepted.
        match, score = best[0], best[1]
        return match if score >= self.fuzzy_threshold else None

    def count_tokens(self, text):
        """Estimate the number of tokens in a given text."""
        return len(self.tokenizer.encode(text)) if text else 0

    def ai_standardize(self, unmapped_values):
        """Call OpenAI API **once** for all unmapped values to minimize cost, with memoization.

        :param unmapped_values: Values that no cheaper strategy could map.
        :return: Dictionary {original_value: standardized_value}. Every value maps to
            "unknown" when the model output cannot be parsed as a dictionary.
        :raises ValueError: If the prompt exceeds ``self.max_tokens`` tokens.
        """
        if not unmapped_values:
            return {}
        # Sorted so the same set of values always hits the same cache entry;
        # key=str keeps the sort from failing on mixed-type values.
        unmapped_tuple = tuple(sorted(unmapped_values, key=str))
        if unmapped_tuple in self.ai_cache:
            return self.ai_cache[unmapped_tuple]  # Return memoized result
        prompt = f"""
You are an expert in data classification. Standardize each of these values into one of the categories:
{list(self.standard_values)}.
Return only a JSON dictionary where:
- The keys are the original values.
- The values are the standardized ones.
Strictly return JSON **without markdown formatting** or extra text.
Example Output:
{{
"BLKHOUS": "block house",
"BEDSIT": "bedsit"
}}
Values to standardize:
{unmapped_values}
"""
        # Count input tokens
        input_tokens = self.count_tokens(prompt)
        if input_tokens > self.max_tokens:
            raise ValueError("Input tokens exceed the maximum limit.")
        logger.info("Calling OpenAI API for standardization...")
        response = self.openai_client.chat.completions.create(
            model=self.ai_model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=self.max_tokens,
            temperature=0.1,
        )
        # The message content may be None; treat that as an empty answer
        output_text = (response.choices[0].message.content or "").strip()
        # We store the raw AI response for debugging
        self.ai_response = output_text
        output_tokens = self.count_tokens(output_text)  # Count output tokens
        # Track total token usage
        self.total_tokens_used += input_tokens + output_tokens
        # Estimate cost
        input_cost = input_tokens * self.pricing[self.ai_model]["input"]
        output_cost = output_tokens * self.pricing[self.ai_model]["output"]
        self.total_cost += input_cost + output_cost
        mapping = self._parse_mapping(output_text)
        if mapping is None:
            logger.warning("Could not parse AI response as a dictionary; mapping all values to 'unknown'")
            mapping = {val: "unknown" for val in unmapped_values}  # Fallback
        # Memoize the AI response
        self.ai_cache[unmapped_tuple] = mapping
        logger.debug(f"AI Response: {mapping}")
        return mapping

    def standardize_list(self, values_to_remap):
        """
        Standardizes a list of values and returns a dictionary {original_value: standardized_value}.
        :param values_to_remap: List of raw values to standardize.
        :return: Dictionary {original_value: standardized_value}. This is the instance-level
            ``remap_dict``, so it also holds results of earlier calls.
        """
        unique_values = set(values_to_remap)  # Process only unique values
        unmapped_values = []
        for value in unique_values:
            if pd.isna(value):  # Handle NaN values
                self.remap_dict[value] = "unknown"
                continue
            cleaned_value = self.clean_string(value)
            # Rule-Based Check (Predefined Mapping): cleaned form first, then the raw value
            if cleaned_value in self.standard_map:
                self.remap_dict[value] = self.standard_map[cleaned_value]
                continue
            if value in self.standard_map:
                self.remap_dict[value] = self.standard_map[value]
                continue
            # Only strings can be lower-cased; other types skip this lookup
            if isinstance(value, str) and value.lower() in self.standard_map:
                self.remap_dict[value] = self.standard_map[value.lower()]
                continue
            # Exact Match in Standard Values
            if cleaned_value in self.standard_values:
                self.remap_dict[value] = cleaned_value
                continue
            # Fuzzy Matching
            fuzzy_match = self.fuzzy_match(cleaned_value)
            if fuzzy_match:
                self.remap_dict[value] = fuzzy_match
                continue
            # Capture anything that wasn't mapped
            unmapped_values.append(value)
        # AI Model - remap anything unmapped (batch request)
        ai_mapping = self.ai_standardize(unmapped_values)
        self.remap_dict.update(ai_mapping)
        return self.remap_dict

    def report_usage(self):
        """Prints a summary of token usage and cost."""
        print(f"\n🔹 Total Tokens Used: {self.total_tokens_used}")
        print(f"💰 Estimated Cost: ${self.total_cost:.4f}")

953
asset_list/app.py Normal file
View file

@ -0,0 +1,953 @@
import os
import json
import pandas as pd
from pprint import pprint
import msgpack
from utils.s3 import read_from_s3
from asset_list.AssetList import AssetList
from asset_list.mappings.property_type import PROPERTY_MAPPING
from asset_list.mappings.built_form import BUILT_FORM_MAPPINGS
from asset_list.mappings.walls import WALL_CONSTRUCTION_MAPPINGS
from asset_list.mappings.heating_systems import HEATING_MAPPINGS
from asset_list.mappings.exising_pv import EXISTING_PV_MAPPINGS
from asset_list.mappings.roof import ROOF_CONSTRUCTION_MAPPINGS
from asset_list.utils import get_data
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
# Load settings from the backend's .env file before reading them below.
# NOTE(review): the path is relative, so this presumably expects to be run from the repository root - confirm.
load_dotenv(dotenv_path="backend/.env")
# Token handed to get_data() as epc_auth_token; None when the variable is not set.
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
    """Add an ``address1_extracted`` column derived from the full address.

    The frame is modified in place and also returned.

    :param asset_list: DataFrame holding the addresses.
    :param full_address_col: Name of the column containing the full address.
    :param postcode_col: Name of the postcode column (only read by
        ``house_number_extraction``).
    :param method: One of ``first_two_words``, ``first_word`` or
        ``house_number_extraction``.
    :return: The same DataFrame with the new column.
    :raises ValueError: If ``method`` is not one of the supported names.
    """
    target = "address1_extracted"

    if method == "house_number_extraction":
        def _house_number(row):
            # Delegate to the EPC search helper, one row at a time
            return SearchEpc.get_house_number(address=row[full_address_col], postcode=row[postcode_col])

        asset_list[target] = asset_list.apply(_house_number, axis=1)
    elif method == "first_two_words":
        words = asset_list[full_address_col].str.split(" ")
        asset_list[target] = words.str[:2].str.join(" ")
    elif method == "first_word":
        words = asset_list[full_address_col].str.split(" ")
        asset_list[target] = words.str[0]
    else:
        raise ValueError(f"Method {method} not recognized")

    return asset_list
def app():
"""
This app pulls EPC data for some properties owned by Livewest
Data request contents:
Date of last EPC
Reason for EPC
SAP score on register
Property Type
Property Area
Property Age
Any Dimensions (HLP,PW,RH)
Property Wall Construction
Heating Type
Secondary Heating
Loft Insulation Depth
Additional if possible:
Heat loss calculations
EPC recommendations
Property UPRN
"""
# TODO:
# For cavity work:
# - Flag any entries that have a different wall type between non-intrusive data against EPC
# - Worth double checking entries that have a difference in wall construction
# - Look at anything that is flagged as an empty cavity but the EPC data says its a filled cavity
# - Look at the current EPC scores - Anything that is C75 or above, especially if its assumed no insulation
# - By postcode, we can try and deduce if all of the addresses are a flats and then estimate if 50% of the flats
# are less than C75
# - Flag anything pre SAP2012
# - Flag anything over 5 years old
# - Look at year built vs age band
#
# For Solar:
# - Discount any that have solar PV - based on non-intrusives and from the inspections team
# - In the heating, discount anything that isnt ashp, ghsp, hhrs, electric storage - possibly homes with
# electric room heaters but it might need to be an EPC E
# - Fabric - check the floor, wall and roof:
# - Filled or empty cavity is good
# - Insulated solid/timber/system built is good
# - SCIS/CEG needs solid floors
# - JJC dont care
# - Anything with a loft 200 or below
# - Anything C75 and above wont qualify
# - Insulated loft = 200mm
# - We want: fully insulated property (all wall types), EPC D or below (floors should be solid)
# - Or the insulation required is loft/cavity (floors should be solid)
# Bromford
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme "
"Rebuild/Prepared data/")
data_filename = "asset_list.xlsx"
sheet_name = "Sheet1"
postcode_column = 'PostCode'
fulladdress_column = "FullAddress"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "ConYear"
landlord_os_uprn = None
landlord_property_type = "AssetTypeDesc"
landlord_built_form = "PropTypeDesc"
landlord_wall_construction = "Construction type"
landlord_roof_construction = None
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "Asset"
landlord_sap = None
outcomes_filename = "outcomes.xlsx"
outcomes_sheetname = "Sheet1"
outcomes_postcode = "Postcode"
outcomes_houseno = "No"
outcomes_id = None
outcomes_address = "Address"
master_filepaths = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
"3 submissions.csv",
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared data/ECO "
"4 submissions.csv",
]
master_to_asset_list_filepath = None
phase = False
# Torus
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Torus/Phase 1"
data_filename = "Torus Property Asset List - Phase 1.xlsx"
sheet_name = "TORUS"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = "NatUPRN"
landlord_property_type = "Property Type"
landlord_built_form = "Built Form"
landlord_wall_construction = "Wall Construction"
landlord_roof_construction = "Roof Construction"
landlord_heating_system = "Space Heating Source"
landlord_existing_pv = "Low Carbon Technology (Solar PV)"
landlord_property_id = "UPRN"
landlord_sap = "SAP Score"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
phase = True
# Ealing - houses
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing"
data_filename = "Ealing_rechecked_cleaned_05042025.csv"
sheet_name = None
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Year Built"
landlord_os_uprn = None
landlord_property_type = "Property Type Code"
landlord_built_form = None
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Property ref"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
# Southern Midlands
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Southern/Midlands Properties - Apr 2025"
data_filename = "Southern Housing Midlands Property List - combined.xlsx"
sheet_name = "Sheet 1"
postcode_column = 'Post Code'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Age_1"
landlord_os_uprn = None
landlord_property_type = "Prop_Type"
landlord_built_form = "Prop_Type"
landlord_wall_construction = "Walls_P"
landlord_heating_system = "Heating System"
landlord_existing_pv = None
landlord_property_id = "AssetID"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_to_asset_list_filepath = None
# Live West (2018 Asset list)
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/2018 Asset List"
)
data_filename = "LIVEWEST STOCK - 23rd October 2018.xlsx"
sheet_name = "Assets"
postcode_column = 'Postcode'
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build Year"
landlord_os_uprn = None
landlord_property_type = "Property Archetype"
landlord_built_form = None
landlord_wall_construction = None
landlord_heating_system = "Heating Fuel Type"
landlord_existing_pv = None
landlord_property_id = "Uprn - DO NOT DELETE"
outcomes_filename = "RT - LiveWest.xlsx"
outcomes_sheetname = "Feedback"
outcomes_postcode = "Poscode"
outcomes_houseno = "No."
outcomes_id = "UPRN"
master_filepaths = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
"- redacted for analysis/CAVITY-Table 1.csv"
]
master_to_asset_list_filepath = None
# Live West (South West asset list)
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March "
"2025/Livewest Asset List (Original) - csv")
data_filename = "Report-Table 1.csv"
sheet_name = None
postcode_column = 'Postcode'
fulladdress_column = "T1_Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build Yr"
landlord_os_uprn = None
landlord_property_type = "T1_AssetType"
landlord_built_form = "T1_AssetType"
landlord_wall_construction = "Wall Type Cavity"
landlord_heating_system = "Heating Fuel"
landlord_existing_pv = None
landlord_property_id = "T1_UPRN"
outcomes_filename = "RT - LiveWest.xlsx"
outcomes_sheetname = "Feedback"
outcomes_postcode = "Poscode"
outcomes_houseno = "No."
outcomes_id = "UPRN"
master_filepaths = [
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Livewest/Programme Update - March 2025/Rolling Master "
"- redacted for analysis/CAVITY-Table 1.csv"
]
master_to_asset_list_filepath = None
# PFP London
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/London"
data_filename = "PFP AREAS SURROUNDING LONDON - JAY, RUTH & LANE.xlsx"
sheet_name = "PFP SURROUNDING LONDON"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP North-West
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-West"
data_filename = "Places for People NORTH WEST - INSPECTIONS MASTER - UPDATE.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP North-East
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/North-East"
data_filename = "Places for People NORTH EAST - INSPECTIONS MASTER.xlsx"
sheet_name = "CHECKED"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# PFP East
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Places For People/East"
data_filename = "PFP EAST - Master - DN LN NG NR PE POSTCODES.xlsx"
sheet_name = "PFP EAST"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "AddressLine1"
address1_method = None
address_cols_to_concat = ["AddressLine1", "AddressLine2", "AddressLine3"]
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Archetype (PFP)"
landlord_built_form = "Archetype (PFP)"
landlord_wall_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "Uprn"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
master_filepaths = []
master_to_asset_list_filepath = None
# Wates
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Wates - "
data_filename = "ECO 4 Wates.xlsx"
sheet_name = "Roadmap Homes"
postcode_column = 'Postcode'
fulladdress_column = None
address1_column = "Address Line 1"
address1_method = None
address_cols_to_concat = ["Address Line 1", "Address Line 2", "Address Line 3"]
missing_postcodes_method = None
landlord_year_built = "Build Year"
landlord_os_uprn = None
landlord_property_type = "Archetype"
landlord_built_form = "Archetype"
landlord_wall_construction = "Wall"
landlord_heating_system = "Heating Type"
landlord_existing_pv = None
landlord_property_id = "UPRN"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
master_filepaths = []
master_to_asset_list_filepath = None
# Ealing
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Ealing/Programme data - 04032025"
# data_filename = "Ealing BC - Property Plus Tenure 25.02.2025.xlsx"
# sheet_name = "IGNORE - FULL MAIN"
# postcode_column = 'Postcode'
# fulladdress_column = "Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Year Built"
# landlord_os_uprn = None
# landlord_property_type = "Property Type Code"
# landlord_wall_construction = None
# landlord_heating_system = None
# landlord_existing_pv = None
# landlord_property_id = "Property ref"
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Colchester"
# data_filename = "Warmfront data- Colchester Borough Homes (Complete).xlsx"
# sheet_name = "Sheet1"
# postcode_column = 'Full Address.1'
# fulladdress_column = "Full Address"
# address1_column = None
# address1_method = "first_word"
# address_cols_to_concat = []
# missing_postcodes_method = None
# landlord_year_built = "Build Date"
# landlord_os_uprn = None
# landlord_property_type = "Property Type"
# landlord_wall_construction = "Wallinsul"
# landlord_heating_system = "HeatSorc"
# landlord_existing_pv = None
# landlord_property_id = "Property Reference"
# outcomes_filename = None
# outcomes_sheetname = None
# outcomes_postcode = None
# outcomes_houseno = None
# master_filepaths = []
# master_to_asset_list_filepath = None
# For Westward
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Westward"
data_filename = "WESTWARD - completed list - 20.03.2025.xlsx"
sheet_name = "Sheet1"
postcode_column = "WFT EDIT Postcode"
fulladdress_column = "Address"
address1_column = None
address1_method = "house_number_extraction"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = "Build date"
landlord_os_uprn = "UPRN"
landlord_property_type = "Location type"
landlord_built_form = None
landlord_wall_construction = "Wall Construction (EPC)"
landlord_heating_system = "Heat Source"
landlord_existing_pv = "PV (Y/N)"
landlord_property_id = "Place ref"
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
master_filepaths = []
master_to_asset_list_filepath = None
outcomes_id = None
# For ACIS - programme re-build
# data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/ACIS/ACIS Full Programme Review March 2025"
# data_filename = "ACIS asset list.xlsx"
# sheet_name = "Assets"
# address1_column = "House No"
# postcode_column = "Postcode"
# landlord_property_id = "UPRN"
# fulladdress_column = None
# address_cols_to_concat = ["House No", "Street", "Town"]
# missing_postcodes_method = None
# address1_method = None
# landlord_year_built = "YEAR BUILT"
# landlord_os_uprn = None
# landlord_property_type = "Property type"
# landlord_built_form = None
# landlord_wall_construction = "Wall Constuction"
# landlord_heating_system = "Heating"
# landlord_existing_pv = None
# outcomes_filename = "ACIS Group - 25.11.2024 - outcomes.xlsx"
# outcomes_sheetname = "Feedback"
# outcomes_postcode = "Postcode"
# outcomes_houseno = "No"
# master_filepaths = [
# os.path.join(data_folder, "ECO 3 -Table 1.csv"),
# os.path.join(data_folder, "ECO 4 -Table 1.csv"),
# ]
# master_to_asset_list_filepath = None
# For plus dane
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane"
data_filename = "PLUS DANE Asset List - for analysis.xlsx"
sheet_name = "Asset List"
address1_column = " Address"
postcode_column = " Postcode"
landlord_property_id = "UPRN"
fulladdress_column = " Address"
address_cols_to_concat = []
missing_postcodes_method = None
address1_method = None
landlord_year_built = "Property Age"
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_wall_construction = "Landlord Wall Full"
landlord_heating_system = "Landlord Heating"
landlord_existing_pv = None
outcomes_filename = "plus dane outcomes.xlsx"
outcomes_sheetname = "EVERYTHING"
outcomes_postcode = "Post Code"
outcomes_houseno = "Numb."
master_filepaths = [
os.path.join(data_folder, "JJC Rolling Master.csv"),
os.path.join(data_folder, "SCIS Rolling Master.csv"),
]
master_to_asset_list_filepath = os.path.join(data_folder, "surveys_to_assets.csv")
# Maps addresses to uprn in problematic cases
manual_uprn_map = {}
asset_list = AssetList(
local_filepath=os.path.join(data_folder, data_filename),
header=0,
sheet_name=sheet_name,
address1_colname=address1_column,
postcode_colname=postcode_column,
landlord_property_id=landlord_property_id,
full_address_colname=fulladdress_column,
full_address_cols_to_concat=address_cols_to_concat,
missing_postcodes_method=missing_postcodes_method,
address1_extraction_method=address1_method,
landlord_year_built=landlord_year_built,
landlord_uprn=landlord_os_uprn,
landlord_property_type=landlord_property_type,
landlord_built_form=landlord_built_form,
landlord_wall_construction=landlord_wall_construction,
landlord_roof_construction=landlord_roof_construction,
landlord_heating_system=landlord_heating_system,
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
phase=phase
)
asset_list.init_standardise()
# We produce the new maps, which can be saved for future usage
new_property_type_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type] if
asset_list.landlord_property_type else {}
).items()
if k not in PROPERTY_MAPPING
}
new_built_form_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form] if
asset_list.landlord_built_form else {}
).items()
if k not in BUILT_FORM_MAPPINGS
}
new_wall_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
).items()
if k not in WALL_CONSTRUCTION_MAPPINGS
}
new_heating_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system] if
asset_list.landlord_heating_system else {}
).items()
if k not in HEATING_MAPPINGS
}
new_existing_pv_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
).items()
if k not in EXISTING_PV_MAPPINGS
}
new_roof_construction_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
asset_list.landlord_roof_construction else {}
).items()
if k not in ROOF_CONSTRUCTION_MAPPINGS
}
asset_list.apply_standardiation()
# We now flag properties that have been treated under existing programmes
asset_list.flag_outcomes(
outcomes_filepath=os.path.join(data_folder, outcomes_filename) if outcomes_filename else None,
outcomes_sheetname=outcomes_sheetname,
outcomes_address=outcomes_address,
outcomes_postcode=outcomes_postcode,
outcomes_houseno=outcomes_houseno,
outcomes_id=outcomes_id
)
asset_list.flag_survey_master(
master_filepaths=master_filepaths,
master_to_asset_list_filepath=master_to_asset_list_filepath
)
### We retrieve the EPC data
# We chunk up this data into chunk_size rows at a time (set below)
# Create the chunks directory
epc_api_only = False
force_retrieve_data = False
skip = None # Used to skip already completed chunks
chunk_size = 1000
filename = "Chunk {i}.csv"
download_folder = os.path.join(data_folder, "Chunks")
if not os.path.exists(download_folder):
os.makedirs(download_folder)
chunk_indexes = list(range(0, len(asset_list.standardised_asset_list), chunk_size))
downloaded_files = {filename.format(i=i) for i in chunk_indexes}
# We check if we have files associated to these files already and if we do, and we do not want to force the
# fetching of the data, we skip
folder_contents = os.listdir(download_folder)
if all(x in folder_contents for x in downloaded_files):
skip = max(chunk_indexes)
if any(x in folder_contents for x in downloaded_files):
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
print(f"Processing chunk {i} to {i + chunk_size}")
if skip is not None and not force_retrieve_data:
if i <= skip:
continue
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
df=chunk,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
uprn_column=AssetList.STANDARD_UPRN,
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
address1_column=AssetList.STANDARD_ADDRESS_1,
postcode_column=AssetList.STANDARD_POSTCODE,
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
built_form_column=AssetList.STANDARD_BUILT_FORM,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only,
epc_auth_token=EPC_AUTH_TOKEN
)
# We now retrieve any failed properties
chunk_failed = chunk[chunk[asset_list.DOMNA_PROPERTY_ID].isin(errors_chunk)]
epc_data_failed, _, _ = get_data(
df=chunk_failed,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
uprn_column=AssetList.STANDARD_UPRN,
fulladdress_column=AssetList.STANDARD_FULL_ADDRESS,
address1_column=AssetList.STANDARD_ADDRESS_1,
postcode_column=AssetList.STANDARD_POSTCODE,
property_type_column=AssetList.STANDARD_PROPERTY_TYPE,
built_form_column=AssetList.STANDARD_BUILT_FORM,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only,
epc_auth_token=EPC_AUTH_TOKEN
)
epc_data_chunk.extend(epc_data_failed)
# Append the failed data to the main data
# Store the chunk locally as a csv
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
# Store the errors and no-data locally
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
json.dump(errors_chunk, f)
with open(os.path.join(data_folder, f"Chunks/Chunk {i} nodata.csv"), "w") as f:
json.dump(no_epc_chunk, f)
# We read in and concatenate the created chunks
# List the contents
epc_data = []
for file in downloaded_files:
csv_data = pd.read_csv(os.path.join(download_folder, file))
# We need to convert the recommendations back to a list
csv_data["recommendations"] = csv_data["recommendations"].apply(eval)
# We don't have this if we didn't run the pulling from find my epc
if "find_my_epc_data" in csv_data.columns:
csv_data["find_my_epc_data"] = csv_data["find_my_epc_data"].apply(eval)
epc_data.append(csv_data)
epc_df = pd.concat(epc_data)
epc_df["estimated"] = epc_df["estimated"].fillna(False)
# We expand out the recommendations
recommendations_df = epc_df[[asset_list.DOMNA_PROPERTY_ID, "recommendations"]]
unique_recommendations = set()
for _, row in recommendations_df.iterrows():
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
transformed_data = []
for _, row in recommendations_df.iterrows():
# Initialize a dictionary for this row with False for all recommendations
row_data = {col: False for col in columns}
row_data[asset_list.DOMNA_PROPERTY_ID] = row[asset_list.DOMNA_PROPERTY_ID]
# Set True for each recommendation present in this row
for rec in row["recommendations"]:
recommendation_text = rec["improvement-summary-text"]
row_data[recommendation_text] = True
# Append the row data to transformed_data
transformed_data.append(row_data)
transformed_df = pd.DataFrame(transformed_data)
transformed_df = transformed_df[
[
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
]
]
transformed_df["epc_has_floor_recommendation"] = (
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
transformed_df["Floor insulation (suspended floor)"]
)
# Get the find my epc data. If the scrape was never run the column is absent;
# add it as all-None so the loop below can treat every row uniformly.
if "find_my_epc_data" not in epc_df.columns:
    epc_df["find_my_epc_data"] = None

find_my_epc_data = []
for _, x in epc_df.iterrows():
    record = {asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
    scraped = x["find_my_epc_data"]
    # Only unpack real dicts. A plain truthiness test lets NaN through (NaN is
    # truthy), and NaN is what pd.concat leaves in rows coming from a CSV that
    # did not carry this column; unpacking it with ``**`` raises TypeError.
    if isinstance(scraped, dict):
        record.update(scraped)
    find_my_epc_data.append(record)
find_my_epc_data = pd.DataFrame(find_my_epc_data)

# Attach the floor-recommendation flag derived from the EPC recommendations.
find_my_epc_data = find_my_epc_data.merge(
    transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
    how="left",
    on=asset_list.DOMNA_PROPERTY_ID,
)

# We check if we get the solar pv column:
if "Solar photovoltaics" not in find_my_epc_data.columns:
    find_my_epc_data["Solar photovoltaics"] = False

# Retrieve just the data we need from the EPC API frame, under standard names.
epc_df = epc_df[
    [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
].rename(columns=asset_list.EPC_API_DATA_NAMES)

# Look for columns not in the find my EPC data, which will have happened if we
# didn't retrieve it in the first place; create them empty so the selection
# below cannot fail.
missed_find_epc_cols = [
    c for c in asset_list.FIND_EPC_DATA_NAMES.keys() if c not in find_my_epc_data.columns
]
for c in missed_find_epc_cols:
    find_my_epc_data[c] = None

epc_df = epc_df.merge(
    find_my_epc_data[
        [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
        + list(asset_list.FIND_EPC_DATA_NAMES.keys())
    ].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
    how="left",
    on=asset_list.DOMNA_PROPERTY_ID,
)
# Hand the combined EPC frame to the asset list, then derive its attributes.
asset_list.merge_data(epc_df)
asset_list.extract_attributes()

# Fetch the msgpack-encoded cleaned EPC blob from S3 and decode it in one step.
cleaned = msgpack.unpackb(
    read_from_s3(
        bucket_name="retrofit-data-dev",
        s3_file_name="cleaned_epc_data/cleaned.bson",
    ),
    raw=False,
)

# Work-type identification uses the decoded blob; print the resulting figures.
asset_list.identify_worktypes(cleaned)
pprint(asset_list.work_type_figures)
asset_list.flat_analysis()
################################################################
# WESTWARD - comparison between Kieran's method & automated
################################################################
# Check 1)
# Load the manually prepared "Straight Fill" tab and attach the automated
# cavity_reason for each property so the two methods can be compared.
straight_fill_path = os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx")
cavity_fills = pd.read_excel(straight_fill_path, sheet_name="Straight Fill")
straight_fill_reasons = asset_list.standardised_asset_list[
    [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
]
cavity_fills = cavity_fills.merge(
    straight_fill_reasons,
    left_on=asset_list.landlord_property_id,
    right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID,
    how="left",
)
# Rows with no automated reason are labelled explicitly before counting.
cavity_fills["cavity_reason"] = cavity_fills["cavity_reason"].fillna("Not identified")
straight_fill_counts = cavity_fills["cavity_reason"].value_counts()
print(straight_fill_counts)
# Didn't identify 3 properties because they're bedsits
# 4 properties were identified, not based on the non-intrusives but instead because
# Westward said they were built in 2003/2007. Have adjusted this to use the age from the
# epc as well, as EPC says 1975 and they look like 1975 properties
# 37 properties flagged as already having solar - these are all because the landlord said they have solar
# e.g.
# https://earth.google.com/web/search/11+Winsland+Avenue+TOTNES+TQ9+5FT/@50.43354465,-3.71318276,46.57468503a,
# 59.14004365d,35y,0h,0t,
# 0r/data=CpABGmISXAolMHg0ODZkMWQxOGE4NWRiZjdkOjB4YjBhM2E5M2Q3YWVlMWEwYhlZYgp7fzdJQCHFfC9027QNwCohMTEgV2luc2xhbmQgQXZlbnVlIFRPVE5FUyBUUTkgNUZUGAIgASImCiQJbxsQEoo3SUARXQcp_HE3SUAZBmiZGJ6yDcAhCA0fqq63DcBCAggBOgMKATBCAggASg0I____________ARAA
# https://earth.google.com/web/search/15+St+Anne%27s+Ct,+Newton+Abbot+TQ12+1TL/@50.53068337,-3.61611128,
# 11.74908956a,135.73212429d,35y,0h,0t,
# 0r/data=CpUBGmcSYQolMHg0ODZkMDVkMjFhODhjZjgxOjB4MjBmMzE2Zjc3MGI2NGMwYxlCxHLw8UNJQCFZqyzALe4MwComMTUgU3QgQW5uZSdzIEN0LCBOZXd0b24gQWJib3QgVFExMiAxVEwYAiABIiYKJAm-r6U2iDdJQBHS5ICRdDdJQBmYGVpmiLINwCG8wcrtqbYNwEICCAE6AwoBMEICCABKDQj___________8BEAA
# Check 2)
# Load the "Solar PV - Straight Fill" tab and attach the automated
# cavity_reason for each property.
solar_fill_path = os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx")
cavity_fills_with_solar = pd.read_excel(solar_fill_path, sheet_name="Solar PV - Straight Fill")
solar_fill_reasons = asset_list.standardised_asset_list[
    [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason"]
]
cavity_fills_with_solar = cavity_fills_with_solar.merge(
    solar_fill_reasons,
    left_on=asset_list.landlord_property_id,
    right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID,
    how="left",
)
# Rows with no automated reason are labelled explicitly before counting.
cavity_fills_with_solar["cavity_reason"] = (
    cavity_fills_with_solar["cavity_reason"].fillna("Not identified")
)
solar_fill_counts = cavity_fills_with_solar["cavity_reason"].value_counts()
print(solar_fill_counts)
# 203 properties total
# 140 properties were flagged up based on non-intrusives (Non-Intrusive Data Showed Empty Cavity)
# 63 property already has solar
# Check 3) RDF
# Load the "RDF CIGA checks" tab and attach both automated reasons (cavity and
# solar) for each property.
rdf_path = os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx")
rdf = pd.read_excel(rdf_path, sheet_name="RDF CIGA checks")
rdf_reasons = asset_list.standardised_asset_list[
    [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
]
rdf = rdf.merge(
    rdf_reasons,
    left_on=asset_list.landlord_property_id,
    right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID,
    how="left",
)
# Rows with no automated cavity reason are labelled explicitly before counting.
rdf["cavity_reason"] = rdf["cavity_reason"].fillna("Not identified")
rdf_counts = rdf["cavity_reason"].value_counts()
print(rdf_counts)
# 264 properties are not identified, 261 of which are due to the fact they contain materials
# The other 3 were determined to be eligible for solar instead
# Many of these units that were identified for rdf works could be solar jobs
# Load the "Solar PV - RDF CIGA Checks" tab and attach both automated reasons
# (cavity and solar) for each property.
rdf_with_solar = pd.read_excel(
    os.path.join(data_folder, "WESTWARD - Route March Prep.xlsx"),
    sheet_name="Solar PV - RDF CIGA Checks"
)
rdf_with_solar = rdf_with_solar.merge(
    asset_list.standardised_asset_list[
        [asset_list.STANDARD_LANDLORD_PROPERTY_ID, "cavity_reason", "solar_reason"]
    ],
    how="left",
    left_on=asset_list.landlord_property_id,
    right_on=asset_list.STANDARD_LANDLORD_PROPERTY_ID
)
rdf_with_solar["cavity_reason"] = rdf_with_solar["cavity_reason"].fillna("Not identified")
# Print the breakdown like the three checks above do; as a bare expression the
# result was computed and silently discarded when run as a script.
print(rdf_with_solar["cavity_reason"].value_counts())
# All others identified - some flagged as empties due to EPC or landlord data suggesting as much
# 5 not identified due to containing COMPACTED BEAD
# NOTE(review): the indexer below is the landlord-property-ID column itself,
# not a boolean condition built from it. Unless that column holds booleans,
# pandas will interpret its values as column labels and most likely raise
# KeyError. The intended filter (e.g. `.notna()` or `.isin([...])`) is not
# visible here -- confirm and complete before running.
asset_list.standardised_asset_list = asset_list.standardised_asset_list[
asset_list.standardised_asset_list[asset_list.landlord_property_id]
]
# Load occupant contact details from the landlord's spreadsheet ("Report 1"
# tab), keyed on the landlord property ID column.
asset_list.load_contact_details(
local_filepath=os.path.join(data_folder, "Full property list wth D&V report V look up 12.2.25.xlsx"),
sheet_name="Report 1",
landlord_property_id=asset_list.landlord_property_id,
phone_number_column='Property Current Tel. Number',
# NOTE(review): 'Proeprty' is presumably the spreadsheet's own header
# spelling -- verify against the file before "correcting" it.
fullname_column='Proeprty Current Occupant',
# Only a combined full-name column is supplied; no separate first/last names.
firstname_column=None,
lastname_column=None,
email_column=None, # TODO - we need this
)
# Convert to a format suitable for CRM
# TODO: TEMP - single hard-coded surveyor assignment used as a placeholder
placeholder_assignment = {
    asset_list.landlord_property_id: "02610001",
    "week_commencing": "10/10/2025",
    "surveyor_name": "Khalim Conn-Kowlessar",
    "surveyor_email": "khalim@domna.homes",
}
assigned_surveyors = pd.DataFrame([placeholder_assignment])

# TODO: Sort the output by postcode
# CRM settings passed straight through to prepare_for_crm below.
company_domain, crm_pipeline_name, first_dealstage = (
    "ealing.gov.uk",
    "Survey Management",
    "READY TO BEGIN SCHEDULING",
)

# TODO - temp, upload to either SharePoint or AWS
asset_list.prepare_for_crm(
    first_dealstage=first_dealstage,
    crm_pipeline_name=crm_pipeline_name,
    company_domain=company_domain,
    assigned_surveyors=assigned_surveyors,
)
# Keep a local handle on the export frame for the CSV export.
hubspot_data = asset_list.hubspot_data
# Store as an excel. ``os.path.splitext`` strips only the final extension, and
# unlike the previous split-on-"." approach it leaves a name that has no
# extension intact instead of collapsing it to an empty string.
filename = os.path.join(data_folder, os.path.splitext(data_filename)[0]) + " - Standardised.xlsx"

# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
with pd.ExcelWriter(filename) as writer:
    asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
    asset_list.flat_data.to_excel(writer, sheet_name="Flat Data", index=False)

    # Outcome-related tabs are only written when they contain rows.
    optional_sheets = (
        ("Outcomes", asset_list.outcomes_for_output),
        ("Unmatched Submissions", asset_list.unmatched_submissions),
        ("Unmatched Outcomes", asset_list.outcomes_no_match),
    )
    for optional_sheet_name, optional_frame in optional_sheets:
        if not optional_frame.empty:
            optional_frame.to_excel(writer, sheet_name=optional_sheet_name, index=False)

# Store the Hubspot export as a csv
hubspot_data.to_csv(os.path.join(data_folder, "Hubspot Export.csv"), index=False)

View file

@ -0,0 +1,148 @@
import numpy as np
# Closed set of built-form labels that raw landlord values are mapped onto.
STANDARD_BUILT_FORMS = {
    # Fallback when the raw value carries no usable built-form information
    "unknown",
    # Houses
    "detached",
    "semi-detached",
    "end-terrace",
    "mid-terrace",
    # Flats
    "basement",
    "ground floor",
    "mid-floor",
    "top-floor",
}
# Lookup from raw built-form labels (as seen in landlord data) to one of the
# STANDARD_BUILT_FORMS values. Keys are matched exactly, including case and
# trailing spaces.
# NOTE(review): several "First Floor ..." labels map to 'ground floor' while
# 'First Floor Flat General' maps to 'mid-floor' -- confirm whether that split
# is intentional.
BUILT_FORM_MAPPINGS = {
    'House (End Terrace)': 'end-terrace',
    'Ground Floor Flat General': 'ground floor',
    'House (Semi)': 'semi-detached',
    'House (Mid Terrace)': 'mid-terrace',
    'Bungalow': 'unknown',
    'House (Mid terrace)': 'mid-terrace',
    'Maisonette': 'unknown',
    'Flat': 'unknown',
    'First Floor Flat General': 'mid-floor',
    'Bungalow (Semi)': 'semi-detached',
    'Detached House': 'detached',
    'End Terraced House': 'end-terrace',
    'Studio (Ground floor)': 'ground floor',
    'Mid Terraced House': 'mid-terrace',
    'Ground Floor Flat': 'ground floor',
    'Semi Detached House': 'semi-detached',
    'Detached Property': 'detached',
    'Level not confirmed': 'unknown',
    'Bedsit': 'unknown',
    'Cottage': 'detached',
    'Terraced House': 'mid-terrace',
    'Studio (1st Floor)': 'ground floor',
    'Standard Maisonette': 'unknown',
    'Third Floor Flat or Above': 'top-floor',
    'Town House': 'end-terrace',
    'Guest room in a complex': 'unknown',
    'Back To Back House': 'mid-terrace',
    'PIMSS EMPTY': 'unknown',
    'Flat Basement': 'basement',
    'House': 'unknown',
    'Second Floor Flat': 'mid-floor',
    'First Floor Flat': 'ground floor',
    'Room Only': 'unknown',
    'End Terrace Housex': 'end-terrace',
    'Mid Terrace Bungalow': 'mid-terrace',
    'End Terrace Bungalow': 'end-terrace',
    'Mid Terrace House': 'mid-terrace',
    'Detached Bungalow': 'detached',
    'End Terrace House': 'end-terrace',
    'Mid Terrace Housekeeping ': 'mid-terrace',
    'Semi Detached Bung': 'semi-detached',
    'Guest Room': 'unknown',
    'Coach House': 'detached',
    'Office Buildings': 'unknown',
    'Maisonnette': 'mid-floor',
    'Bedspace': 'unknown',
    'Studio (3rd floor and above)': 'top-floor',
    'Adapted Property For Disabled': 'unknown',
    'Studio (2nd floor)': 'mid-floor',
    np.nan: 'unknown',
    'Third Floor Flat': 'mid-floor',
    '2 Ext. Wall Flat': 'mid-terrace',
    'Hostel': 'unknown',
    'Flat: Mid Terrace: Mid Floor': 'mid-terrace',
    'Bungalow: SemiDetached': 'semi-detached',
    'Flat: End Terrace: Top Floor': 'end-terrace',
    'Flat: Enclosed End Terrace: Top Floor': 'end-terrace',
    'Maisonette: End Terrace: Ground Floor': 'end-terrace',
    'Flat: End Terrace: Ground Floor': 'end-terrace',
    'Flat: Mid Terrace: Top Floor': 'mid-terrace',
    'House: Detached': 'detached',
    'Flat: End Terrace: Mid Floor': 'end-terrace',
    'House: SemiDetached': 'semi-detached',
    'Flat: Semi Detached: Ground Floor': 'semi-detached',
    'Flat: Semi Detached: Top Floor': 'semi-detached',
    'Flat: Mid Terrace: Ground Floor': 'mid-terrace',
    'House: MidTerrace': 'mid-terrace',
    'House: EndTerrace': 'end-terrace',
    'Bungalow: EndTerrace': 'end-terrace',
    'Bungalow: MidTerrace': 'mid-terrace',
    'Flat: Semi Detached: Mid Floor': 'semi-detached',
    'Maisonette: Mid Terrace: Top Floor': 'mid-terrace',
    'Flat: Enclosed Mid Terrace: Mid Floor': 'mid-terrace',
    'Flat: Enclosed Mid Terrace: Ground Floor': 'mid-terrace',
    'Flat: Detached: Ground Floor': 'detached',
    'Flat: Detached: Mid Floor': 'detached',
    'Flat: Detached: Top Floor': 'detached',
    'Flat: Enclosed End Terrace: Mid Floor': 'end-terrace',
    'Bungalow: Detached': 'detached',
    'Maisonette: End Terrace: Mid Floor': 'end-terrace',
    'Maisonette: Detached: Top Floor': 'detached',
    'Flat: Enclosed End Terrace: Ground Floor': 'end-terrace',
    'Flat: Enclosed Mid Terrace: Top Floor': 'mid-terrace',
    'House: EnclosedEndTerrace': 'end-terrace',
    '3 Ext. Wall Flat': 'semi-detached',
    'Bungalow Detached': 'detached',
    'Bungalow End Terrace': 'end-terrace',
    'Bungalow Mid Terrace': 'mid-terrace',
    # Was 'detached'; every other semi-detached label here (including the
    # other bungalow spellings) maps to 'semi-detached'.
    'Bungalow Semi Detached': 'semi-detached',
    'Maisonette 2 Ext. Wall': 'mid-terrace',
    'Maisonette 3 Ext. Wall': 'semi-detached',
    'End-terrace': 'end-terrace',
    'Mid-terrace': 'mid-terrace',
    'Semi-detached': 'semi-detached',
    'Detached': 'detached',
    'Flat / maisonette': 'unknown',
    '2014 onwards': 'unknown',
    'Semi Detached': 'semi-detached',
    'End Terraced': 'end-terrace',
    'Basement': 'basement',
    'No': 'unknown',
    'Mid Terrace': 'mid-terrace',
    'Link Detached': 'detached',
    'Mid Terraced': 'mid-terrace',
    'Ground Floor': 'ground floor',
    'End Terrace': 'end-terrace',
    'Sheltrd Semi Det': 'semi-detached',
    'Shop': 'unknown',
    'Fourth Floor': 'mid-floor',
    'Terraced': 'mid-terrace',
    'Leasehold Terr': 'mid-terrace',
    'Room': 'unknown',
    'Second Floor': 'mid-floor',
    'Third Floor': 'mid-floor',
    'Office': 'unknown',
    'First Floor Over Arch': 'ground floor',
    '16-25 IND-PPL': 'unknown',
    'Seventh Floor': 'top-floor',
    'Sheltered': 'unknown',
    'Shelt Bung End': 'end-terrace',
    'Room In Shared Accommodation': 'unknown',
    'Sheltred Bung Terrace': 'mid-terrace',
    'Garage In Block': 'unknown',
    'First Floor': 'ground floor',
    'First Floor Over Garage': 'ground floor',
    'Leasehold': 'unknown',
    'Sheltred Bung': 'unknown',
    'Garage': 'unknown',
    'Sixth Floor': 'top-floor',
    'Sheltered Bung': 'semi-detached',
    'Guest': 'unknown',
    'Fifth Floor': 'mid-floor'
}

View file

@ -0,0 +1,20 @@
import numpy as np
# Closed set of labels describing whether a property already has solar PV.
STANDARD_EXISTING_PV = {
    "already has PV",
    "no PV",
    "unknown",
}
# Lookup from raw "existing PV" values to one of the STANDARD_EXISTING_PV
# labels. Keys are matched exactly (case-sensitive).
EXISTING_PV_MAPPINGS = {
    # Yes/no given as text
    "NO": "no PV",
    "YES": "already has PV",
    "no": "no PV",
    "yes": "already has PV",
    # Yes/no given as booleans
    True: "already has PV",
    False: "no PV",
    # Missing value
    np.nan: "unknown",
    # Free-text descriptions of an installed array
    "PV: 2kWp array": "already has PV",
    "PV: 25% roof area, PV: 3.6kWp array": "already has PV",
    "PV: 10% roof area, PV: 2kWp array": "already has PV",
    "PV: 50% roof area": "already has PV",
    "Solar PV": "already has PV",
}

View file

@ -0,0 +1,206 @@
import numpy as np
# Closed set of heating-system labels that raw landlord values are mapped onto.
STANDARD_HEATING_SYSTEMS = {
    # Gas
    "gas combi boiler",
    "gas condensing boiler",
    "gas condensing combi",
    "communal gas boiler",
    # Electric
    "electric storage heaters",
    "high heat retention storage heaters",
    "electric radiators",
    "electric boiler",
    "electric ceiling",
    "electric underfloor",
    # Heat pumps
    "air source heat pump",
    "ground source heat pump",
    # Other boilers / shared systems / room heaters
    "oil boiler",
    "boiler - other fuel",
    "district heating",
    "room heaters",
    # Fuel-only labels (no system type given)
    "electric fuel",
    "oil fuel",
    "solid fuel",
    # Catch-alls
    "other",
    "unknown",
    "no heating",
}
# Lookup from raw heating-system descriptions (as seen in landlord data) to one
# of the STANDARD_HEATING_SYSTEMS values. Keys are matched exactly, including
# case and trailing spaces. Every value must be a member of the standard set.
HEATING_MAPPINGS = {
    "Combi - GAS": "gas combi boiler",
    "E7 Storage Heaters": "high heat retention storage heaters",
    "District heating system": "district heating",
    "Condensing Boiler - GAS": "gas condensing boiler",
    "Boiler Oil/other": "oil boiler",
    "Condensing Combi - Gas": "gas condensing combi",
    "Air Source Source Heat Pump": "air source heat pump",
    "Biomass Boiler": "boiler - other fuel",
    "Ground Source Heat Pump": "ground source heat pump",
    "Electric Oil filled radiators": "electric radiators",
    "Solid Fuel": "other",
    "LPG Boiler": "boiler - other fuel",
    "Electric Boiler": "electric boiler",
    "No data": "unknown",
    "Boiler Communal/Commercial - GAS": "communal gas boiler",
    "Eco Electric Radiators": "electric radiators",
    "Gas fire": "other",
    "Backboiler - Solid fuel": "other",
    'combi - gas': 'gas combi boiler',
    'e7 storage heaters': 'high heat retention storage heaters',
    'district heating system': 'district heating',
    'condensing boiler - gas': 'gas condensing boiler',
    'boiler oil/other': 'oil boiler',
    'condensing combi - gas': 'gas condensing combi',
    'air source source heat pump': 'air source heat pump',
    'biomass boiler': 'boiler - other fuel',
    'ground source heat pump': 'ground source heat pump',
    'electric oil filled radiators': 'electric radiators',
    'solid fuel': 'other',
    'lpg boiler': 'boiler - other fuel',
    'electric boiler': 'electric boiler',
    'no data': 'unknown',
    'boiler communal/commercial - gas': 'communal gas boiler',
    'eco electric radiators': 'electric radiators',
    'gas fire': 'other',
    'backboiler - solid fuel': 'other',
    'ASHP': 'air source heat pump',
    'COMMHEAT': 'communal gas boiler',
    'GBB': 'gas combi boiler',
    'GFS': 'gas condensing boiler',
    'GWA': 'gas condensing boiler',
    'GWM': 'gas condensing combi',
    'HDU': 'district heating',
    'OILBLR': 'oil boiler',
    'SOLIDFUEL': 'boiler - other fuel',
    'STORHTR': 'electric storage heaters',
    np.nan: 'unknown',
    'Oil': 'boiler - other fuel',
    'Gas': 'gas condensing boiler',
    'Electric': 'electric storage heaters',
    'Solid fuel': 'other',
    'No Heat': 'unknown',
    'GSHP': 'ground source heat pump',
    'Boiler Oil': 'oil boiler',
    'Boiler Electricity': 'electric boiler',
    'Boiler ND': 'unknown',
    'ND Mains gas': 'unknown',
    'Room heaters Mains gas': "room heaters",
    'Heat pump (air) Electricity': 'air source heat pump',
    'Room heaters Electricity': 'electric radiators',
    'Room heaters Oil': 'room heaters',
    'No heating system ND': 'no heating',
    'Heat pump (wet) Electricity': 'ground source heat pump',
    'Room heaters Biomass': 'room heaters',
    'ND Solid fuel': 'unknown',
    'Boiler Mains gas': 'gas combi boiler',
    'Boiler LPG': 'boiler - other fuel',
    'Room heaters Solid fuel': 'room heaters',
    'ND ND': 'unknown',
    'Storage heating Electricity': 'electric storage heaters',
    'ND Electricity': 'unknown',
    'Community heating Community (non-gas)': 'district heating',
    'No heating system N/A': 'no heating',
    'Boiler Solid fuel': 'boiler - other fuel',
    'Community heating Community (mains gas)': 'communal gas boiler',
    'Boiler Biomass': 'boiler - other fuel',
    'No heating system Mains gas': 'no heating',
    'Storage heaters': 'electric storage heaters',
    'Air Source': 'air source heat pump',
    'Ground source': 'ground source heat pump',
    'OIl': 'boiler - other fuel',
    'Quantum storage heaters (old sh on EPC)': 'high heat retention storage heaters',
    'Quanum Storage heaters': 'high heat retention storage heaters',
    'Quantum storage heaters (Old SH on EPC)': 'high heat retention storage heaters',
    'Quantum storage heaters': 'high heat retention storage heaters',
    'Air Source (EPC says SH)': 'air source heat pump',
    'ASHP - Was logged as oil': 'air source heat pump',
    'Ground Source': 'ground source heat pump',
    'District Heating': 'district heating',
    'Mains Gas (Communal)': 'communal gas boiler',
    'LPG': 'boiler - other fuel',
    'Mains Gas': 'gas condensing boiler',
    'ELECTRIC': 'electric fuel',
    'OIL': 'oil fuel',
    'SOLID FUEL': 'solid fuel',
    'GAS': 'gas combi boiler',
    'DO NOT SURVEY': 'unknown',
    'Gas Boiler': 'gas combi boiler',
    'Communal Gas ': 'communal gas boiler',
    'Communal': 'communal gas boiler',
    'Communal Gas': 'communal gas boiler',
    'Wood Burning Boiler': "boiler - other fuel",
    'Oil Fired Boiler': 'oil boiler',
    'Electric (direct acting) room heaters: Panel, convector or radiant heaters Electricity: Electricity':
        'room heaters',
    'Electric Storage Systems: Integrated storage+direct-acting heater Electricity: Electricity':
        'electric storage heaters',
    'Community Heating Systems: Community CHP and boilers (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler',
    # The three "<X> rated Regular Boiler Gas: Mains Gas" entries used to map
    # to 'gas boiler', which is not a member of STANDARD_HEATING_SYSTEMS. They
    # now follow the 'Boiler: <X> rated Regular Boiler' entries further down.
    'Boiler: D rated Regular Boiler Gas: Mains Gas': 'gas condensing boiler',
    'Boiler: C rated Combi Gas: Mains Gas': 'gas combi boiler',
    'Electric Storage Systems: Fan storage heaters Electricity: Electricity': 'electric storage heaters',
    ' ': 'unknown',
    'Boiler: G rated Regular Boiler Gas: Mains Gas': 'gas condensing boiler',
    'Electric Storage Systems: Modern (slimline) storage heaters Electricity: Electricity': 'electric storage heaters',
    'Boiler: E rated Regular Boiler Gas: Mains Gas': 'gas condensing boiler',
    'Boiler: A rated Regular Boiler Electricity: Electricity': 'electric boiler',
    'Community Heating Systems: Community boilers only (RdSAP) Gas: Mains Gas (Community)': 'communal gas boiler',
    'Boiler: A rated Combi Gas: Mains Gas': 'gas condensing combi',
    'Boiler: A rated CPSU Electricity: Electricity': 'electric boiler',
    'Heat Pump: Electric Heat pumps: Ground source heat pump with flow temperature <= 35°C': 'ground source heat pump',
    'Heat Pump: Electric Heat pumps: Ground source heat pump in other cases': 'ground source heat pump',
    'Electric Storage Systems: High heat retention storage heaters': 'high heat retention storage heaters',
    'Heat Pump: Electric Heat pumps: Air source heat pump with flow temperature <= 35°C': 'air source heat pump',
    'Electric (direct acting) room heaters: Panel, convector or radiant heaters': 'room heaters',
    'Boiler: C rated Combi': 'gas combi boiler',
    'Boiler: B rated Regular Boiler': 'gas condensing boiler',
    'Boiler: E rated Combi': 'gas combi boiler',
    'Boiler: A rated Combi': 'gas combi boiler',
    'Boiler: E rated Regular Boiler': 'gas condensing boiler',
    'Community Heating Systems: Community boilers only (RdSAP)': 'district heating',
    'Boiler: C rated Regular Boiler': 'gas condensing boiler',
    'Boiler: A rated Regular Boiler': 'gas condensing boiler',
    'Electric Storage Systems: Fan storage heaters': 'electric storage heaters',
    'Boiler: F rated Combi': 'gas combi boiler',
    'Room heaters': 'room heaters',
    'Room Heaters': 'room heaters',
    'Boiler': 'gas condensing boiler',
    'Heat Pump (Wet)': 'air source heat pump',
    'Community Heating': 'district heating',
    'Heat pump (wet)': 'air source heat pump',
    'Electric ceiling heating': 'electric ceiling',
    'Electric under floor heating': 'electric underfloor',
    'Community heating': 'district heating',
    'Wet - Radiators Air Source Heat Pump': 'air source heat pump',
    'Wet - Radiators Electric': 'electric boiler',
    'Storage Heaters': 'high heat retention storage heaters',
    'Wet - Radiators Oil': 'oil boiler',
    'Communal Wet - Radiators Gas': 'communal gas boiler',
    'Electric - Storage/Panel Heaters Electric': 'electric storage heaters',
    'Gas Central Heating': 'gas combi boiler',
    'Wet - Radiators Solar': 'other',
    'Electric - Storage/Panel Heaters LPG': 'electric storage heaters',
    'No Heating Solid': 'no heating',
    'Wet - Underfloor Gas': 'gas condensing boiler',
    'No Heating Electric': 'no heating',
    'Oil Fired Central Heating': 'oil boiler',
    'Warm Air Gas': 'other',
    'Communal Boilers': 'communal gas boiler',
    'Wet - Radiators Gas': 'gas combi boiler',
    'Wet - Radiators Solid': 'solid fuel',
    'Wet - Radiators LPG': 'other',
    'No Heating Gas': 'no heating',
    'No Heating': 'no heating',
    'Panel Heaters': 'electric radiators',
    'Rointe Electric Heating': 'electric storage heaters',
    'Underfloor Heating': 'electric underfloor',
    'Air Source Heating': 'air source heat pump',
    'Warm Air Electric': 'other',
    'Communal Wet - Radiators Electric': 'communal gas boiler',
    'Wet - Underfloor Solar': 'other',
    'No Heating Required Gas': 'unknown',
    'Electric - Storage/Panel Heaters Gas': 'electric storage heaters',
    'Electric - Storage/Panel Heaters Solid': 'electric storage heaters'
}

View file

@ -0,0 +1,182 @@
import numpy as np
# These are the standard categories for property types
STANDARD_PROPERTY_TYPES = {
    # Individual dwellings
    "house",
    "bungalow",
    "flat",
    "maisonette",
    "bedsit",
    "coach house",
    "park home",
    # Whole blocks
    "block house",
    "block of flats",
    # Catch-alls
    "other",
    "unknown",
}
# This is a basic mapping that we use to map values that we've seen commonly to standard values
# Lookup from raw property-type labels (as seen in landlord data) to one of the
# STANDARD_PROPERTY_TYPES values. Keys are matched exactly, including case and
# trailing spaces.
PROPERTY_MAPPING = {
    "HOUSE": "house",
    "FLAT": "flat",
    "MAISONET": "maisonette",
    "BUNGALOW": "bungalow",
    "BLKHOUS": "block house",
    "blkhous": "block house",
    "BEDSIT": "bedsit",
    "COACHSE": "coach house",
    "coachse": "coach house",
    'Admin Unit Type': 'unknown',
    'Block': 'block of flats',
    'Bungalow': 'bungalow',
    'Flat': 'flat',
    'House': 'house',
    'Maisonette': 'maisonette',
    'Stairwell': 'other',
    'MAISON': 'maisonette',
    '3 Bed Semi Detached House': 'house',
    '3 Bed Mid Terrace House': 'house',
    '2 Bed Semi Detached House': 'house',
    '4 Bed Semi Detached House': 'house',
    '2 Bed End Terrace House': 'house',
    '1 Bed Sheltered Bungalow': 'bungalow',
    '1 Bed 1st Floor Sheltered Flat': 'flat',
    '2 Bed Second Floor Flat': 'flat',
    '1 Bed Mid Terrace House': 'house',
    '1 Bed End Terrace House': 'house',
    '7 Bed Detached House': 'house',
    '4 Bed End Terrace House': 'house',
    '1 Bed Link House': 'house',
    '1 Bed Second Floor Flat': 'flat',
    '2 Bed Detached House': 'house',
    '1 Bed Ground Floor Flat': 'flat',
    '2 Bed Sheltered Bungalow': 'bungalow',
    '4 Bed Mid Terrace House': 'house',
    '2 Bed Mid Terrace House': 'house',
    '2 Bed First Floor Flat': 'flat',
    '3 Bed Detached House': 'house',
    'Ground Floor Bedsit': 'bedsit',
    '3 Bed Bungalow': 'bungalow',
    np.nan: 'unknown',
    '5 Bed End Terrace House': 'house',
    '1 Bed Grd Floor Sheltered Flat': 'flat',
    '3 Bed End Terrace House': 'house',
    '2 Bed Second Floor Maisonette': 'maisonette',
    '2 Bed Ground Floor Flat': 'flat',
    '2 Bed First Floor Maisonette': 'maisonette',
    '4 Bed Detached House': 'house',
    '1 Bed Bungalow': 'bungalow',
    '2 Bed Bungalow': 'bungalow',
    'First Floor Bedsit': 'bedsit',
    '3 Bed First Floor Maisonette': 'maisonette',
    '2 Bed 1st Floor Sheltered Flat': 'flat',
    '1 Bed First Floor Flat': 'flat',
    '3 Bed First Floor Flat': 'flat',
    'ND': 'unknown',
    'House (Mid Terrace)': 'house',
    'First Floor Flat General': 'flat',
    'House (End Terrace)': 'house',
    'House (Mid terrace)': 'house',
    'Bungalow (Semi)': 'bungalow',
    'Ground Floor Flat General': 'flat',
    'House (Semi)': 'house',
    'Detached House': 'house',
    'Bedsit': 'bedsit',
    'Terraced House': 'house',
    'Standard Maisonette': 'maisonette',
    'End Terraced House': 'house',
    'Third Floor Flat or Above': 'flat',
    'Town House': 'house',
    'Mid Terraced House': 'house',
    'Back To Back House': 'house',
    'Flat Basement': 'flat',
    'Ground Floor Flat': 'flat',
    'Semi Detached House': 'house',
    'Second Floor Flat': 'flat',
    'First Floor Flat': 'flat',
    'Level not confirmed': 'flat',
    'Cottage': 'house',
    'Studio (1st Floor)': 'flat',
    'Studio (Ground floor)': 'flat',
    'Guest room in a complex': 'other',
    # Was 'bedsit'. The label reads as an empty-field placeholder rather than
    # a property type, so it is treated as unknown.
    # NOTE(review): confirm against the source data.
    'PIMSS EMPTY': 'unknown',
    'Room Only': 'other',
    'Detached Property': 'house',
    'End Terrace Housex': 'house',
    'Coach House': 'coach house',
    'Mid Terrace Bungalow': 'bungalow',
    'End Terrace Bungalow': 'bungalow',
    'Mid Terrace House': 'house',
    'Detached Bungalow': 'bungalow',
    'End Terrace House': 'house',
    'Mid Terrace Housekeeping ': 'house',
    'Maisonnette': 'maisonette',
    'Guest Room': 'unknown',
    'Office Buildings': 'unknown',
    'Semi Detached Bung': 'bungalow',
    'Bedspace': 'bedsit',
    'Houses/Bungalows': 'bungalow',
    'Bedsits': 'bedsit',
    'Unknown': 'unknown',
    'Sheltered Flats/besits': 'flat',
    'House/Bungalow ': 'bungalow',
    'Low/Med Rise Flats/Mais': 'flat',
    'Staff/Comm': 'other',
    'A Rooms': 'other',
    'Studio (3rd floor and above)': 'flat',
    'Adapted Property For Disabled': 'unknown',
    'Studio (2nd floor)': 'flat',
    'Third Floor Flat': 'flat',
    '2 Ext. Wall Flat': 'flat',
    'Hostel': 'other',
    'House: MidTerrace': 'house',
    'House: EndTerrace': 'house',
    'Flat: Mid Terrace: Mid Floor': 'flat',
    'Bungalow: SemiDetached': 'bungalow',
    'Bungalow: EndTerrace': 'bungalow',
    'Flat: End Terrace: Top Floor': 'flat',
    'Maisonette: End Terrace: Ground Floor': 'maisonette',
    'Flat: End Terrace: Ground Floor': 'flat',
    'Flat: Mid Terrace: Top Floor': 'flat',
    'House: Detached': 'house',
    'Flat: End Terrace: Mid Floor': 'flat',
    'House: SemiDetached': 'house',
    'Flat: Semi Detached: Ground Floor': 'flat',
    'Flat: Semi Detached: Top Floor': 'flat',
    'Flat: Mid Terrace: Ground Floor': 'flat',
    'Bungalow: MidTerrace': 'bungalow',
    'Flat: Enclosed End Terrace: Top Floor': 'flat',
    'Flat: Semi Detached: Mid Floor': 'flat',
    'Maisonette: Mid Terrace: Top Floor': 'maisonette',
    'House: EnclosedEndTerrace': 'house',
    'Flat: Detached: Ground Floor': 'flat',
    'Flat: Detached: Mid Floor': 'flat',
    'Flat: Detached: Top Floor': 'flat',
    'Bungalow: Detached': 'bungalow',
    'Maisonette: End Terrace: Mid Floor': 'maisonette',
    'Maisonette: Detached: Top Floor': 'maisonette',
    'Flat: Enclosed Mid Terrace: Mid Floor': 'flat',
    'Flat: Enclosed Mid Terrace: Ground Floor': 'flat',
    'Flat: Enclosed End Terrace: Mid Floor': 'flat',
    'Flat: Enclosed End Terrace: Ground Floor': 'flat',
    'Flat: Enclosed Mid Terrace: Top Floor': 'flat',
    '2013 onwards': 'unknown',
    'House 2 Storey': 'house',
    'Bung': 'bungalow',
    'House 3 Storey': 'house',
    'Shared Flat': 'flat',
    'd': 'unknown',
    'Mais': 'maisonette',
    'e': 'unknown',
    'Shared House': 'house',
    'House 4 Storey': 'house',
    'Shared Bungalow': 'bungalow',
    'Detch': 'house',
    'Shop': 'other',
    'Terr': 'house',
    'Terrace': 'house',
    'Description': 'unknown',
    'Hse': 'house',
    'Room': 'other',
    'Office': 'other',
    'Room In Shared Accommodation': 'other',
    'Apartment': 'flat',
    'm': 'unknown',
    'Garage': 'other',
    'Parking Space': 'other',
    'Community Centre': 'other',
    'Communal Facility': 'other',
    'Semi': 'house'
}

View file

@ -0,0 +1,27 @@
import numpy as np
# Closed set of roof-construction labels that raw values are mapped onto.
STANDARD_ROOF_CONSTRUCTIONS = {
    # Pitched roofs
    "pitched access to loft", "pitched no access to loft", "pitched unknown access to loft",
    # NOTE(review): "piched" looks like a misspelling of "pitched". It is a
    # runtime value, so check for consumers before renaming it.
    "piched unknown insulation", "pitched insulated",
    # Flat roofs / no roof of its own
    "flat unknown insulation", "another dwelling above",
    # Unknown construction
    "unknown insulated", "unknown",
}
# Lookup from raw roof-construction labels to one of the
# STANDARD_ROOF_CONSTRUCTIONS values. Keys are matched exactly.
ROOF_CONSTRUCTION_MAPPINGS = {
    "Flat": "flat unknown insulation",
    "Pitched (access to loft)": "pitched access to loft",
    "Pitched (no access to loft)": "pitched no access to loft",
    "Another dwelling above": "another dwelling above",
    "Same dwelling above": "another dwelling above",
    # Labels that say nothing about the roof itself
    "As-built": "unknown",
    "ND (inferred)": "unknown",
    "2018 onwards": "unknown",
    "Pitched (vaulted ceiling)": "pitched insulated",
    # Missing values, in both forms they can arrive in
    np.nan: "unknown",
    None: "unknown",
}

View file

@ -0,0 +1,170 @@
import numpy as np
# Canonical wall-construction labels that raw wall descriptions are standardised to.
STANDARD_WALL_CONSTRUCTIONS = {
# Cavity
"uninsulated cavity", "filled cavity", "partial insulated cavity", "cavity unknown insulation",
# Solid Brick
"uninsulated solid brick", "insulated solid brick", "solid brick unknown insulation",
# Timber Frame
"timber frame unknown insulation", "insulated timber frame", "uninsulated timber frame",
# Other construction types and fallbacks
"system built", "granite or whinstone", "other",
"unknown", "sandstone or limestone",
"cob",
"new build - average thermal transmittance",
}
# Known raw wall descriptions -> canonical label from STANDARD_WALL_CONSTRUCTIONS.
#
# Every value in this table must be a member of STANDARD_WALL_CONSTRUCTIONS. The timber frame
# entries previously mapped to a bare 'timber frame' label, which is not a standard value; they
# now use the existing timber frame labels, mirroring how the cavity / solid brick entries encode
# insulation. "Partial insulation" has no dedicated timber frame label, so it maps to
# 'timber frame unknown insulation'.
#
# The 'W/m?K', 'W/m-¦K' and 'W/m&#0178;K' spellings are mis-encoded variants of the same unit as
# they appear in the raw data - they are matched verbatim and must not be "corrected".
WALL_CONSTRUCTION_MAPPINGS = {
    "New Build - Average Thermal Transmittance": "new build - average thermal transmittance",
    'Average thermal transmittance 0.25 W/m?K': 'unknown',
    'Cavity wall, as built, insulated (assumed)': 'filled cavity',
    'Average thermal transmittance 0.31 W/m?K': 'unknown',
    'Cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
    'Average thermal transmittance 0.30 W/m?K': 'unknown', 'Average thermal transmittance 0.28 W/m-¦K': 'unknown',
    'Average thermal transmittance 0.25 W/m-¦K': 'unknown', 'Average thermal transmittance 0.21 W/m-¦K': 'unknown',
    'Average thermal transmittance 0.20 W/m-¦K': 'unknown', 'Average thermal transmittance 0.29 W/m?K': 'unknown',
    'Average thermal transmittance 0.16 W/m?K': 'unknown',
    'Average thermal transmittance 0.27 W/m&#0178;K': 'unknown',
    'Average thermal transmittance 0.15 W/m-¦K': 'unknown', 'Average thermal transmittance 0.23 W/m-¦K': 'unknown',
    'Average thermal transmittance 0.18 W/m?K': 'unknown',
    'Granite or whin, with internal insulation': 'granite or whinstone',
    "Granite or whinstone, as built, insulated (assumed)": "granite or whinstone",
    'Average thermal transmittance 0.22 W/m-¦K': 'unknown', 'Average thermal transmittance 0.24 W/m?K': 'unknown',
    'Average thermal transmittance 0.16 W/m-¦K': 'unknown', 'Average thermal transmittance 0.35 W/m?K': 'unknown',
    'Average thermal transmittance 0.26 W/m-¦K': 'unknown', 'Average thermal transmittance 0.62 W/m?K': 'unknown',
    'Average thermal transmittance 0.64 W/m?K': 'unknown', 'Average thermal transmittance 0.61 W/m?K': 'unknown',
    'Sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
    'Average thermal transmittance 0.33 W/m?K': 'unknown',
    'Cavity wall,': "cavity unknown insulation",
    'Cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
    'Average thermal transmittance 0.29 W/m-¦K': 'unknown', 'Average thermal transmittance 0.32 W/m-¦K': 'unknown',
    'Average thermal transmittance 0.19 W/m-¦K': 'unknown', 'Average thermal transmittance 0.27 W/m?K': 'unknown',
    'Average thermal transmittance 0.22 W/m?K': 'unknown', 'Average thermal transmittance 0.38 W/m?K': 'unknown',
    'Average thermal transmittance 0.26 W/m?K': 'unknown', 'Average thermal transmittance 0.27 W/m-¦K': 'unknown',
    'Average thermal transmittance 0.18 W/m-¦K': 'unknown', 'Average thermal transmittance = 0.27 W/m?K': 'unknown',
    'Cavity wall, with external insulation': 'filled cavity', 'Average thermal transmittance 0.21 W/m?K': 'unknown',
    'Average thermal transmittance 0.23 W/m?K': 'unknown', 'Average thermal transmittance 0.20 W/m?K': 'unknown',
    'Average thermal transmittance 0.32 W/m?K': 'unknown', 'Average thermal transmittance 0.24 W/m-¦K': 'unknown',
    'Cavity wall, with internal insulation': 'filled cavity',
    'Average thermal transmittance 0.17 W/m-¦K': 'unknown', 'Average thermal transmittance 0.28 W/m?K': 'unknown',
    # Lower-cased copies of the descriptions above
    'new build - average thermal transmittance': 'new build - average thermal transmittance',
    'average thermal transmittance 0.25 w/m?k': 'unknown',
    'cavity wall, as built, insulated (assumed)': 'filled cavity',
    'average thermal transmittance 0.31 w/m?k': 'unknown',
    'cavity wall, as built, no insulation (assumed)': 'uninsulated cavity',
    'average thermal transmittance 0.30 w/m?k': 'unknown', 'average thermal transmittance 0.28 w/m-¦k': 'unknown',
    'average thermal transmittance 0.25 w/m-¦k': 'unknown', 'average thermal transmittance 0.21 w/m-¦k': 'unknown',
    'average thermal transmittance 0.20 w/m-¦k': 'unknown', 'average thermal transmittance 0.29 w/m?k': 'unknown',
    'average thermal transmittance 0.16 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m&#0178;k': 'unknown',
    'average thermal transmittance 0.15 w/m-¦k': 'unknown', 'average thermal transmittance 0.23 w/m-¦k': 'unknown',
    'average thermal transmittance 0.18 w/m?k': 'unknown',
    'granite or whin, with internal insulation': 'granite or whinstone',
    'average thermal transmittance 0.22 w/m-¦k': 'unknown', 'average thermal transmittance 0.24 w/m?k': 'unknown',
    'average thermal transmittance 0.16 w/m-¦k': 'unknown', 'average thermal transmittance 0.35 w/m?k': 'unknown',
    'average thermal transmittance 0.26 w/m-¦k': 'unknown', 'average thermal transmittance 0.62 w/m?k': 'unknown',
    'average thermal transmittance 0.64 w/m?k': 'unknown', 'average thermal transmittance 0.61 w/m?k': 'unknown',
    'sandstone or limestone, as built, no insulation (assumed)': 'sandstone or limestone',
    'average thermal transmittance 0.33 w/m?k': 'unknown', 'cavity wall,': "cavity unknown insulation",
    'cavity wall, as built, partial insulation (assumed)': 'partial insulated cavity',
    'average thermal transmittance 0.29 w/m-¦k': 'unknown', 'average thermal transmittance 0.32 w/m-¦k': 'unknown',
    'average thermal transmittance 0.19 w/m-¦k': 'unknown', 'average thermal transmittance 0.27 w/m?k': 'unknown',
    'average thermal transmittance 0.22 w/m?k': 'unknown', 'average thermal transmittance 0.38 w/m?k': 'unknown',
    'average thermal transmittance 0.26 w/m?k': 'unknown', 'average thermal transmittance 0.27 w/m-¦k': 'unknown',
    'average thermal transmittance 0.18 w/m-¦k': 'unknown', 'average thermal transmittance = 0.27 w/m?k': 'unknown',
    'cavity wall, with external insulation': 'filled cavity', 'average thermal transmittance 0.21 w/m?k': 'unknown',
    'average thermal transmittance 0.23 w/m?k': 'unknown', 'average thermal transmittance 0.20 w/m?k': 'unknown',
    'average thermal transmittance 0.32 w/m?k': 'unknown', 'average thermal transmittance 0.24 w/m-¦k': 'unknown',
    'cavity wall, with internal insulation': 'filled cavity', 'average thermal transmittance 0.17 w/m-¦k': 'unknown',
    'average thermal transmittance 0.28 w/m?k': 'unknown',
    'Cavity wall, filled cavity': 'filled cavity',
    'Cavity wall, filled cavity and external insulation': 'filled cavity',
    'Granite or whinstone, as built, no insulation (assumed)': 'granite or whinstone',
    'Solid brick, as built, insulated (assumed)': 'insulated solid brick',
    'Solid brick, as built, no insulation (assumed)': 'uninsulated solid brick',
    'Solid brick, with external insulation': 'insulated solid brick',
    'Solid brick, with internal insulation': 'insulated solid brick',
    'System built, as built, insulated (assumed)': 'system built',
    'System built, as built, no insulation (assumed)': 'system built',
    'System built, with external insulation': 'system built',
    'System built, with internal insulation': 'system built',
    'Timber frame, as built, insulated (assumed)': 'insulated timber frame',
    'Timber frame, as built, no insulation (assumed)': 'uninsulated timber frame',
    # No "partial" timber frame label exists in the standard set
    'Timber frame, as built, partial insulation (assumed)': 'timber frame unknown insulation',
    'Timber frame, with additional insulation': 'insulated timber frame',
    'CAVITY': 'cavity unknown insulation',
    'COMB': 'unknown',
    'NONE': 'unknown',
    'NOTKNOWN': 'unknown',
    'SOLID': 'solid brick unknown insulation',
    # NOTE(review): NaN never compares equal to itself, so this key only matches when the
    # looked-up object is the np.nan singleton itself.
    np.nan: 'unknown',
    'RENDER/TIMBER FRAME': 'timber frame unknown insulation',
    'SYSTEM BUILT': 'system built',
    'PCC PANELS': 'other',
    'NOT APPLICABLE - FLAT': 'unknown',
    'BRICK/TIMBER FRAME': 'timber frame unknown insulation',
    'BRICK/BLOCK CAVITY': 'cavity unknown insulation',
    'STONE SOLID': 'sandstone or limestone',
    'EXT CLADDING SYSTEM': 'system built',
    'BRICK/BLOCK SOLID': 'solid brick unknown insulation',
    'Cavity Filled cavity (with internal/external)': 'filled cavity',
    'ND (inferred) Filled cavity': 'filled cavity',
    'Cavity Filled cavity': 'filled cavity',
    'Cavity Unknown insulation': 'cavity unknown insulation',
    'Timber frame As-built': 'timber frame unknown insulation',
    'System build Unknown insulation': 'system built',
    'Cavity As-built': 'uninsulated cavity',
    'System build External': 'system built',
    'ND (inferred) ND (inferred)': 'unknown',
    'Solid brick External': 'insulated solid brick',
    'Cavity External': 'filled cavity',
    'System build As-built': 'system built',
    'Solid brick Internal': 'insulated solid brick',
    'Cavity Internal': 'filled cavity',
    'System build Internal': 'system built',
    'Solid brick As-built': 'solid brick unknown insulation',
    'Cavity ': 'cavity unknown insulation',
    'Solid brick ': 'solid brick unknown insulation',
    'Timber frame Timber frame (good insulation)': 'insulated timber frame',
    ' ': 'unknown',
    'Cavity No data': 'cavity unknown insulation',
    'Non trad ': 'other',
    'Solid brick / Multiple Attributes ': 'solid brick unknown insulation',
    'Cavity Believe CWI done by Dyson': 'filled cavity',
    'Cavity CWI required': 'uninsulated cavity',
    'Solid brick EWI installed': 'insulated solid brick',
    'Cavity Cavity batts': 'filled cavity',
    'Cavity CWI Completed by Dyson': 'filled cavity',
    None: "unknown",
    "Cavity": "cavity unknown insulation",
    'SolidBrick: Unknown': 'solid brick unknown insulation',
    'Cavity: Unknown': 'cavity unknown insulation',
    'Cavity: AsBuilt (Post 1995)': 'filled cavity',
    'Cavity: AsBuilt (1976-1982)': 'cavity unknown insulation',
    'SystemBuilt: AsBuilt': 'system built',
    'TimberFrame: AsBuilt': "timber frame unknown insulation",
    'Cavity: AsBuilt (1983-1995)': 'cavity unknown insulation',
    'Cavity: AsBuilt (1983-1995), Cavity: FilledCavity': 'filled cavity',
    'SolidBrick: AsBuilt': 'solid brick unknown insulation',
    'Cavity: FilledCavity': 'filled cavity',
    'SolidBrick: Internal': 'insulated solid brick',
    'Cavity: External': 'filled cavity',
    'Sandstone: Internal': 'sandstone or limestone',
    'Cavity: AsBuilt (Pre 1976)': 'cavity unknown insulation',
    'System build': 'system built',
    'Solid brick': 'solid brick unknown insulation',
    'Stone': 'sandstone or limestone',
    'Timber frame': 'timber frame unknown insulation',
    '2017 onwards': 'new build - average thermal transmittance',
    'ND (inferred)': 'unknown',
    'Flat / maisonette': 'other',
    'Other': 'other',
    'Timber Frame': 'timber frame unknown insulation',
    'Cavity Wall': 'cavity unknown insulation',
    'Non-Traditional': 'system built',
    'PRC': 'system built',
    'Cross Wall': 'system built',
    'Solid Wall': 'solid brick unknown insulation',
    'Traditional': 'other'
}

View file

@ -0,0 +1,12 @@
postal
pandas
usaddress
pydantic-settings==2.6.0
epc-api-python==1.0.2
fuzzywuzzy
boto3
openpyxl
openai
tiktoken
msgpack
beautifulsoup4

View file

@ -0,0 +1,5 @@
from asset_list.AssetList import AssetList
def test_multi_unit_address_flagging():
    """An address describing a range of rooms within a block is flagged as multi-unit."""
    multi_unit_address = 'Block (Rooms 1-4), 23 Clifton Hill, Newtown, Exeter, EX1 2DL'
    flagged = AssetList._identify_multi_address(multi_unit_address)
    assert flagged

183
asset_list/utils.py Normal file
View file

@ -0,0 +1,183 @@
import time
import numpy as np
import pandas as pd
from backend.SearchEpc import SearchEpc
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from tqdm import tqdm
from utils.logger import setup_logger
logger = setup_logger()
def get_data(
    df,
    manual_uprn_map,
    epc_auth_token,
    uprn_column,
    fulladdress_column,
    address1_column,
    postcode_column,
    property_type_column,
    built_form_column,
    epc_api_only=False,
    row_id_name="row_id",
):
    """
    Retrieve EPC data (and optionally FindMyEPC data) for every row of an asset list.

    For each home the EPC is searched for in up to three passes:
    1) using the parsed house number (and the UPRN, if one is available),
    2) if nothing was found and there is no UPRN, using a fallback first address line,
    3) as a final resort, with the property type / built form set on the searcher so an EPC
       can be estimated.

    :param df: DataFrame of homes, one row per home
    :param manual_uprn_map: Dictionary of {full address: uprn} overrides, checked before the uprn column
    :param epc_auth_token: Auth token passed to SearchEpc
    :param uprn_column: Name of the column holding the UPRN
    :param fulladdress_column: Name of the column holding the full address
    :param address1_column: Name of the column holding the first address line / house number
    :param postcode_column: Name of the column holding the postcode
    :param property_type_column: Name of the column holding the standardised property type
    :param built_form_column: Name of the column holding the standardised built form
    :param epc_api_only: If True, skip the FindMyEPC retrieval and only return EPC api data
    :param row_id_name: Name of the column holding the unique row identifier
    :return: Tuple of (epc_data, errors, no_epc) where epc_data is a list of dictionaries keyed with
        the row id, errors is the list of row ids that raised and no_epc is the list of row ids for
        which no EPC could be found
    """
    # These re-map the standard property types to forms accepted by the EPC api, so we can predict EPCs
    property_type_map = {
        "house": "House",
        "flat": "Flat",
        "maisonette": "Maisonette",
        "bungalow": "Bungalow",
        "block house": "House",
        "coach house": "House",
        "bedsit": "Flat"
    }
    built_form_map = {
        "mid-terrace": "Mid-Terrace",
        "end-terrace": "End-Terrace",
        "semi-detached": "Semi-Detached",
        "detached": "Detached"
    }

    epc_data = []
    errors = []
    no_epc = []

    for _, home in tqdm(df.iterrows(), total=len(df)):
        try:
            # If we have a block of flats, we cannot retrieve this data
            if home.get(property_type_column) == "block of flats":
                no_epc.append(home[row_id_name])
                continue

            postcode = home[postcode_column]
            house_number = str(home[address1_column]).strip()
            full_address = home[fulladdress_column].strip()

            house_no = SearchEpc.get_house_number(address=str(house_number), postcode=postcode)
            if house_no is None:
                house_no = house_number

            # A manually mapped UPRN takes precedence over the one in the data
            uprn = manual_uprn_map.get(full_address, None)
            if uprn is None and home.get(uprn_column):
                uprn = home[uprn_column]
            if pd.isnull(uprn):
                uprn = None

            property_type = property_type_map.get(home.get(property_type_column), None)
            built_form = built_form_map.get(home.get(built_form_column))

            searcher = SearchEpc(
                address1=str(house_no),
                postcode=postcode,
                auth_token=epc_auth_token,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5,
                uprn=uprn
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)

            # Check if we have a flat or apartment
            if searcher.newest_epc is None and uprn is None:
                # Try again:
                if SearchEpc.get_house_number(address=str(house_number), postcode=postcode) is None:
                    # Backup
                    add1 = full_address.split(",")
                    if len(add1) > 1:
                        add1 = add1[1].strip()
                    else:
                        # Try splitting on space
                        add1 = full_address.split(" ")[0].strip()
                else:
                    add1 = str(house_number)

                searcher = SearchEpc(
                    address1=add1,
                    postcode=postcode,
                    auth_token=epc_auth_token,
                    os_api_key="",
                    property_type=None,
                    fast=True,
                    full_address=full_address,
                    max_retries=5
                )
                lowered_house_number = house_number.lower()
                if (
                        "flat" in lowered_house_number or "apartment" in lowered_house_number or "apt" in
                        lowered_house_number
                ):
                    searcher.ordnance_survey_client.property_type = "Flat"
                searcher.find_property(skip_os=True)

            # As a final resort, we estimate the EPC
            if property_type is not None and searcher.newest_epc is None:
                searcher.ordnance_survey_client.property_type = property_type
                searcher.ordnance_survey_client.built_form = built_form
                searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                no_epc.append(home[row_id_name])
                continue

            # Look for EPC recommendations. This is best effort: a failure here should not stop us
            # from using the EPC itself, but we no longer swallow KeyboardInterrupt/SystemExit
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception as recommendations_error:
                logger.warning(
                    f"Could not retrieve EPC recommendations for row {home[row_id_name]}: {recommendations_error}"
                )
                property_recommendations = {"rows": []}

            if epc_api_only:
                epc = {
                    row_id_name: home[row_id_name],
                    **searcher.newest_epc.copy(),
                    "recommendations": property_recommendations["rows"]
                }
                epc_data.append(epc)
                continue

            # Retrieve data from FindMyEPC
            try:
                find_epc_searcher = RetrieveFindMyEpc(
                    address=searcher.newest_epc["address"], postcode=searcher.newest_epc["postcode"]
                )
                find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
            except ValueError as e:
                if "No EPC found" in str(e) and "address1" in searcher.newest_epc:
                    # Retry with just the first line of the address
                    try:
                        find_epc_searcher = RetrieveFindMyEpc(
                            address=searcher.newest_epc["address1"], postcode=searcher.newest_epc["postcode"]
                        )
                        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
                    except ValueError as retry_error:
                        if "No EPC found" in str(retry_error):
                            find_epc_data = {}
                        else:
                            logger.error(f"Error retrieving FindMyEPC data: {retry_error}")
                            raise Exception(f"Error retrieving FindMyEPC data: {retry_error}") from retry_error
                else:
                    find_epc_data = {}
            except Exception as e:
                raise Exception(f"Error retrieving FindMyEPC data: {e}") from e

            # Random pause between homes
            time.sleep(np.random.uniform(0.1, 1))

            epc = {
                row_id_name: home[row_id_name],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"],
                "find_my_epc_data": find_epc_data,
            }
            epc_data.append(epc)
        except Exception as e:
            # Record the failure against the row and carry on with the rest of the asset list. We log
            # the error so failures can be diagnosed, rather than silently discarding it
            logger.error(f"Error retrieving EPC data for row {home[row_id_name]}: {e}")
            errors.append(home[row_id_name])
            time.sleep(5)

    return epc_data, errors, no_epc

413
backend/Funding.py Normal file
View file

@ -0,0 +1,413 @@
import pandas as pd
import numpy as np
from typing import List
from backend.app.plan.schemas import HousingType
class Funding:
    """
    Given a property, this class identifies if the home is possibly eligible for funding under
    the various funding schemes. It will also calculate the expected amount of funding available
    and flag any tenant specific requirements that need to be considered for the funding to be attained

    NOTE: the attribute name `gbis_eligibiltiy` and the method name `check_eligibiltiy` are misspelt, but
    they are kept as they are part of the interface used elsewhere.
    """

    SCHEMES = ["eco4", "gbis", "whlg"]

    # SAP half-bands, listed from the highest band to the lowest
    ECO_SAP_SCORE_THREHOLDS = [
        {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0},
        {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0},
        {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5},
        {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5},
        {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25},
        {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75},
        {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75},
        {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25},
        {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25},
        {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75},
        {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75},
        {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25},
        {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25},
        {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75}
    ]

    # EPC bands for which GBIS funding is considered
    _GBIS_EPC_BANDS = ("D", "E", "F", "G")
    # Measures that are not available under every GBIS eligibility type
    _GBIS_RESTRICTED_MEASURES = ("cavity_wall_insulation", "loft_insulation")
    _GBIS_PRS_MEASURES = (
        "internal_wall_insulation",
        "external_wall_insulation",
        "flat_roof_insulation",
        "suspended_floor_insulation",
        "room_roof_insulation",
        # Not available for every eligibility type
        "cavity_wall_insulation",
        "loft_insulation",
    )
    _GBIS_SOCIAL_MEASURES = _GBIS_PRS_MEASURES + ("heating_control",)

    def __init__(
            self,
            tenure: "HousingType",
            starting_epc,
            starting_sap,
            postcode,
            floor_area,
            council_tax_band,
            property_recommendations,
            project_scores_matrix,
            whlg_eligible_postcodes,
            gbis_abs_rate: int,
            eco4_abs_rate: int,
    ):
        """
        :param tenure: Indicates if the property is a social or private home
        :param starting_epc: The current EPC rating (letter) of the property
        :param starting_sap: The current SAP score for the property
        :param postcode: The postcode of the property
        :param floor_area: The total floor area of the property
        :param council_tax_band: The council tax band of the property, or None if unknown
        :param property_recommendations: The recommendations for the property. Each recommendation is a
            dictionary that is read for "type", "measure_type", "default", "sap_points" and "total"
        :param project_scores_matrix: The matrix of project scores for ECO4, as a DataFrame with (at least)
            the columns "Floor Area Segment", "Starting Band", "Finishing Band" and "Cost Savings"
        :param whlg_eligible_postcodes: DataFrame with a lower case "Postcode" column of postcodes eligible
            for WHLG
        :param gbis_abs_rate: The assumed £/abs achieved by the installer for GBIS
        :param eco4_abs_rate: The assumed £/abs achieved by the installer for ECO4
        """
        # TODO: Things we need to include:
        #  1) Amount of funding
        #  2) Fundable measures, as a subset of measures may be fundable, not all
        self.tenure = tenure
        self.starting_epc = starting_epc
        self.starting_sap = starting_sap
        self.postcode = postcode
        self.starting_eco_band = self.sap_to_eco_band(self.starting_sap)
        self.floor_area_segment = self.classify_floor_area(floor_area)
        self.gbis_abs_rate = gbis_abs_rate
        self.eco4_abs_rate = eco4_abs_rate
        self.council_tax_band = council_tax_band
        self.recommendations = property_recommendations
        # Only the default recommendations are considered when checking for fundable measure types
        self.measure_types = list({r["measure_type"] for r in property_recommendations if r["default"]})

        # Filter the eco4 project scores matrix on scores relevant to this property
        self.project_scores_matrix = project_scores_matrix[
            (project_scores_matrix["Floor Area Segment"] == self.floor_area_segment) &
            (project_scores_matrix["Starting Band"] == self.starting_eco_band)
        ]
        # The postcode column is already lower case
        self.whlg_eligible_postcodes = whlg_eligible_postcodes[
            whlg_eligible_postcodes["Postcode"] == self.postcode.lower()
        ]

        # Store the final outputs
        self.gbis_eligibiltiy = {}
        self.eco4_eligibility = {}
        self.whlg_eligibility = {}

    def output(
            self,
            scheme: str,
            eligible: bool,
            types: List[str],
            measure_types: List[str],
            project_score: float,
            estimated_funding: float,
            notify_tenant_benefits_requirements: bool,
            notify_council_tax_band_requirements: bool,
            notify_tenant_low_income_requirements: bool,
            innovation_required: bool,
    ):
        """
        Builds the standard eligibility record for a funding scheme

        :param scheme: The funding scheme, one of SCHEMES
        :param eligible: Whether the property is possibly eligible
        :param types: The recommendation types the funding applies to
        :param measure_types: The measure types the funding applies to
        :param project_score: The project score (abs) for the measures
        :param estimated_funding: The estimated funding amount
        :param notify_tenant_benefits_requirements: True if the tenant must be in receipt of benefits
        :param notify_council_tax_band_requirements: True if the council tax band needs to be checked
        :param notify_tenant_low_income_requirements: True if the tenant must be low income
        :param innovation_required: True if an innovation measure is required
        :return: Dictionary describing the eligibility
        :raises ValueError: If the scheme is not recognised
        """
        if scheme not in self.SCHEMES:
            raise ValueError("Scheme not recognised")

        return {
            "scheme": scheme,
            "eligible": eligible,
            "type": types,
            "measure_types": measure_types,
            "project_score": project_score,
            "estimated_funding": estimated_funding,
            "requires_benefits": notify_tenant_benefits_requirements,
            "requires_council_tax_band": notify_council_tax_band_requirements,
            "requires_low_income": notify_tenant_low_income_requirements,
            "innovation_required": innovation_required,
        }

    @staticmethod
    def classify_floor_area(floor_area):
        """
        Classifies the floor area into the segments used by the project scores matrix
        :param floor_area: The total floor area of the property
        :return: The floor area segment label
        """
        if floor_area <= 72:
            return "0-72"
        if floor_area <= 97:
            return "73-97"
        if floor_area <= 199:
            return "98-199"
        return "200"

    def find_gbis_measures(self, measures):
        """
        The best measure is one that:
        1) Creates some SAP movement, therefore enables eligibility
        2) Generates the most funding
        3) Has a reasonable ROI

        :param measures: The types/measure types that are fundable for the eligibility route
        :return: List of dictionaries with keys "type", "measure_type", "project_score" and
            "estimated_funding", sorted by the net cost to the client per SAP point. The list is empty
            when no default recommendation is fundable or moves the property to another SAP half-band
        """
        # Only default recommendations are considered. The brackets matter: without them, `and` binds
        # tighter than `or`, which let non-default recommendations through whenever their type matched
        candidates = [
            m for m in self.recommendations if
            ((m["type"] in measures) or (m["measure_type"] in measures)) and m["default"]
        ]
        if not candidates:
            return []

        measure_table = pd.DataFrame(candidates)
        measure_table["post_install_sap"] = measure_table["sap_points"] + self.starting_sap
        # We classify the movement
        measure_table["Finishing Band"] = np.floor(measure_table["post_install_sap"]).apply(self.sap_to_eco_band)
        # Remove any measures that do not move the property out of its starting half-band
        measure_table = measure_table[measure_table["Finishing Band"] != self.starting_eco_band]

        if measure_table.empty:
            return []

        # We merge on the project matrix, on post install band
        measure_table = measure_table.merge(
            self.project_scores_matrix, how="left", on="Finishing Band"
        )
        # Cost Savings is the abs
        measure_table["estimated_funding"] = measure_table["Cost Savings"] * self.gbis_abs_rate
        # We cap any estimated funding at the install cost
        measure_table["estimated_funding"] = np.where(
            measure_table["estimated_funding"] >= measure_table["total"],
            measure_table["total"],
            measure_table["estimated_funding"]
        )
        # Sort by the measure that will cost the client the least, per sap point
        measure_table["cost_minus_funding"] = measure_table["total"] - measure_table["estimated_funding"]
        measure_table["cost_minus_funding_per_sap"] = measure_table["cost_minus_funding"] / measure_table["sap_points"]
        measure_table = measure_table.sort_values(["cost_minus_funding_per_sap", "total"], ascending=[True, False])

        return measure_table[
            ["type", "measure_type", "Cost Savings", "estimated_funding"]
        ].rename(columns={"Cost Savings": "project_score"}).to_dict("records")

    def sap_to_eco_band(self, sap_points):
        """
        Given a sap point score, this function will classify the points into the SAP half-band

        The thresholds table shares its boundaries between neighbouring half-bands (e.g. 96.0 is both the
        top of Low_A and the bottom of High_A) and has gaps between letter bands (e.g. between 68 and 69),
        so the band is taken as the highest one whose lower bound has been reached. Scores above 100 are
        High_A and scores below the lowest bound are Low_G.

        :param sap_points: The SAP score
        :return: The half-band label, e.g. "High_D"
        :raises ValueError: If the SAP score is missing
        """
        if pd.isnull(sap_points):
            raise ValueError("A SAP score is required to classify the SAP half band")

        if sap_points > 100:
            return "High_A"

        thresholds = sorted(self.ECO_SAP_SCORE_THREHOLDS, key=lambda band: band["From"], reverse=True)
        for band in thresholds:
            if sap_points >= band["From"]:
                return band["Band"]

        # Below the lowest lower bound
        return thresholds[-1]["Band"]

    def _gbis_outputs(
            self,
            recommended_measures,
            requires_benefits=False,
            requires_council_tax_band=False,
            requires_low_income=False,
            innovation_required=False,
    ):
        """
        Builds one GBIS eligibility record per recommended measure
        """
        return [
            self.output(
                scheme="gbis",
                eligible=True,
                types=[m["type"]],  # This is single measure so we only have one type
                measure_types=[m["measure_type"]],
                project_score=m["project_score"],
                estimated_funding=m["estimated_funding"],
                notify_tenant_benefits_requirements=requires_benefits,
                notify_council_tax_band_requirements=requires_council_tax_band,
                notify_tenant_low_income_requirements=requires_low_income,
                innovation_required=innovation_required
            ) for m in recommended_measures
        ]

    def gbis_prs(self):
        """
        Checks if a private rental is eligible for GBIS. There are the following possible options
        1) General Eligibility, contingent on EPC D-G and council tax band A-D. Excludes CWI, LI and heating
        controls
        2) Low income group - contingent on EPC D-G and tenant must receive benefits. Excludes heating controls
        3) GBIS Flex route 1, 3 - Great British Insulation Scheme Routes 1 and 3 are for pre-installation
        SAP bands D-G for owner-occupied households, D-E for private rented sector households
        (Including F & G if exempt from MEES). If household is low income. Excludes heating controls
        4) GBIS Flex route 2 - EPC E - G and low income household. Excludes heating controls

        Eligible measures:
            Solid wall
            pitched roof
            flat roof
            under floor
            solid floor park home and
            room in-roof insulation

        :return: List of eligibility records, empty if there is no funding availability
        """
        if self.starting_epc not in self._GBIS_EPC_BANDS:
            return []

        general_measures = [m for m in self._GBIS_PRS_MEASURES if m not in self._GBIS_RESTRICTED_MEASURES]

        # General Eligibility
        if (
                any(measure in general_measures for measure in self.measure_types) and
                (self.council_tax_band in [None, "A", "B", "C", "D"])
        ):
            # This function pulls out the various measures that can provide funding under GBIS
            recommended_measures = self.find_gbis_measures(measures=general_measures)
            if recommended_measures:
                # If the council tax band is missing, we notify the customer that this is a requirement
                # that should be checked
                return self._gbis_outputs(
                    recommended_measures, requires_council_tax_band=self.council_tax_band is None
                )
            # Otherwise none of the general measures attract funding, so we fall through to the low
            # income route, which can also include CWI/LI

        # Low income/flex
        if any(measure in self._GBIS_PRS_MEASURES for measure in self.measure_types):
            # Find the best measure, and can also include CWI/LI but requires the tenant to be
            # low income or on benefits
            recommended_measures = self.find_gbis_measures(measures=list(self._GBIS_PRS_MEASURES))
            return self._gbis_outputs(
                recommended_measures, requires_benefits=True, requires_low_income=True
            )

        # Otherwise, no funding availability
        return []

    def gbis_social(self):
        """
        Because this is social housing, we have two typical means for eligibility
        1) EPC D, where an innovation measure is required
        2) EPC G-E, where an innovation measure isn't required

        :return: List of eligibility records, empty if there is no funding availability
        """
        # We check the EPC before looking for measures, so ineligible homes are not processed
        if self.starting_epc not in self._GBIS_EPC_BANDS:
            return []

        # All measures are available
        recommended_measures = self.find_gbis_measures(measures=list(self._GBIS_SOCIAL_MEASURES))
        return self._gbis_outputs(recommended_measures, innovation_required=self.starting_epc == "D")

    def gbis(self):
        """
        Check if a property is eligible for GBIS, storing the result on `gbis_eligibiltiy`
        :raises NotImplementedError: If the tenure is neither Private nor Social
        """
        if self.tenure == "Private":
            self.gbis_eligibiltiy = self.gbis_prs()
            return

        if self.tenure == "Social":
            self.gbis_eligibiltiy = self.gbis_social()
            return

        raise NotImplementedError("GBIS eligibility is only implemented for Private and Social tenures")

    def whlg(self):
        """
        Check if a property is eligible for WHLG, storing the result on `whlg_eligibility`
        :raises NotImplementedError: If the property is not social housing and is in an eligible postcode,
            as this route has not been implemented yet
        """
        if self.tenure == "Social":
            # We can't do anything for social housing
            self.whlg_eligibility = []
            return

        if not self.whlg_eligible_postcodes.empty:
            # TODO: build the whlg eligibility with self.output(scheme="whlg", ...)
            raise NotImplementedError("WHLG eligibility for eligible postcodes is not implemented yet")

    def eco4(self):
        """
        Checks if a property is eligible for ECO4. This has not been implemented yet, so nothing is
        stored on `eco4_eligibility`
        :raises NotImplementedError: For private homes, as the private rental route does not exist yet
        """
        if self.tenure == "Private":
            raise NotImplementedError("ECO4 eligibility for private rentals is not implemented yet")

    def check_eligibiltiy(self):
        """
        This function instigates the checking process
        :return:
        """
        self.gbis()
        # ECO4 is not implemented yet, so it is not part of the checking process
        # self.eco4()
        self.whlg()

View file

@ -22,6 +22,7 @@ from recommendations.recommendation_utils import (
)
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.app.utils import sap_to_epc
from backend.Funding import Funding
import backend.app.assumptions as assumptions
ENVIRONMENT = os.environ.get("ENVIRONMENT", "dev")
@ -69,6 +70,10 @@ class Property:
# Contains the solar panel optimisation results from the Google Solar API
solar_panel_configuration = None
# If true, indicates the floor area has actually been given to us by the owner, and we should use this figure
# instead of the one in the EPC, when we simulate
owner_floor_area = False
def __init__(
self,
id,
@ -103,7 +108,7 @@ class Property:
self.already_installed = ast.literal_eval(already_installed['already_installed']) if already_installed else []
self.non_invasive_recommendations = (
ast.literal_eval(non_invasive_recommendations['recommendations']) if
non_invasive_recommendations['recommendations'] if
non_invasive_recommendations else []
)
# This is a list of measures that have been recommended for the property
@ -132,9 +137,14 @@ class Property:
self.energy_cost_estimates = {}
self.energy_consumption_estimates = {}
# when storing the energy, we'll also
self.energy = {
"primary_energy_consumption": epc_record.get("energy_consumption_current"),
"co2_emissions": epc_record.get("co2_emissions_current"),
"epc_co2_emissions": epc_record.get("co2_emissions_current"),
# These will be added in once we estimate the amount of emissions from appliances - using the carbon
# intensity of electricity
"appliances_co2_emissions": None,
"co2_emissions": None
}
self.ventilation = {
"ventilation": epc_record.get("mechanical_ventilation"),
@ -202,6 +212,11 @@ class Property:
# TODO: We keep this but only temporarily until we add bathrooms, bedrooms, building id to the condition data
self.parse_kwargs(kwargs)
# Funding
self.gbis_eligibiltiy = None
self.eco4_eligibility = None
self.whlg_eligibility = None
@classmethod
def extract_kwargs(cls, kwargs):
"""
@ -215,25 +230,24 @@ class Property:
# as we collect more data from the energy assessment
n_bathrooms = kwargs.get("n_bathrooms", None)
if n_bathrooms not in [None, ""]:
# We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
n_bathrooms = int(round(float(n_bathrooms) + 1e-5))
# We add on a small value to ensure that the number of bathrooms is rounded up, in case the value is 0.5
n_bathrooms = int(round(float(n_bathrooms) + 1e-5)) if n_bathrooms not in [None, ""] else None
n_bedrooms = kwargs.get("n_bedrooms", None)
if n_bedrooms not in [None, ""]:
n_bedrooms = int(round(float(n_bedrooms) + 1e-5))
n_bedrooms = int(round(float(n_bedrooms) + 1e-5)) if n_bedrooms not in [None, ""] else None
number_of_floors = kwargs.get("number_of_floors", None)
if number_of_floors not in [None, ""]:
number_of_floors = int(round(float(number_of_floors) + 1e-5))
number_of_floors = int(round(float(number_of_floors) + 1e-5)) if number_of_floors not in [None, ""] else None
insulation_floor_area = kwargs.get("insulation_floor_area", None)
if insulation_floor_area not in [None, ""]:
insulation_floor_area = float(insulation_floor_area)
insulation_floor_area = float(insulation_floor_area) if insulation_floor_area not in [None, ""] else None
insulation_wall_area = kwargs.get("insulation_wall_area", None)
if insulation_wall_area not in [None, ""]:
insulation_wall_area = float(insulation_wall_area)
insulation_wall_area = float(insulation_wall_area) if insulation_wall_area not in [None, ""] else None
# We allow for the asset owner to provide us with total floor area, in the event of it being incorrect
floor_area = kwargs.get("floor_area", None)
floor_area = float(floor_area) if floor_area not in [None, ""] else None
return {
"n_bathrooms": n_bathrooms,
@ -242,12 +256,15 @@ class Property:
"insulation_floor_area": insulation_floor_area,
"insulation_wall_area": insulation_wall_area,
"building_id": kwargs.get("building_id", None),
"floor_area": floor_area
}
def parse_kwargs(self, kwargs):
    """
    Copy every non-None entry of kwargs onto this instance as an attribute.

    When a "floor_area" entry with a value is supplied, owner_floor_area is set to True
    immediately before the floor area itself is stored.

    :param kwargs: Mapping of attribute name to value. Entries whose value is None are skipped.
    """
    for name, value in kwargs.items():
        # Nothing to store for missing values
        if value is None:
            continue
        if name == "floor_area":
            # Flag that a floor area was supplied through kwargs
            self.owner_floor_area = True
        setattr(self, name, value)
def create_base_difference_epc_record(self, cleaned_lookup: dict):
@ -257,14 +274,7 @@ class Property:
It will be the same starting and ending EPC, as we don't have the expected EPC yet
"""
# difference_record = self.epc_record - self.epc_record
# TODO: change these lower and replace in the settings file
# print(
# "CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING"
# )
fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD
# print("NEED TO CHANGE THE DASH TO LOWER CASE")
fixed_data_col_names = [
x.lower().replace("_", "-") for x in fixed_data_col_names
]
@ -275,8 +285,6 @@ class Property:
if k in fixed_data_col_names
}
# difference_record.append_fixed_data(fixed_data)
difference_record = self.epc_record.create_EPCDifferenceRecord(
self.epc_record, fixed_data
)
@ -285,10 +293,11 @@ class Property:
datasets=[difference_record], cleaned_lookup=cleaned_lookup
)
# TODO: adjust the base difference record with the previously calculated u values + features
# estimated_perimeter is different to the perimeter in the epc record
# self.base_difference_record.df
# If we have variables that have been given to us by the landlord that we know are correct, whereas the EPC
# may not be, we use them
if self.owner_floor_area is not None:
self.base_difference_record.df["total_floor_area_ending"] = self.floor_area
self.base_difference_record.df["estimated_perimeter_ending"] = self.perimeter
def simulate_all_representative_recommendations(
self, property_representative_recommendations,
@ -374,7 +383,7 @@ class Property:
for rec in property_recommendations_by_phase:
# We simulate the impact of the recommendation at this current phase, and all of the prior phases
if rec["type"] in ["mechanical_ventilation", "trickle_vents", "draught_proofing"]:
if rec["type"] in ["trickle_vents", "draught_proofing"]:
continue
scoring_dict = self.create_recommendation_scoring_data(
@ -382,8 +391,8 @@ class Property:
recommendation_record=recommendation_record,
recommendations=previous_phase_representatives + [rec],
primary_recommendation_id=rec["recommendation_id"],
non_invasive_recommendations=self.non_invasive_recommendations,
)
self.recommendations_scoring_data.append(scoring_dict)
simulation_epc = self.epc_record.prepared_epc.copy()
@ -426,6 +435,18 @@ class Property:
if phase_epc_transformation[k] == v:
continue
if k == "hotwater-description":
if (
v == "From main system"
) and (
phase_epc_transformation["mainheat-description"] == "Electric storage heaters"
) and (
"Electric immersion" in phase_epc_transformation["hotwater-description"]
):
# It means we've recommended HHR with electric immersion, and shouldn't overwrite
# the hot water description
continue
raise NotImplementedError(
"Already have this key in the phase_epc_transformation - implement me"
)
@ -441,7 +462,7 @@ class Property:
if self.simulation_epcs is None:
raise ValueError("Simulation EPCs have not been created")
rec_ids = sorted(list(self.simulation_epcs.keys()))
rec_ids = list(self.simulation_epcs.keys())
updated_simulation_epcs = []
for rec_id in rec_ids:
sim_epc = self.simulation_epcs[rec_id].copy()
@ -467,15 +488,12 @@ class Property:
# Now we havet this data inthe
self.updated_simulation_epcs = updated_simulation_epcs
return updated_simulation_epcs
@staticmethod
def create_recommendation_scoring_data(
property_id,
recommendation_record,
recommendations: list,
primary_recommendation_id: int,
non_invasive_recommendations: list = None,
):
"""
This function will iterate through a list of recommendations and apply a simulation for each recommendation
@ -484,7 +502,6 @@ class Property:
:param recommendation_record: The record of the property, which will be updated
:param recommendations: The list of recommendations to apply
:param primary_recommendation_id: The id of the primary recommendation, which is used to identify the record
:param non_invasive_recommendations: The list of non-invasive recommendations
:return: The updated recommendation record
"""
@ -513,7 +530,7 @@ class Property:
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"cylinder_thermostat", "loft_insulation", "room_roof_insulation", "flat_roof_insulation",
"solid_floor_insulation", "suspended_floor_insulation", "mixed_glazing",
"windows_glazing"
"windows_glazing", "mechanical_ventilation"
]:
# We update the data, as defined in the recommendaton
for prefix in ["walls", "roof", "floor"]:
@ -539,7 +556,7 @@ class Property:
"solid_floor_insulation", "suspended_floor_insulation",
"windows_glazing", "solar_pv", "heating", "hot_water_tank_insulation",
"heating_control", "secondary_heating", "cylinder_thermostat", "mixed_glazing",
"extension_cavity_wall_insulation",
"extension_cavity_wall_insulation", "mechanical_ventilation",
]:
raise NotImplementedError(
"Implement me, given type %s" % recommendation["type"]
@ -707,6 +724,15 @@ class Property:
"unadjusted": unadjusted_kwh_estimates
}
# Update carbon with appliances
self.energy["appliances_co2_emissions"] = (
(unadjusted_kwh_estimates["appliances"] * assumptions.ELECTRICITY_CARBON_INTENSITY) / 1000
)
# Re-calculate total CO2 emissions
self.energy["co2_emissions"] = float(np.round(
self.energy["epc_co2_emissions"] + self.energy["appliances_co2_emissions"], 2
))
def set_spatial(self, spatial: pd.DataFrame):
"""
Sets whether the property is in a conservation area given the output of the ConservationAreaClient
@ -1226,6 +1252,15 @@ class Property:
if (self.building_id is not None) and (self.solar_panel_configuration is not None):
return True
# If the property is in a conservation area, is listed or is a heritage building, solar panels
# become a difficult measure to generally get through planning restrictions and so we do not recommend
# solar panels
if self.is_listed or self.is_heritage:
# If the property is in a conservation area, we can still recommend solar panels
# but they need to be done in a way that is sympathetic to the building. E.g. the panels
# may be installed such that they are not visible from the street
return False
is_valid_property_type = self.data["property-type"] in ["House", "Bungalow", "Maisonette"]
is_valid_roof_type = (
self.roof["is_flat"] or self.roof["is_pitched"] or self.roof["is_roof_room"]
@ -1294,3 +1329,11 @@ class Property:
)
return electric_consumption
def insert_funding(self, funding_calulator: Funding):
    """
    Copy the funding eligibility results from the funding calculator onto this property.

    :param funding_calulator: Object exposing the gbis_eligibiltiy, eco4_eligibility and
        whlg_eligibility attributes, which are copied to attributes of the same name on self.
    """
    # NOTE: "gbis_eligibiltiy" (sic) is the attribute name read from the calculator, so the
    # spelling is kept as-is here
    for attribute in ("gbis_eligibiltiy", "eco4_eligibility", "whlg_eligibility"):
        setattr(self, attribute, getattr(funding_calulator, attribute))

View file

@ -2,6 +2,7 @@ import os
import time
import re
from urllib.parse import urlencode
import usaddress
import pandas as pd
import numpy as np
@ -95,7 +96,7 @@ vartypes = {
'walls-env-eff': 'str',
'transaction-type': 'str',
# 'uprn': "Int64",
'current-energy-efficiency': 'float',
'current-energy-efficiency': 'Int64',
'energy-consumption-current': 'float',
'mainheat-description': 'str',
'lighting-cost-current': 'float',
@ -138,8 +139,8 @@ class SearchEpc:
}
NODATA = {
"status": 201,
"message": "No data",
"status": 204,
"message": "no data",
"error": None
}
@ -154,7 +155,7 @@ class SearchEpc:
uprn: [int, None] = None,
size=None,
property_type=None,
fast=False
fast=False,
):
"""
Address lines 1 and postcode are mandatory fields. The other address lines are optional
@ -206,10 +207,15 @@ class SearchEpc:
try:
# Updated regex to catch house numbers including alphanumeric ones
pattern = r'(?i)(?:flat|apartment)\s*(\d+\w*)|^\s*(\d+\w*)'
match = re.search(pattern, address)
if match:
return next(g for g in match.groups() if g is not None)
pattern = r'(?i)(?:flat|apartment|room)\s*(\d+\w*)|^\s*(\d+\w*)'
match1 = re.search(pattern, address)
if match1:
return next(g for g in match1.groups() if g is not None)
pattern2 = r'(?i)(flat|apartment|room)\s*([a-zA-Z]?\d+[a-zA-Z]?)'
match2 = re.search(pattern2, address)
if match2:
return match2.group(2)
parsed = usaddress.parse(address)
# First, try to get the 'OccupancyIdentifier' if 'OccupancyType' is detected
@ -220,7 +226,8 @@ class SearchEpc:
continue
if part == postcode.split(" ")[1]:
continue
return part # This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
return part.rstrip(",")
# This assumes the first 'OccupancyIdentifier' after 'OccupancyType' is the primary
# number
# Fallback to 'AddressNumber' if no 'OccupancyIdentifier' is found
@ -247,46 +254,36 @@ class SearchEpc:
else:
return None
def get_epc(self, params=None, size=None):
# Get the EPC data with retries
size = size if size is not None else self.size
if params is None:
if self.uprn:
params = {"uprn": self.uprn}
else:
params = {"address": self.address1, "postcode": self.postcode}
def _get_epc(self, params, size):
"""
To be called by get_epc() - not for external usage
"""
url = os.path.join(self.client.domestic.host, "search")
if size:
url += "?" + urlencode({k: v for k, v in {"size": size}.items() if v})
for retry in range(self.max_retries):
try:
if "uprn" in params:
# We use the direct call method inside, since we need to implement uprn as a valid
# parameter for the search function
url = os.path.join(self.client.domestic.host, "search")
response = self.client.domestic.call(method="get", url=url, params=params)
else:
response = self.client.domestic.search(params=params, size=size)
response = self.client.domestic.call(method="get", url=url, params=params)
if response:
self.data = response
return self.SUCCESS
return {
"response": response,
"msg": self.SUCCESS
}
if retry > 0:
logger.info("Failed previous attempt but retry successful")
# If we got nothing, final try
if not response:
return {
"status": 204,
"message": "no data",
"error": None
"response": response,
"msg": self.NODATA
}
return {
"status": 200,
"message": "success",
"error": None
}
except Exception as e:
if retry < self.max_retries - 1:
# If not the last retry, wait for 3 seconds before retrying
@ -294,11 +291,66 @@ class SearchEpc:
else:
# If it's the last retry, we continue
return {
"status": 500,
"message": "Could not retrieve EPC data",
"error": str(e)
"response": {},
"msg": {
"status": 500,
"message": "Could not retrieve EPC data",
"error": str(e)
}
}
def get_epc(self, params=None, size=None):
    """
    Search for EPC records (with retries, via _get_epc) and store any rows found on self.data.

    If params is given, it is used as-is for a single search. Otherwise a search by UPRN (when
    self.uprn is set) and a search by address/postcode (when either is set) are both run and
    their rows are merged, since properties are sometimes listed under the wrong UPRN.

    :param params: Optional search parameters, passed straight to _get_epc
    :param size: Optional page size. Defaults to self.size
    :return: The status message dictionary (status, message, error) describing the outcome
    :raises ValueError: If no params are given and none of uprn, address1 or postcode are set
    """
    size = size if size is not None else self.size

    if params:
        output = self._get_epc(params=params, size=size)
        if output["msg"]["status"] == 200:
            self.data = output["response"]
        return output["msg"]

    if not self.uprn and not self.address1 and not self.postcode:
        raise ValueError("No search parameters provided")

    uprn_params = {"uprn": self.uprn} if self.uprn else {}
    address_params = {}
    if self.address1:
        address_params["address"] = self.address1
    if self.postcode:
        address_params["postcode"] = self.postcode

    # We attempt the search with uprn params
    data = {"rows": []}
    api_response = {}
    if uprn_params:
        api_response = self._get_epc(params=uprn_params, size=size)
        if api_response["msg"]["status"] == 200:
            data["rows"].extend(api_response["response"]["rows"])

    # We also search on the address, since we find that properties are sometimes listed under
    # the wrong UPRN
    if address_params:
        api_response = self._get_epc(params=address_params, size=size)
        if api_response["msg"]["status"] == 200:
            # We update the data with the correct uprn
            if self.uprn:
                for x in api_response["response"]["rows"]:
                    x["uprn"] = self.uprn
            data["rows"].extend(api_response["response"]["rows"])

    # We now de-dupe on lmk-key, keeping the first occurrence of each record
    seen = set()
    unique_rows = []
    for row in data["rows"]:
        if row["lmk-key"] in seen:
            continue
        seen.add(row["lmk-key"])
        unique_rows.append(row)
    data["rows"] = unique_rows

    if data["rows"]:
        # Store the merged rows so that they are available on the instance, in the same way as
        # the explicit params search above. Previously the merged data was discarded
        self.data = data
        api_response["msg"] = self.SUCCESS

    return api_response["msg"]
def filter_rows(self, rows, property_type=None, address=None):
"""
This method should not be used when property_type and address are both not None
@ -343,8 +395,12 @@ class SearchEpc:
rows_filtered = [r for r in rows if ", ".join([r["address"], r["posttown"]]) == best_match[0]]
else:
best_match = process.extractOne(address, [r["address"] for r in rows], score_cutoff=0)
# Get the UPRN for the best match
best_match_uprn = {r["uprn"] for r in rows if r["address"] == best_match[0]}.pop()
# Get all of the scores
rows_filtered = [r for r in rows if r["address"] == best_match[0]]
rows_filtered = [
r for r in rows if (r["address"] == best_match[0]) or (r["uprn"] == best_match_uprn)
]
if rows_filtered:
return rows_filtered
@ -643,6 +699,7 @@ class SearchEpc:
estimation_data = epc_data[[key, "weight", "lodgement-datetime"]].copy()
estimation_data = estimation_data[~pd.isnull(estimation_data[key])]
estimation_data = estimation_data[~estimation_data[key].isin(Definitions.DATA_ANOMALY_MATCHES)]
if vartype == "Int64":
# We have some edge cases where we get the error "invalid literal for int() with base 10: '1.0'"
# so this handles this
@ -654,6 +711,13 @@ class SearchEpc:
estimated_epc[key] = None
continue
if key == "floor-height":
# We specifically handle this, to avoid extreme values
# We check if we have any rows less than 3.5m
if estimation_data[estimation_data["floor-height"].astype(float) <= 3.5].shape[0] > 0:
# Perform the filter
estimation_data = estimation_data[estimation_data["floor-height"].astype(float) <= 3.5]
if vartype == "Int64":
estimated_value = self._estimate_int(estimation_data, key)
elif vartype == "float":
@ -676,7 +740,30 @@ class SearchEpc:
estimated_epc["current-energy-rating"] = sap_to_epc(estimated_epc["current-energy-efficiency"])
# Convert the cost current and potential variables - to string integers
for variable in ["heating-cost-current", "hot-water-cost-current", "lighting-cost-current",
"heating-cost-potential", "hot-water-cost-potential", "lighting-cost-potential"]:
estimated_epc[variable] = str(int(estimated_epc[variable]))
# This is a string
estimated_epc["low-energy-fixed-light-count"] = (
str(estimated_epc["low-energy-fixed-light-count"]) if estimated_epc["low-energy-fixed-light-count"] else ""
)
# This is an int
estimated_epc["photo-supply"] = (
int(np.round(estimated_epc["photo-supply"])) if estimated_epc["photo-supply"] else estimated_epc[
"photo-supply"]
)
estimated_epc["co2-emiss-curr-per-floor-area"] = (
estimated_epc["co2-emissions-current"] / estimated_epc["total-floor-area"]
)
estimated_epc["postcode"] = self.postcode
if not self.uprn:
# Update self.uprn too
self.uprn = hash(self.address1 + self.postcode)
estimated_epc["uprn"] = self.uprn
estimated_epc["address"] = self.full_address
# Indicate that this epc was estimated

View file

@ -9,8 +9,7 @@ from tqdm import tqdm
from math import sin, cos, sqrt, atan2, radians
from utils.logger import setup_logger
from recommendations.Costs import Costs, MCS_SOLAR_PV_COST_DATA
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
from recommendations.Costs import Costs
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
from backend.Property import Property
from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
@ -51,6 +50,16 @@ class GoogleSolarApi:
MIN_UNIT_PANELS = 4 # Minimum number of panels we allow for a domestic building
MIN_BUILDING_PANELS = 10 # Minimum number of panels we allow for a block of flats
# Max area of a roof space we allow panels for
PERCENTAGE_OF_ROOF_LIMIT = 0.8
# If the roof area that comes back from the solar API is more than 25% larger (or more than 25% smaller) than
# the estimated roof area that we calculate based on the property dimensions, we will correct the roof area
ROOF_AREA_TOLERANCE = 1.25
# Error Messages
ENTITY_NOT_FOUND_ERROR = 'Requested entity was not found.'
def __init__(self, api_key, max_retries=5):
"""
Initialize the GoogleSolarApi class with the provided API key and maximum retries.
@ -109,6 +118,13 @@ class GoogleSolarApi:
response.raise_for_status() # Raise an error for bad status codes
return response.json()
except requests.exceptions.RequestException as e:
if (
(e.response.status_code == 404) &
(e.response.json()["error"]["message"] == self.ENTITY_NOT_FOUND_ERROR)
):
logger.warning("No building insights found for the given location.")
return {"error": self.ENTITY_NOT_FOUND_ERROR}
attempt += 1
print(f"Attempt {attempt} failed: {e}")
time.sleep(2 ** attempt) # Exponential backoff
@ -152,6 +168,10 @@ class GoogleSolarApi:
# If we have no data in the db, or updated_at is more than 6 months
if self.insights_data is None or is_outdated:
self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
if self.insights_data.get("error") == self.ENTITY_NOT_FOUND_ERROR:
# We use default performance since in this case, we couldn't retrieve data. We don't store
self.panel_performance = self.default_panel_performance(property_instance=property_instance)
return
self.need_to_store = True
# Extract key data from the insights response
@ -159,12 +179,19 @@ class GoogleSolarApi:
# Automatically exclude north-facing segments
self.exclude_north_facing_segments(property_instance=property_instance)
# If a property is semi-detached, it's possible for us to include segments from an attached unit
if (property_instance.data["built-form"] == "Semi-Detached") and (
property_instance.data["extension-count"] == 0
):
self.exclude_likely_duplicate_surfaces()
if property_instance is not None:
if (property_instance.data["built-form"] == "Semi-Detached") and (
property_instance.data["extension-count"] == 0
):
self.exclude_likely_duplicate_surfaces()
# We constrain the roof area, based on the floor area to be more conservative
self.roof_area = self.insights_data["solarPotential"]["wholeRoofStats"]['areaMeters2']
if (
self.roof_area > property_instance.roof_area * self.ROOF_AREA_TOLERANCE
) | (self.roof_area < (2 - self.ROOF_AREA_TOLERANCE) * property_instance.roof_area):
self.roof_area = property_instance.roof_area
self.floor_area = self.insights_data["solarPotential"]["wholeRoofStats"]['groundAreaMeters2']
self.panel_wattage = self.insights_data["solarPotential"]["panelCapacityWatts"]
if self.panel_wattage != 400:
@ -179,7 +206,9 @@ class GoogleSolarApi:
# We now start finding the solar panel configurations
self.optimise_solar_configuration(
energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
energy_consumption=energy_consumption,
is_building=is_building,
property_instance=property_instance
)
# Finally, if we have a double property, we half the data we stored area
@ -259,8 +288,6 @@ class GoogleSolarApi:
# minimum is 4
min_panels = self.MIN_BUILDING_PANELS if is_building else self.MIN_UNIT_PANELS
cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None
# Remove any north facing roof segments
panel_performance = []
for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
@ -294,14 +321,12 @@ class GoogleSolarApi:
if roi_summary["n_panels"].sum() < min_panels:
continue
if cost_instance is None:
total_cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
else:
total_cost = cost_instance.solar_pv(
n_panels=roi_summary["n_panels"].sum(),
has_battery=False,
n_floors=property_instance.number_of_floors,
)["total"]
total_cost = Costs.solar_pv(
n_panels=roi_summary["n_panels"].sum(),
has_battery=False,
# Assume the most amount of scaffolding
n_floors=3 if property_instance is None else property_instance.number_of_floors
)["total"]
weighted_ratio = np.average(
roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
@ -491,6 +516,11 @@ class GoogleSolarApi:
panel_performance = panel_performance.drop(columns=["n_panels_halved"])
panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]
# Finally, we prevent pannelled roof area being above a limit
panel_performance = panel_performance[
panel_performance["panneled_roof_area"] <= self.roof_area * self.PERCENTAGE_OF_ROOF_LIMIT
]
self.panel_performance = panel_performance
def exclude_north_facing_segments(self, property_instance):
@ -792,15 +822,19 @@ class GoogleSolarApi:
property_instance = [p for p in input_properties if p.id == unit["property_id"]][0]
# At this level, we check if the property is suitable for solar and if now, skip
# Or if we have a solar non-invasive recommendation
non_invasive_rec = next(
(r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"), {}
).get("array_wattage")
if (
(not property_instance.is_solar_pv_valid()) or
[r for r in property_instance.non_invasive_recommendations if r["type"] == "solar_pv"]
non_invasive_rec is not None
):
continue
if unit["longitude"] is None or unit["latitude"] is None:
# At this point, we've checked that solar PV is valid, and so we provide some defaults
property_instance.set_solar_panel_configuration(
solar_panel_configuration={
"insights_data": None,
@ -855,19 +889,19 @@ class GoogleSolarApi:
cost_instance = Costs(property_instance=property_instance)
# We return a 2.4 and 4 kwp system
# We return a 1.6 and 3.2 kwp system
panel_performance = pd.DataFrame(
[
{
'n_panels': 10,
'yearly_dc_energy': 4000 * 0.99, # Assumed 99% efficient wattage -> dc
'n_panels': 8,
'yearly_dc_energy': 3200 * assumptions.MEDIAN_WATTAGE_TO_DC,
'total_cost': cost_instance.solar_pv(
n_panels=10, has_battery=False, n_floors=property_instance.number_of_floors
n_panels=8, has_battery=False, n_floors=property_instance.number_of_floors
)["total"],
'weighted_ratio': None,
'panneled_roof_area': 10 * assumptions.RDSAP_AREA_PER_PANEL,
'array_wattage': 4000,
'initial_ac_kwh_per_year': 4000 * 0.95, # Assumed 95% efficient wattage -> ac
'panneled_roof_area': 8 * assumptions.RDSAP_AREA_PER_PANEL,
'array_wattage': 3200,
'initial_ac_kwh_per_year': 3200 * assumptions.MEDIAN_WATTAGE_TO_AC,
'lifetime_ac_kwh': None,
'lifetime_dc_kwh': None,
'roi': None,
@ -879,15 +913,15 @@ class GoogleSolarApi:
'rank': None
},
{
'n_panels': 6,
'yearly_dc_energy': 2400 * 0.99, # Assumed 99% efficient wattage -> dc
'n_panels': 4,
'yearly_dc_energy': 1600 * assumptions.MEDIAN_WATTAGE_TO_DC,
'total_cost': cost_instance.solar_pv(
n_panels=6, has_battery=False, n_floors=property_instance.number_of_floors
)["total"],
'weighted_ratio': None,
'panneled_roof_area': 6 * assumptions.RDSAP_AREA_PER_PANEL,
'array_wattage': 2400,
'initial_ac_kwh_per_year': 2400 * 0.95, # Assumed 95% efficient wattage -> ac
'panneled_roof_area': 4 * assumptions.RDSAP_AREA_PER_PANEL,
'array_wattage': 1600,
'initial_ac_kwh_per_year': 1600 * assumptions.MEDIAN_WATTAGE_TO_AC,
'lifetime_ac_kwh': None,
'lifetime_dc_kwh': None,
'roi': None,

View file

@ -1,7 +1,7 @@
# Assumes that the average efficiency of an air source heat pump is 250%, taking the median of the 200-400% range,
# which is often quoted as a sensible efficiency range for air source heat pumps.
# We assume that the ASHP efficiency is 280%, which is the minimum that Cotswolds Energy Group achieves, as
# they target this
PESSIMISTIC_ASHP_EFFICIENCY = 200
AVERAGE_ASHP_EFFICIENCY = 250
AVERAGE_ASHP_EFFICIENCY = 280
# Conservative estimate of the proportion of electricity that will be consumed, whereas the rest will
# be exported. These are averages based on Google research. E.g
@ -11,9 +11,15 @@ SOLAR_CONSUMPTION_WITH_BATTERY_PROPORTION = 0.7
# Typically, each solar panel takes up around 3.4 m2 of roof space under RdSAP. This has been verified in Elmhurst
RDSAP_AREA_PER_PANEL = 3.4
# This is a median based on a sample of properties
MEDIAN_WATTAGE_TO_AC = 0.965
MEDIAN_WATTAGE_TO_DC = 0.99
SOCIAL_TENURES = ["Rented (social)", "rental (social)"]
# Carbon intensity of electricity, as of 16th Jan 2025
ELECTRICITY_CARBON_INTENSITY = 0.232
DESCRIPTIONS_TO_FUEL_TYPES = {
"Air source heat pump, radiators, electric": {
"fuel": "Electricity", "cop": AVERAGE_ASHP_EFFICIENCY / 100
@ -50,4 +56,12 @@ DESCRIPTIONS_TO_FUEL_TYPES = {
},
"Gas instantaneous at point of use": {"fuel": "Natural Gas", "cop": 0.85},
"Room heaters, wood logs": {"fuel": "Wood Logs", "cop": 1},
"Boiler and radiators, coal": {"fuel": "Coal", "cop": 0.85},
"From main system, no cylinderstat": {"fuel": "Natural Gas", "cop": 0.85},
}
# These are the measure types where if there is a ventilation recommendation, we force the inclusion of it
# if one of these has been recommended.
measures_needing_ventilation = [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]

View file

@ -138,7 +138,7 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
"recommendation_id": recommendation_id,
"material_id": part["id"],
"depth": int(part["depth"]) if part["depth"] else None,
"quantity": part["quantity"],
"quantity": float(part["quantity"]),
"quantity_unit": part["quantity_unit"],
"estimated_cost": part["total"],
}

View file

@ -19,6 +19,7 @@ class MaterialType(enum.Enum):
flat_roof_insulation = "flat_roof_insulation"
room_roof_insulation = "room_roof_insulation"
windows_glazing = "windows_glazing"
cavity_wall_extraction = "cavity_wall_extraction"
iwi_wall_demolition = "iwi_wall_demolition"
iwi_vapour_barrier = "iwi_vapour_barrier"

View file

@ -1,3 +1,4 @@
import ast
import json
from datetime import datetime
@ -27,9 +28,11 @@ from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.plan.utils import get_cleaned
from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
import backend.app.assumptions as assumptions
from backend.ml_models.api import ModelApi
from backend.Property import Property
from backend.Funding import Funding
from backend.apis.GoogleSolarApi import GoogleSolarApi
from recommendations.optimiser.CostOptimiser import CostOptimiser
@ -42,6 +45,7 @@ from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.KwhData import KwhData
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
logger = setup_logger()
@ -120,7 +124,7 @@ def extract_portfolio_aggregation_data(
# We can now calculate multiple outputs based on default recommendations
carbon_savings = sum([r["co2_equivalent_savings"] for r in default_recommendations])
pre_retrofit_co2 = p.data["co2-emissions-current"]
pre_retrofit_co2 = p.energy["co2_emissions"]
post_retrofit_co2 = pre_retrofit_co2 - carbon_savings
pre_retrofit_energy_bill = sum(p.current_energy_bill.values())
@ -337,7 +341,10 @@ def extract_property_request_data(
# Because we have some non-invasive recommendations that match on address and postcode, but not UPRN
# we need to check existence of uprn
has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else True
has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False
if has_uprn:
has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None]
if has_uprn:
property_non_invasive_recommendations = next((
x for x in non_invasive_recommendations if
@ -352,7 +359,6 @@ def extract_property_request_data(
), {})
if isinstance(property_non_invasive_recommendations.get("recommendations"), str):
import ast
property_non_invasive_recommendations["recommendations"] = ast.literal_eval(
property_non_invasive_recommendations["recommendations"]
)
@ -363,16 +369,49 @@ def extract_property_request_data(
else:
transformed.append(rec)
property_non_invasive_recommendations["recommendations"] = str(transformed)
property_non_invasive_recommendations["recommendations"] = transformed
property_valution = next((
float(x["value"]) for x in valuation_data if
(str(x["uprn"]) == str(uprn))
), None)
# Check if the valuation data has uprn
valuation_has_uprn = "uprn" in valuation_data[0] if valuation_data else False
if valuation_has_uprn:
valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None]
if valuation_has_uprn:
property_valution = next((
float(x["valuation"]) for x in valuation_data if
(str(x["uprn"]) == str(uprn))
), None)
else:
property_valution = next((
float(x["valuation"]) for x in valuation_data if
(x["address"] == config["address"]) and (x["postcode"] == config["postcode"])
), None)
return patch, property_already_installed, property_non_invasive_recommendations, property_valution
def get_funding_data():
    """
    Load the two funding reference datasets from the data bucket in S3.

    The ECO4 project scores matrix has its columns renamed to 'Floor Area Segment', 'Starting Band',
    'Finishing Band' and 'Cost Savings', with 'Cost Savings' converted to float. The warm homes
    local grant (WHLG) eligible postcodes are returned as read.

    :return: Tuple of (ECO4 project scores matrix DataFrame, WHLG eligible postcodes DataFrame)
    """
    # ECO4 full project scores matrix
    eco4_scores = pd.DataFrame(
        read_csv_from_s3(
            bucket_name=get_settings().DATA_BUCKET,
            filepath="funding/ECO4 Full Project Scores Matrix.csv",
        )
    )
    eco4_scores.columns = ['Floor Area Segment', 'Starting Band', 'Finishing Band', 'Cost Savings']
    eco4_scores["Cost Savings"] = eco4_scores["Cost Savings"].astype(float)

    # Postcodes that are eligible for the warm homes local grant
    whlg_postcodes = pd.DataFrame(
        read_csv_from_s3(
            bucket_name=get_settings().DATA_BUCKET,
            filepath="funding/whlg eligible postcodes.csv",
        )
    )

    return eco4_scores, whlg_postcodes
router = APIRouter(
prefix="/plan",
tags=["plan"],
@ -393,6 +432,14 @@ async def trigger_plan(body: PlanTriggerRequest):
session.begin()
logger.info("Getting the inputs")
plan_input = read_csv_from_s3(bucket_name=get_settings().PLAN_TRIGGER_BUCKET, filepath=body.trigger_file_path)
# Check for duplicate UPRNS
input_uprns = [x.get("uprn") for x in plan_input if "uprn" in x and x.get("uprn")]
if input_uprns:
# Check for dupes
if len(input_uprns) != len(set(input_uprns)):
raise ValueError("Duplicate UPRNs in the input data")
# If we have patches or overrides, we should read them in here
patches, already_installed, non_invasive_recommendations, valuation_data = get_request_property_data(body)
@ -424,13 +471,22 @@ async def trigger_plan(body: PlanTriggerRequest):
# Create a record in db
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean,
epc_searcher.uprn,
energy_assessment
session=session,
portfolio_id=body.portfolio_id,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
uprn=epc_searcher.uprn,
energy_assessment=energy_assessment
)
if not is_new and not body.multi_plan:
continue
if epc_searcher.newest_epc is None:
raise ValueError(
"No EPCs found for this property and did not estimate - likely need to provide a"
"property type and built form"
)
if is_new:
create_property_targets(
session,
@ -459,6 +515,14 @@ async def trigger_plan(body: PlanTriggerRequest):
)
)
# If we have a remote assessment event type, we pull the additional data and include it
if body.event_type == "remote_assessment":
logger.info("Retrieving find my epc data")
property_non_invasive_recommendations = RetrieveFindMyEpc.get_from_epc(
epc_searcher.newest_epc
)
# TODO: We need to determine if we should make a patch, if the EPC is new
epc_records = patch_epc(patch, epc_records)
prepared_epc = EPCRecord(
@ -489,7 +553,8 @@ async def trigger_plan(body: PlanTriggerRequest):
model_api = ModelApi(
portfolio_id=body.portfolio_id,
timestamp=created_at,
prediction_buckets=get_prediction_buckets()
prediction_buckets=get_prediction_buckets(),
max_retries=1
)
await model_api.async_warm_up_lambdas(
model_prefies=model_api.KWH_MODEL_PREFIXES + model_api.MODEL_PREFIXES
@ -501,6 +566,7 @@ async def trigger_plan(body: PlanTriggerRequest):
logger.info("Reading in materials and cleaned datasets")
materials = get_materials(session)
cleaned = get_cleaned()
eco_project_scores_matrix, whlg_eligible_postcodes = get_funding_data()
kwh_client = KwhData(bucket=get_settings().DATA_BUCKET, read_consumption_data=True)
@ -584,8 +650,10 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=["rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"]
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
all_predictions = await model_api.async_paginated_predictions(
@ -604,6 +672,7 @@ async def trigger_plan(body: PlanTriggerRequest):
property_instance=property_instance,
all_predictions=all_predictions,
recommendations=recommendations,
representative_recommendations=representative_recommendations
)
)
@ -625,8 +694,6 @@ async def trigger_plan(body: PlanTriggerRequest):
)
# We now insert kwh estimates and costs into the recommendations
# TODO: We should join the methodology which maps the heating and hot water descriptions to the fuel types in
# Recommendations, but also the Property class
logger.info("Calculating tenant savings - kwh and bills")
for property_id in tqdm([p.id for p in input_properties]):
property_recommendations = recommendations.get(property_id, [])
@ -636,59 +703,130 @@ async def trigger_plan(body: PlanTriggerRequest):
Recommendations.calculate_recommendation_tenant_savings(
property_instance=property_instance,
kwh_simulation_predictions=kwh_simulation_predictions,
property_recommendations=property_recommendations
property_recommendations=property_recommendations,
ashp_cop=body.ashp_cop
)
)
property_instance.current_energy_bill = property_current_energy_bill
# Insert the predictions into the recommendations and run the optimiser
# TODO: If a recommendation has a negative impact on SAP, we should remove it - this seems to have become a
# possibility with heating system
# TODO: After optimising, if there are any cheap, quick win measures (e.g. insulate water tank with hot water
# cylinder jacket), we should add these to the recommendations as default
for p in input_properties:
if not recommendations.get(p.id):
continue
input_measures = prepare_input_measures(recommendations[p.id], body.goal)
# we need to double unlist because we have a list of lists
property_measure_types = {rec["type"] for recs in recommendations[p.id] for rec in recs}
current_sap_points = int(p.data["current-energy-efficiency"])
target_sap_points = epc_to_sap_lower_bound(body.goal_value)
sap_gain = CostOptimiser.calculate_sap_gain_with_slack(target_sap_points - current_sap_points)
property_required_measures = [
m for m in recommendations[p.id] if m[0]["type"] in body.required_measures
]
measures_to_optimise = [
m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures
]
if not body.optimise:
if body.goal != "Increasing EPC":
raise NotImplementedError("Only EPC optimisation is currently supported")
# If we have a wall insulation measure, we MUST include mechanical ventilation
# Additionally, if we have required measures, they should also be included. Therefore
# we can discount the number of points required to get to the target SAP band (or increase)
# in the case of ventilation
needs_ventilation = any(x in property_measure_types for x in assumptions.measures_needing_ventilation)
input_measures = prepare_input_measures(measures_to_optimise, body.goal, needs_ventilation)
if not input_measures[0]:
# This means that we have no defaults
selected_recommendations = {}
solution = []
for sub_list in input_measures:
# Select the entry with the highest gain, and if tied, choose the one with the lowest cost
best_measure = max(sub_list, key=lambda x: (x['gain'], -x['cost']))
solution.append(best_measure)
else:
if body.budget:
optimiser = GainOptimiser(
input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
fixed_gain = 0
if property_required_measures:
# We get the SAP points for the required measures
if body.goal != "Increasing EPC":
raise NotImplementedError("Only EPC optimisation is currently supported")
sap_by_type = [
{"type": rec["type"], "sap_points": rec["sap_points"]} for recs in property_required_measures
for rec in recs
]
# We get a MAX sap points per type
max_per_type = (
pd.DataFrame(sap_by_type).groupby("type")["sap_points"].max().to_dict()
)
fixed_gain = sum(max_per_type.values())
property_required_measure_types = {rec["type"] for rec in sap_by_type}
# If the property needs ventilation, but the measures we optimise didn't include
# ventilation, we add the points for ventilation as a fixed gain
if needs_ventilation and any(
r in property_required_measure_types for r in assumptions.measures_needing_ventilation
):
fixed_gain += next(
(r[0]["sap_points"] for r in recommendations[p.id] if
r[0]["type"] == "mechanical_ventilation"),
0
)
current_sap_points = int(p.data["current-energy-efficiency"])
sap_gain = CostOptimiser.calculate_sap_gain_with_slack(
epc_to_sap_lower_bound(body.goal_value) - current_sap_points
) - fixed_gain
if not body.optimise:
if body.goal != "Increasing EPC":
raise NotImplementedError("Only EPC optimisation is currently supported")
solution = []
for sub_list in input_measures:
# Select the entry with the highest gain, and if tied, choose the one with the lowest cost
best_measure = max(sub_list, key=lambda x: (x['gain'], -x['cost']))
solution.append(best_measure)
else:
# The minimum gain is the minimum number of SAP points required to get to the target SAP band
# If the gain is negative, the optimiser will return an empty solution
optimiser = CostOptimiser(
input_measures,
min_gain=sap_gain
)
optimiser.setup()
optimiser.solve()
solution = optimiser.solution
if body.budget:
optimiser = GainOptimiser(
input_measures, max_cost=body.budget, max_gain=sap_gain if sap_gain > 0 else 0
)
else:
# The minimum gain is the minimum number of SAP points required to get to the target SAP band
# If the gain is negative, the optimiser will return an empty solution
optimiser = CostOptimiser(
input_measures,
min_gain=sap_gain
)
selected_recommendations = {r["id"] for r in solution}
optimiser.setup()
optimiser.solve()
solution = optimiser.solution
selected_recommendations = {r["id"] for r in solution}
if property_required_measures:
# We select the cheapest of the required measures, into selected
for recs in property_required_measures:
# We select the cheapest of the required measures
cost_to_id = {
rec["recommendation_id"]: rec["total"] for rec in recs
if rec["recommendation_id"] not in selected_recommendations
}
# Take the recommendation id with the lowest cost
selected_recommendations.add(min(cost_to_id, key=cost_to_id.get))
# Update the solution with the selected recommendations
solution = []
for recs in recommendations[p.id]:
for rec in recs:
if rec["recommendation_id"] in selected_recommendations:
solution.append(
{
"id": rec["recommendation_id"],
"cost": rec["total"],
"gain": rec["sap_points"],
"type": rec["type"]
}
)
# If wall insulation is selected, we also include mechanical ventilation as a best practice measure
if any(x in [r["type"] for r in solution] for x in [
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation"
]):
if any(x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation):
ventilation_rec = next(
(r[0] for r in recommendations[p.id] if r[0]["type"] == "mechanical_ventilation"),
None
@ -717,10 +855,57 @@ async def trigger_plan(body: PlanTriggerRequest):
]
# We'll also unlist the recommendations so they're a bit easier to handle from here onwards
final_recommendations = [
recommendations[p.id] = [
rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type
]
recommendations[p.id] = final_recommendations
# when we have buildings, we tweak our solar PV recommendations as if one unit needs it, we apply it to all
# of them
# TODO: We can probably do better and optimise at the building level - this is temp
logger.info("Adjusting solar PV recommendations for buildings")
building_ids = set([p.building_id for p in input_properties if p.building_id is not None])
for bid in building_ids:
# We check if any of them have solar PV
building = [p for p in input_properties if p.building_id == bid]
has_solar = False
for unit in building:
# Get default recommendations
has_solar = len([r for r in recommendations[unit.id] if r["default"] and r["type"] == "solar_pv"]) > 0
if has_solar:
break
if has_solar:
# We adjust the units within the building
for unit in building:
for rec in recommendations[unit.id]:
if rec["type"] == "solar_pv":
# This is straightforward, we just set the default to True, since when we're at a building
# level, we only allow 1 solar PV option for each unit. If we change this, this logic will
# need to be updated
rec["default"] = True
# ~~~~~~~~~~~~~~~~
# Funding
# ~~~~~~~~~~~~~~~~
# for p in input_properties:
# funding_calulator = Funding(
# tenure=body.housing_type,
# starting_epc=p.data["current-energy-rating"],
# starting_sap=int(p.data["current-energy-efficiency"]),
# postcode=p.postcode,
# floor_area=p.floor_area,
# council_tax_band=None, # This is seemingly always None at the moment
# property_recommendations=recommendations[p.id],
# project_scores_matrix=eco_project_scores_matrix,
# whlg_eligible_postcodes=whlg_eligible_postcodes,
# gbis_abs_rate=15,
# eco4_abs_rate=15,
# )
# funding_calulator.check_eligibiltiy()
# # Insert finding
# p.insert_funding(funding_calulator)
logger.info("Uploading recommendations to the database")
# If we have any work to do, we create a new scenario
@ -759,7 +944,11 @@ async def trigger_plan(body: PlanTriggerRequest):
new_epc = sap_to_epc(new_sap_points)
new_epc_bands[p.id] = new_epc
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
total_cost = sum([r["total"] for r in default_recommendations])
valuations = PropertyValuation.estimate(
property_instance=p, target_epc=new_epc, total_cost=total_cost
)
property_value_increase_ranges[p.id] = valuations
if p.is_new:
@ -844,6 +1033,7 @@ async def trigger_plan(body: PlanTriggerRequest):
# Commit final changes
session.commit()
except IntegrityError:
logger.error("Database integrity error occurred", exc_info=True)
session.rollback()

View file

@ -37,6 +37,7 @@ MEASURE_MAP = {
VALID_GOALS = ["Increasing EPC"]
VALID_HOUSING_TYPES = ["Social", "Private"]
VALID_EVENT_TYPES = ["remote_assessment"]
# Define the validation function for inclusions/exclusions
@ -56,10 +57,16 @@ def check_housing_type(value: str) -> str:
return value
def check_event_type(value: str) -> str:
    """
    Validate that the supplied event type is one of VALID_EVENT_TYPES.
    :param value: The event type supplied on the request.
    :return: The same value, unchanged, when it is valid.
    :raises AssertionError: If the value is not listed in VALID_EVENT_TYPES.
    """
    is_known_event_type = value in VALID_EVENT_TYPES
    assert is_known_event_type, f"{value} is not a valid event type"
    return value
# Use Annotated with BeforeValidator for each list item validation
InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)]
Goal = Annotated[str, BeforeValidator(check_goals)]
HousingType = Annotated[str, BeforeValidator(check_housing_type)]
EventType = Annotated[str, BeforeValidator(check_event_type)]
class PlanTriggerRequest(BaseModel):
@ -75,8 +82,17 @@ class PlanTriggerRequest(BaseModel):
valuation_file_path: Optional[str] = None
exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
# This is a list of measures that we want to be included, if they are options
# Default to empty
required_measures: Optional[List[InclusionOrExclusionItem]] = Field(default=[], min_length=1)
scenario_name: Optional[str] = ""
multi_plan: Optional[bool] = False
optimise: Optional[bool] = True
default_u_values: Optional[bool] = True
ashp_cop: Optional[float] = 2.8
# When performing a remote assessment, if this has been set, it will allow the engine to
# pull data from the find my epc website, to utilise as part of a remote assessment.
# The value is a string checked by the EventType validator. No trailing comma here: a trailing
# comma would turn the default into a one-element tuple, which never equals "remote_assessment".
event_type: Optional[EventType] = "remote_assessment"

View file

@ -1,9 +1,5 @@
import pandas as pd
from backend.Property import Property
from utils.s3 import read_from_s3
from recommendations.recommendation_utils import get_wall_u_value, get_floor_u_value, get_roof_u_value
from backend.app.config import get_settings
import msgpack

View file

@ -28,8 +28,8 @@ class AnnualBillSavings:
# Latest price cap figures from Ofgem are for April 2024
# https://www.ofgem.gov.uk/energy-price-cap
ELECTRICITY_PRICE_CAP = 0.2236
GAS_PRICE_CAP = 0.0548
ELECTRICITY_PRICE_CAP = 0.2486
GAS_PRICE_CAP = 0.0634
# This is the most recent export payment figure, at 9.28p/kWh
# Smart export guarantee rates can be found here:
# https://www.sunsave.energy/solar-panels-advice/exporting-to-the-grid/best-seg-rates
@ -39,8 +39,8 @@ class AnnualBillSavings:
PRICE_FACTOR = 0.09549999999999999
# Daily standard charge, based on average across England, Scotland and Wales, and includes VAT
DAILY_STANDARD_CHARGE_GAS = 0.3143
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.601
DAILY_STANDARD_CHARGE_GAS = 0.3165
DAILY_STANDARD_CHARGE_ELECTRICITY = 0.6097
# Based on https://www.nottenergy.com/advice-and-tools/project-energy-cost-comparison
# For July 2024. These quotes are based on the east midlands region, so we

View file

@ -1,5 +1,4 @@
import numpy as np
from scipy.constants import value
class PropertyValuation:
@ -203,12 +202,43 @@ class PropertyValuation:
return msm_increase, lloyds_increase
@classmethod
def estimate(cls, property_instance, target_epc):
def estimate(cls, property_instance, target_epc, total_cost=None):
"""
This function estimates the value of a property based on the current EPC rating and the target EPC rating
:param property_instance: An instance of the Property class
:param target_epc: The target EPC rating
:param total_cost: The total cost of the retrofit
:return:
"""
current_value = (
property_instance.valuation if property_instance.valuation else
cls.UPRN_VALUE_LOOKUP.get(property_instance.uprn)
)
current_epc = property_instance.data["current-energy-rating"]
if not current_value:
return {
"current_value": 0,
"lower_bound_increased_value": 0,
"upper_bound_increased_value": 0,
"average_increased_value": 0,
"average_increase": 0
}
return cls.estimate_valuation_improvement(current_value, current_epc, target_epc, total_cost)
@classmethod
def estimate_valuation_improvement(cls, current_value, current_epc, target_epc, total_cost=None):
"""
This function estimates the value of a property based on the current EPC rating and the target EPC rating
:param current_value:
:param current_epc:
:param target_epc:
:param total_cost:
:return:
"""
if not current_value:
return {
"current_value": 0,
@ -218,7 +248,6 @@ class PropertyValuation:
"average_increase": 0
}
current_epc = property_instance.data["current-energy-rating"]
# We get the spectrum of ratings between the current and target EPC
epc_band_range = cls.EPC_BANDS[cls.EPC_BANDS.index(current_epc): cls.EPC_BANDS.index(target_epc) + 1]
@ -242,6 +271,19 @@ class PropertyValuation:
avg_increase = np.mean(all_increases)
if total_cost is not None:
# We CAP the retrofit ROI at 2
avg_increase_value = current_value * avg_increase
if avg_increase_value / total_cost > 2:
# We re-scale the % so that the average value increase is no more than 2 times the total cost
double_cost = 2 * total_cost
new_avg_increase = double_cost / current_value
scalar = new_avg_increase / avg_increase
# We scale the min and max increases by the same scalar
min_increase *= scalar
max_increase *= scalar
avg_increase = new_avg_increase
return {
"current_value": current_value,
"lower_bound_increased_value": float(current_value * (1 + min_increase)),

View file

@ -39,6 +39,7 @@ class ModelApi:
timestamp,
prediction_buckets,
base_url="https://api.dev.hestia.homes",
max_retries=2,
):
"""
This class handles the communication with the Model APIs. These models include SAP change, heat demand change
@ -54,6 +55,8 @@ class ModelApi:
self.timestamp = timestamp
self.prediction_buckets = prediction_buckets
self.max_retries = max_retries
@staticmethod
def predictions_template():
return {
@ -295,15 +298,33 @@ class ModelApi:
async def run_batches():
for chunk in tqdm(to_loop_over, total=len(to_loop_over)):
predictions_dict = await self.predict_all_async(
df=data.iloc[chunk:chunk + batch_size],
bucket=bucket,
model_prefixes=model_prefixes,
extract_ids=extract_ids
)
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
attempts = 0
success = False
while attempts <= self.max_retries and not success:
try:
predictions_dict = await self.predict_all_async(
df=data.iloc[chunk:chunk + batch_size],
bucket=bucket,
model_prefixes=model_prefixes,
extract_ids=extract_ids
)
for key, scored in predictions_dict.items():
all_predictions[key] = pd.concat([all_predictions[key], scored])
success = True
except Exception as e:
attempts += 1
logger.error(
f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}/{self.max_retries}). "
f"Error: {e}"
)
if attempts > self.max_retries:
logger.error(
f"Skipping batch {chunk}-{chunk + batch_size} after {self.max_retries} failed attempts."
)
# Check if there is an existing event loop
try:

View file

@ -29,3 +29,5 @@ mip==1.15.0
pyarrow==17.0.0
fastparquet==2024.5.0
aiohttp==3.10.10
# find my epc
beautifulsoup4

View file

@ -0,0 +1,59 @@
import pytest
import os
from backend.SearchEpc import SearchEpc # Replace with your actual module name
from dotenv import load_dotenv
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
class TestSearchEpcIntegration:
    """
    Tests for SearchEpc. `test_find_property` is an integration test that makes real API calls
    using EPC_AUTH_TOKEN; `test_search_housenumber` only exercises `SearchEpc.get_house_number`.
    """

    @pytest.mark.parametrize(
        # One argname per tuple element below; these must match the test function's parameters
        "address, postcode, uprn, skip_os, lmk_key, n_old_epcs",
        [
            # Test case 1: Valid address and postcode, skipping OS
            # In this case, the property is an individual flat but the uprn associated to the
            # EPC is for the building as a whole, possibly because there was a conversion of sorts
            ("Garden Flat, 48 Bedminster Parade", "BS3 4HS", 308249, True,
             "260907a5431fa073d193cc6bbec51fbf1ba9a61845ab2503f85aa19ce3ed6afd", 1),
            # Test case 2: Another valid address and postcode
            # In this case, the newest EPC, does not have a uprn associated to it. If we did a search by
            # uprn, we would get an old EPC
            ("Flat 8, Hainton House", "DN32 9AQ", 10090082018, True,
             "bd1149a20a73397184f07a9955f872424826e70f4870c058d71be887766ee1f8", 3),
        ],
    )
    def test_find_property(self, address, postcode, uprn, skip_os, lmk_key, n_old_epcs):
        """
        Integration test for `find_property`, making actual API calls.
        :param address: First line of the address to search for.
        :param postcode: Postcode to search for.
        :param uprn: UPRN passed to the searcher and expected on the newest EPC.
        :param skip_os: Passed straight through to `find_property`.
        :param lmk_key: Expected `lmk-key` of the newest EPC found.
        :param n_old_epcs: Expected number of older EPCs found.
        """
        # Provide your actual API keys or tokens here
        os_api_key = ""

        # Initialize the SearchEpc instance
        epc_searcher = SearchEpc(
            address1=address,
            postcode=postcode,
            uprn=uprn,
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=os_api_key,
        )

        # Execute the method
        epc_searcher.find_property(skip_os=skip_os)

        # We check that we have the correct epc
        assert epc_searcher.newest_epc["lmk-key"] == lmk_key
        assert epc_searcher.newest_epc["uprn"] == uprn
        assert len(epc_searcher.older_epcs) == n_old_epcs

    def test_search_housenumber(self):
        """Check that alphanumeric flat identifiers are extracted as the house number."""
        eg1 = 'Flat A11, Mortimer House, Grendon Road, Exeter'
        res1 = SearchEpc.get_house_number(eg1, None)
        assert res1 == "A11"

        eg2 = 'Flat A9, Mortimer House, Grendon Road, Exeter, EX1 2NL'
        res2 = SearchEpc.get_house_number(eg2, None)
        assert res2 == "A9"

440
etl/access_reporting/app.py Normal file
View file

@ -0,0 +1,440 @@
import os
from msal import ConfidentialClientApplication
from datetime import datetime, timedelta
import requests
from functools import wraps
import time
import logging
from io import BytesIO
import pandas as pd
# Configure logging
logger = logging.getLogger(__name__)
if not logger.handlers:
handler = logging.StreamHandler()
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
logger.addHandler(handler)
logger.setLevel(logging.INFO)
def _retry_after_seconds(response, default=5):
    """
    Work out how many seconds to wait before retrying, based on the Retry-After header.
    The header may hold either a number of seconds or an HTTP-date; both forms are handled.
    :param response: The HTTP response whose headers are inspected.
    :param default: Seconds to wait when the header is missing or cannot be parsed.
    :return: A non-negative whole number of seconds.
    """
    from datetime import datetime, timezone
    from email.utils import parsedate_to_datetime

    raw_value = response.headers.get('Retry-After')
    if raw_value is None:
        return default

    # Most common form: an integer number of seconds
    try:
        return max(0, int(raw_value))
    except (TypeError, ValueError):
        pass

    # Alternative form: an HTTP-date after which the request may be retried
    try:
        retry_at = parsedate_to_datetime(str(raw_value))
    except (TypeError, ValueError):
        return default
    if retry_at.tzinfo is None:
        # A date parsed without an offset is treated as UTC so it can be compared safely
        retry_at = retry_at.replace(tzinfo=timezone.utc)
    remaining = (retry_at - datetime.now(timezone.utc)).total_seconds()
    return max(0, int(remaining))


def handle_error(response):
    """
    Handle errors based on HTTP status codes and log detailed information.
    For 429, 500 and 503 responses this sleeps for the Retry-After period and asks the caller to retry.
    Every other status code raises.
    :param response: The non-successful HTTP response.
    :return: The string 'retry' when the request should be attempted again.
    :raises ValueError: For any status code that is not retried.
    """
    try:
        payload = response.json()
    except ValueError:
        payload = {}

    # Guard against bodies that are valid JSON but not shaped like {"error": {...}} so that the
    # .get() calls below cannot fail and mask the real HTTP error
    error_json = payload.get('error', {}) if isinstance(payload, dict) else {}
    if not isinstance(error_json, dict):
        error_json = {'message': str(error_json)}

    error_code = error_json.get('code', 'unknownError')
    error_message = error_json.get('message', 'No detailed error message provided.')
    inner_error = error_json.get('innererror', {})
    details = error_json.get('details', [])

    logger.error(f"Error Code: {error_code}")
    logger.error(f"Error Message: {error_message}")
    if inner_error:
        logger.error(f"Inner Error: {inner_error}")
    if details:
        logger.error(f"Error Details: {details}")

    status_code = response.status_code

    if status_code == 429:
        retry_after = _retry_after_seconds(response)  # Default to 5 seconds if not provided
        logger.warning(f"Too Many Requests. Retrying after {retry_after} seconds...")
        time.sleep(retry_after)
        return 'retry'

    if status_code in (500, 503):
        retry_after = _retry_after_seconds(response)  # Default to 5 seconds if not provided
        logger.error(f"Server error. Retrying after {retry_after} seconds...")
        time.sleep(retry_after)
        return 'retry'

    # Statuses with a dedicated log line; they are not retried and still raise below
    non_retryable_messages = {
        401: "Unauthorized. Token might be invalid.",
        403: "Forbidden. Access denied to the requested resource.",
        404: "Not Found. The requested resource doesnt exist.",
    }
    if status_code in non_retryable_messages:
        logger.error(non_retryable_messages[status_code])

    raise ValueError(f"API request failed with status code {response.status_code} - {error_message}")
def api_call_decorator(func):
    """
    Handles various aspects of the API call, including refreshing the access token if needed and handling pagination.

    The decorated method must return a tuple of (http_method, url, data). The wrapper performs the request
    and returns the decoded JSON response. When the decorated method has a `page_size` argument (passed
    explicitly or left at its default), every page is followed via '@odata.nextLink' and the combined items
    are returned as {'value': [...]}.
    :param func: The function to be decorated.
    :return: The wrapped function.
    :raises ValueError: If a request keeps failing with a retryable status after the maximum number of retries.
    """
    import inspect

    signature = inspect.signature(func)
    # Upper bound on consecutive retries of a single request, so a persistently failing
    # endpoint cannot keep the loop below running forever
    max_retries = 5

    @wraps(func)
    def wrapper(self, *args, **kwargs):
        try:
            # Check and refresh the access token if needed
            if self.is_access_token_expired():
                self.retrieve_access_token()
                logger.info("Access token refreshed.")

            # Get the HTTP method, URL, and optionally data from the function
            http_method, url, data = func(self, *args, **kwargs)

            # Resolve page_size from the call, including positional use and the function's own default,
            # so that pagination is not silently skipped when the caller relies on the default
            bound = signature.bind(self, *args, **kwargs)
            bound.apply_defaults()
            page_size = bound.arguments.get('page_size', kwargs.get('page_size', None))

            # Initialize the results list and handle pagination if page_size is provided
            results = []
            response_data = {}
            n_calls = 0
            consecutive_retries = 0

            while url:
                logger.info("Making call for page: " + str(n_calls + 1))
                n_calls += 1
                response = requests.request(http_method, url, headers=self.headers, json=data)

                if response.status_code != 200:
                    # handle_error raises for non-retryable statuses; otherwise it has already
                    # slept for the back-off period and the same URL is requested again
                    handle_error(response)
                    consecutive_retries += 1
                    if consecutive_retries > max_retries:
                        raise ValueError(
                            f"API request failed with status code {response.status_code} "
                            f"after {max_retries} retries"
                        )
                    continue

                consecutive_retries = 0
                response_json = response.json()  # Store the response JSON
                if not page_size:
                    response_data = response_json  # Capture the full response for consistency
                    break

                results.extend(response_json.get('value', []))
                url = response_json.get('@odata.nextLink', None)
                logger.info(f"Next page URL: {url}")

            if page_size:
                response_data = {'value': results}
            return response_data
        except Exception:
            logger.exception("An error occurred during the API call.")
            raise

    return wrapper
class SharePointClient:
    """
    Client for a SharePoint site's document drive, using the Microsoft Graph API.
    Authentication uses MSAL client credentials; the token is refreshed by `api_call_decorator`
    whenever `is_access_token_expired` reports that it has expired.
    """

    # Token dictionary returned by MSAL (read for its 'access_token' and 'expires_in' keys)
    access_token = None
    # Local time at which the token was requested
    access_token_request_timestamp = None
    # Local time after which the token is treated as expired
    access_token_expiry = None
    # Authorization headers sent with every Graph request
    headers = None

    TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

    def __init__(self, tenant_id, client_id, client_secret, site_id, access_token=None,
                 access_token_expiration_details=None):
        """
        Initializes the SharePointClient with necessary credentials and site information.
        :param tenant_id: The tenant ID.
        :param client_id: The client ID.
        :param client_secret: The client secret.
        :param site_id: The site ID.
        :param access_token: The access token (optional)
        :param access_token_expiration_details: The access token expiration details (optional)
        :raises ValueError: If an access token is supplied without its expiration details.
        """
        self.tenant_id = tenant_id
        self.client_id = client_id
        self.client_secret = client_secret

        if access_token:
            if not access_token_expiration_details:
                raise ValueError("Access token expiration details must be provided.")
            self.access_token = access_token
            self.set_access_token_expiration_details(access_token_expiration_details)
            self.headers = {
                'Authorization': f"Bearer {self.access_token['access_token']}"
            }
        else:
            self.retrieve_access_token()

        # Retrieve static identifiers
        self.site_id = site_id
        self.document_drive = self.get_documents_drive()

    def get_token_expiration_details(self):
        """
        Returns the access token expiration details. Converts the datetime objects to strings for serialization.
        :return: Dictionary with 'access_token_request_timestamp' and 'access_token_expiry' as strings.
        """
        return {
            'access_token_request_timestamp': datetime.strftime(
                self.access_token_request_timestamp, self.TIMESTAMP_FORMAT
            ),
            'access_token_expiry': datetime.strftime(self.access_token_expiry, self.TIMESTAMP_FORMAT)
        }

    def set_access_token_expiration_details(self, access_token_expiration_details):
        """
        Sets the access token expiration details from a serialized dictionary.
        :param access_token_expiration_details: The serialized access token expiration details.
        :return: None
        """
        self.access_token_request_timestamp = datetime.strptime(
            access_token_expiration_details['access_token_request_timestamp'], self.TIMESTAMP_FORMAT
        )
        self.access_token_expiry = datetime.strptime(
            access_token_expiration_details['access_token_expiry'], self.TIMESTAMP_FORMAT
        )

    def is_access_token_expired(self):
        """
        Checks if the access token has expired. This only reports the state; it does not refresh the token.
        :return: True if expired, False otherwise.
        """
        return datetime.now() >= self.access_token_expiry

    def retrieve_access_token(self, refresh=False):
        """
        Implements authentication using MSAL.
        :param refresh: If True, force a refresh of the access token.
        :return: None
        :raises ValueError: If no access token is returned.
        """
        app = ConfidentialClientApplication(
            self.client_id,
            authority=f"https://login.microsoftonline.com/{self.tenant_id}",
            client_credential=self.client_secret
        )
        scope = ["https://graph.microsoft.com/.default"]

        # Taken before the request so that the computed expiry errs on the early side
        access_token_request_timestamp = datetime.now()

        if refresh:
            logger.info("Forcing refresh of access token.")
            token = app.acquire_token_for_client(scopes=scope)
        else:
            # Check if a token is already cached
            token = app.acquire_token_silent(scope, account=None)
            if not token:
                token = app.acquire_token_for_client(scopes=scope)

        if "access_token" not in token:
            logger.error("Authentication failed.")
            raise ValueError("Authentication failed")

        # Expire 20 seconds early so a token is never used right at the end of its lifetime
        access_token_expiry = access_token_request_timestamp + timedelta(
            seconds=token['expires_in'] - 20
        )

        self.access_token = token
        self.access_token_request_timestamp = access_token_request_timestamp
        self.access_token_expiry = access_token_expiry
        self.headers = {
            'Authorization': f"Bearer {self.access_token['access_token']}"
        }
        logger.info("Access token retrieved successfully.")

    @api_call_decorator
    def get_documents_drive(self):
        """
        Get the document drive of the SharePoint site.
        :return: Tuple containing HTTP method, URL, and None for data.
        """
        url = f"https://graph.microsoft.com/v1.0/sites/{self.site_id}/drive"
        logger.info(f"Getting document drive from URL: {url}")
        return 'GET', url, None

    @api_call_decorator
    def list_folder_contents(self, drive_id, folder_path: str, page_size: int = 100):
        """
        This function will list the contents of a folder in SharePoint.
        :param drive_id: The ID of the drive.
        :param folder_path: The path of the folder.
        :param page_size: The number of items per page (default is 100).
        :return: Tuple containing HTTP method, URL, and None for data.
        """
        url = f"https://graph.microsoft.com/v1.0/drives/{drive_id}/root:/{folder_path}:/children?$top={page_size}"
        logger.info(f"Listing folder contents from URL: {url}")
        return 'GET', url, None

    @staticmethod
    def download_sharepoint_file(download_url):
        """
        Downloads a file from the given URL and returns its content.
        :param download_url: The URL to download the file from.
        :return: A BytesIO holding the downloaded file, positioned at the start.
        """
        file_content = BytesIO()

        # With stream=True the connection stays open until the response is closed, so the
        # response is used as a context manager to guarantee that it is released
        with requests.get(download_url, stream=True) as response:
            response.raise_for_status()  # Check if the request was successful

            # Read the file content into memory
            for chunk in response.iter_content(chunk_size=8192):
                file_content.write(chunk)

        file_content.seek(0)  # Reset the file pointer to the beginning
        return file_content

    def download_sharepoint_folder(self, drive_id, folder_path, download_dir, excluded_file_types=None):
        """
        Downloads all files in a SharePoint folder to the specified local directory.
        Subfolders are downloaded recursively into matching local subdirectories.
        :param drive_id: The ID of the SharePoint drive.
        :param folder_path: The path of the folder in SharePoint.
        :param download_dir: The local directory to save the downloaded files.
        :param excluded_file_types: A list of file types to exclude from download (default is None).
        """
        excluded_file_types = [] if excluded_file_types is None else excluded_file_types

        # Ensure the download directory exists
        os.makedirs(download_dir, exist_ok=True)

        # List folder contents. page_size is passed by keyword so that all pages of a large
        # folder are collected rather than only the first one
        folder_contents = self.list_folder_contents(drive_id, folder_path, page_size=100)
        files = folder_contents.get('value', [])

        for item in files:
            if item.get('folder'):  # Check if it's a folder
                # Recursively handle subfolders, applying the same exclusions as the parent folder
                subfolder_path = f"{folder_path}/{item['name']}"
                subfolder_dir = os.path.join(download_dir, item['name'])
                self.download_sharepoint_folder(
                    drive_id, subfolder_path, subfolder_dir, excluded_file_types=excluded_file_types
                )
                continue

            # It's a file, download it
            file_name = item['name']
            if file_name.split(".")[-1] in excluded_file_types:
                continue

            download_url = item['@microsoft.graph.downloadUrl']
            logger.info(f"Downloading file: {file_name}")
            file_content = self.download_sharepoint_file(download_url)

            # Save the file locally
            file_path = os.path.join(download_dir, file_name)
            with open(file_path, 'wb') as f:
                f.write(file_content.read())
            logger.info(f"File saved to: {file_path}")
def app():
    """
    Build access-rate summaries from the survey outcome workbooks stored in SharePoint.
    For every customer whose location is a single .xlsx file, the workbook is downloaded and summarised
    per surveyor per week, and per week per funding type. Credentials are read from the SHAREPOINT_* and
    WARMFRONT_SHAREPOINT_SITE_ID environment variables.
    :raises ValueError: If a customer's workbook cannot be found in its SharePoint folder.
    """
    # Customers for WC 18/11/2024
    #
    # ----- Eastlight location -----
    # No data this week, low on data
    # Housing Associations/Eastlight/Survey Outcomes/
    #
    # ----- Settle location -----
    # No data this week, in separate files
    # Housing Associations/Settle/Survey Outcomes/
    #
    # ----- Community Housing -----
    # In separate files - will we get to a singular form?
    # Housing Associations/Community Housing/Survey Outcomes/
    #
    # ----- ACIS location -----
    # Doesn't have this week's data
    # Housing Asociation/ACIS/Survey Outcomes/ACIS Group - 25.11.2024 - USE THIS.xlsx
    #
    # ----- Southern location -----
    #
    #
    # ------ Unitas location ------
    # Does have this week's data
    # Unitas location: Housing Associations/Unitas/Survey Outcomes/Unitas.xlsx
    locations = {
        "Unitas": "Housing Associations/Unitas/Survey Outcomes/Unitas.xlsx",
        "Eastlight": "Housing Associations/Eastlight/Survey Outcomes/",
        "Settle": "Housing Associations/Settle/Survey Outcomes/",
        "Community Housing": "Housing Associations/Community Housing/Survey Outcomes/",
        # NOTE(review): "Housing Asociation" differs from the "Housing Associations" root used by the
        # other customers - confirm against the actual SharePoint folder name
        "ACIS": "Housing Asociation/ACIS/Survey Outcomes/ACIS Group - 25.11.2024 - USE THIS.xlsx",
        "Southern": None,
    }

    SHAREPOINT_CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID", None)
    SHAREPOINT_CLIENT_SECRET = os.getenv("SHAREPOINT_CLIENT_SECRET", None)
    SHAREPOINT_TENANT_ID = os.getenv("SHAREPOINT_TENANT_ID", None)
    WARMFRONT_SHAREPOINT_SITE_ID = os.getenv("WARMFRONT_SHAREPOINT_SITE_ID", None)

    sharepoint_client = SharePointClient(
        tenant_id=SHAREPOINT_TENANT_ID,
        client_id=SHAREPOINT_CLIENT_ID,
        client_secret=SHAREPOINT_CLIENT_SECRET,
        site_id=WARMFRONT_SHAREPOINT_SITE_ID
    )

    # NOTE(review): nothing is appended to `results` and nothing is returned yet, so the summaries
    # built below are currently discarded - presumably work in progress
    results = []
    for customer, location in locations.items():
        if location is None:
            continue

        # Only customers with a single workbook are processed; folder locations are skipped
        if location.endswith(".xlsx"):
            # Read in the file
            # List the contents of the folder
            location_folder = os.path.dirname(location)
            contents = sharepoint_client.list_folder_contents(
                drive_id=sharepoint_client.document_drive["id"],
                folder_path=location_folder
            )
            filepaths = contents["value"]
            download_url = next(
                (file['@microsoft.graph.downloadUrl'] for file in filepaths
                 if '@microsoft.graph.downloadUrl' in file and file['name'] == os.path.basename(location)),
                None
            )
            if download_url is None:
                raise ValueError("File not found in the SharePoint folder.")

            file_content = sharepoint_client.download_sharepoint_file(download_url)
            # Convert to pandas dataframe since file is an excel file
            df = pd.read_excel(file_content)
            df["Outcome"] = df["Outcome"].str.strip().str.lower()

            # We cannot group by funding type accurately because any job that is not funded will have a NaN value
            # and therefore we have a 100% access rate for funded jobs and 0% otherwise
            surveyor_outcomes = []
            # Grouping by two columns yields two-element keys, so exactly two names are unpacked
            for (week, surveyor), group in df.groupby(["Week Commencing", "DEA/REA"]):
                funding_type = [x for x in group["Funding Type"].unique() if not pd.isnull(x)]
                if funding_type:
                    funding_type = " + ".join(funding_type)
                else:
                    funding_type = "No Funding"

                surveyed = group[group["Outcome"] == "surveyed"]
                no_answer = group[
                    group["Outcome"] == "no answer"
                ]
                other_issue = group[~group["Outcome"].isin(["surveyed", "no answer"])]

                surveyor_outcomes.append(
                    {
                        "Surveyor": surveyor,
                        "Week": week,
                        "Funding": funding_type,
                        "Surveyed": surveyed.shape[0],
                        "No Answer": no_answer.shape[0],
                        "Other Issue": other_issue.shape[0],
                    }
                )

            surveyor_outcomes = pd.DataFrame(surveyor_outcomes)
            surveyor_outcomes["Week"] = pd.to_datetime(surveyor_outcomes["Week"])

            # Weekly totals per funding type, summed across surveyors
            weekly_access = (
                surveyor_outcomes.drop(columns=["Surveyor"]).groupby(["Week", "Funding"]).sum().reset_index()
            )

            # Sort by week and surveyor ascending
            surveyor_outcomes = surveyor_outcomes.sort_values(["Week", "Surveyor"], ascending=[True, True])
            surveyor_outcomes["Access Rate"] = 100 * surveyor_outcomes["Surveyed"] / (
                surveyor_outcomes["Surveyed"] + surveyor_outcomes["No Answer"] + surveyor_outcomes["Other Issue"]
            )

            weekly_access["Total"] = (
                weekly_access["Surveyed"] + weekly_access["No Answer"] + weekly_access["Other Issue"]
            )
            weekly_access["Access Rate"] = 100 * weekly_access["Surveyed"] / (
                weekly_access["Surveyed"] + weekly_access["No Answer"] + weekly_access["Other Issue"]
            )

View file

@ -0,0 +1,11 @@
python-docx==0.8.11
PyPDF2==3.0.1
boto3
requests
pandas
pyarrow==12.0.1
openpyxl==3.1.2
usaddress==0.5.10
pdfplumber==0.10.3
msgpack==1.0.5
msal

View file

@ -11,7 +11,7 @@ import inspect
src_file_path = inspect.getfile(lambda: None)
DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20240917 Hestia Materials.xlsx"
DATA_DIRECTORY = Path(src_file_path).parent / "local_data" / "20250316 Domna Materials.xlsx"
# Environment file is at the same level as this file
ENV_FILE = Path(src_file_path).parent / "etl" / "costs" / ".env"
dotenv.load_dotenv(ENV_FILE)
@ -91,6 +91,7 @@ def app():
lel_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="low_energy_lighting", header=0)
flat_roof_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="flat_roof_insulation", header=0)
window_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="window_glazing", header=0)
rir_insulation_costs = pd.read_excel(DATA_DIRECTORY, sheet_name="room_roof_insulation", header=0)
# Form a single table to be uploaded
costs = pd.concat(
@ -104,7 +105,8 @@ def app():
ewi_costs,
lel_costs,
flat_roof_costs,
window_costs
window_costs,
rir_insulation_costs,
]
)

View file

@ -0,0 +1,106 @@
"""
This is an adhoc script, used to pull together some of the figures that are being included in the
Warm Homes: Social Housing Wave 3 funding application
"""
import pandas as pd
import numpy as np
aiha_all_units = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx",
sheet_name="All Properties - AIHA",
header=2
)
modelled_units = pd.read_excel(
"/Users/khalimconn-kowlessar/Downloads/AIHA Measures Packages 2024_11_13.xlsx",
sheet_name="Modelled Properties - Measures",
header=5
)
aiha_all_units = aiha_all_units.drop(columns=['Unnamed: 0', 'Unnamed: 1'])
aiha_extracted_property_data = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv"
)
aiha_wave_3_units = aiha_all_units[aiha_all_units["Expected Package Cost"].astype(float) > 0]
# TODO: The EPC C property isn't a C!
aiha_epc_breakdown = aiha_wave_3_units["Expected EPC Rating"].replace({"D or E": "E"}).value_counts()
# For CAHA
caha_epc_breakdown = modelled_units[
modelled_units['Survey Key'].str.contains("CAHA")
]['Current EPC Rating'].value_counts()
# For Hornsey
hornsey_epc_breakdown = modelled_units[
modelled_units['Survey Key'].str.contains("HORNSEY")
]['Current EPC Rating'].value_counts()
aiha_original_asset_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/240924- KSQ & Domna Info Merge - AIHA - SHDF Wave 3 "
"bid - Supplementary information.xlsx",
sheet_name="Archetyping Data",
header=2
)
# Get the units in the bid:
aiha_wave_3_features = aiha_original_asset_data[
['Address letter or number', 'Street address', 'Postcode', "Wall type",
"Property type", "built-form", "floor"]
].merge(
aiha_wave_3_units[['Address letter or number', 'Street address', 'Postcode']],
how="inner",
on=["Address letter or number", "Street address", "Postcode"]
)
wall_type_breakdown = aiha_wave_3_features["Wall type"].value_counts()
property_type_breakdown = aiha_wave_3_features.groupby(["Property type", "floor"]).size().reset_index()
aiha_wave_3_features[aiha_wave_3_features["Property type"] == "Flat"][["Street address", "Postcode"]]
# 4 Yetev Lev Court  ... Semi-Detached mid - Medium
# B 86 Bethune Road ... Mid-Terrace top. - Low
# A 80 Bethune Road ... Mid-Terrace ground. - Low
# B 80 Bethune Road ... \n \n - Low
# A 9 Clapton Common ... Semi-Detached ground. - Low
# C 9 Clapton Common ... End-Terrace \n. - Low
# B 89 Manor Road ... \n \n. - Low
# A 6 Northfield Road ... Detached top. - Low
# 13 Northfield Rd ... Semi-Detached \n - Low
# A 73 Manor Road ... End-Terrace \n - Low
# B 73 Manor Road ... Detached top - Low
# Hornsey data - contained in original asset list
hornsey_asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
"Trust.xlsx",
sheet_name="Ksquared-All units information",
header=3
)
# We don't need the first row
hornsey_asset_list = hornsey_asset_list.iloc[1:]
# Fill NA values with empty strings
hornsey_asset_list = hornsey_asset_list.fillna("")
hornsey_asset_list["Address letter or number"] = hornsey_asset_list["Address letter or number"].astype(
str
).str.strip()
hornsey_asset_list["Postcode"] = hornsey_asset_list["Postcode"].astype(str).str.strip()
hornsey_asset_list["Street address"] = hornsey_asset_list["Street address"].astype(str).str.strip()
# Collapse any run of two or more spaces into a single space, so that address parts which differ
# only in spacing compare equal.
for col in ["Address letter or number", "Street address", "Postcode"]:
    hornsey_asset_list[col] = hornsey_asset_list[col].str.replace(r" {2,}", " ", regex=True)
hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]
hornsey_asset_list["Wall Type Cleaned"] = np.where(
hornsey_asset_list["Wall type"].str.contains("Cavity"),
"Cavity",
"Solid"
)
hornsey_asset_list["Property type"].value_counts()
# CAHA
caha_epc_data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_extracted_property_data.xlsx"
)
caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["property_type"].value_counts()
caha_epc_data[caha_epc_data["address"] != "33 Woodhouse Road"]["wall_type"].value_counts()

View file

@ -0,0 +1,988 @@
import os
from io import BytesIO
import pandas as pd
from etl.xml_survey_extraction.XmlParser import XmlParser
SURVEY_FOLDER_PATH = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/RESIDENT SURVEYS"
CONTINGENCY_RATE = 0.26
def sap_to_epc(sap_points: int | float):
"""
Simple utility function to convert SAP points to EPC rating.
:param sap_points: numerical value of SAP points, typically between 0 and 100
:return:
"""
if sap_points <= 0:
raise ValueError("SAP points should be above 0.")
if sap_points >= 92:
return "A"
elif sap_points >= 81:
return "B"
elif sap_points >= 69:
return "C"
elif sap_points >= 55:
return "D"
elif sap_points >= 39:
return "E"
elif sap_points >= 21:
return "F"
else:
return "G"
def main():
"""
This script handles the extraction of data from the XML files in the survey folders.
:return:
"""
# Step 1: List all subfolders inside SURVEY_FOLDER_PATH.
subfolders = [f.path for f in os.scandir(SURVEY_FOLDER_PATH) if f.is_dir()]
# Step 2: Loop through each subfolder and find the XML files.
extracted_surveys = []
for subfolder in subfolders:
print(f"Searching in subfolder: {subfolder}")
# Find all XML files in the current subfolder.
xml_files = [f for f in os.listdir(subfolder) if f.endswith('.xml')]
if not xml_files:
print(f"No XML files found in subfolder: {subfolder}")
continue
# If any XML files are found, perform the data extraction. We use the subfolder name as the survey key.
for xml_file in xml_files:
xml_path = os.path.join(subfolder, xml_file)
print(f"Processing XML file: {xml_path}")
# Read in the XML and parse it using the XmlParser class.
with open(xml_path, 'rb') as file:
xml_data_io = BytesIO(file.read())
uprn = None # Set the UPRN if available.
# Create an XmlParser instance
xml_parser = XmlParser(
file=xml_data_io,
filekey=xml_path,
surveyor_company="",
uprn=uprn,
)
# Run the parser to extract the data
xml_parser.run()
if not xml_parser.epc:
# If we don't have a lig xml
continue
# Store the extracted data for further processing
extracted_surveys.append({
"survey_key": subfolder.split("/")[-1],
**xml_parser.epc,
**xml_parser.additional_data
})
print(f"Extracted {len(extracted_surveys)} surveys.")
# Process the extracted_surveys as needed, for example, save to a database or write to a file.
extracted_surveys = pd.DataFrame(extracted_surveys)
# This is the data we need for the AIHA project
measures_data = extracted_surveys[
["survey_key", "address", "postcode", "current-energy-efficiency", "current-energy-rating",
"number_of_floors", "walls-description", "property-type", "built-form"]
]
measures_data = measures_data.sort_values("survey_key", ascending=True)
measures_data.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/extracted_property_data.csv",
)
# Note:
# The properties will still have "Very poor" ratings for their hot water
# TODO
# - AIH001-03 has a loft that is inaccessible - ask Chenai about why this property didn't have access to the loft
# [Can't remember, not clear - Chenai will check]
# - AIH001-08 and AIH001-09, check if it's freehold - could solar work as both of these units are part of the same
# building [Question for Lewis & Kevin]
# - AIH001-09 - Is it not possible to install a loft hatch? [IT IS NOT, NO ACCESS - would need to accessed from
# the other unit]
# - AIH001-09 - Is there definitely an immersion water heater? Is this definitely the case for the other units?
# [Question for Lewis & Kevin] - [YES - ASHP!!!!]
# TODO: Check which properties are in a conservation area
# TODO: AIH001-16 - Is the loft insulation suitable (already has 100mm in the RIR)
# TODO: Adjust Archetype 14 homes to exclude double glazing? Or should we exclude entirely
recommended_measures = [
{
"survey_key": "AIH001-01",
"starting_sap": 69,
"recommended_measures": [],
"notes": "Is EPC C"
},
{
"survey_key": "AIH001-02",
"starting_sap": 65,
"recommended_measures": [
{
"measure": "Solar PV",
"description": "2.4kWp Solar PV system",
"config": [
{
"size": "2.4W",
"orientation": "Horizontal",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 7,
"ending_sap": 72,
"notes": "The array can be mounted on the flat roof, so that panels are south facing"
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 4,
"ending_sap": 76
}
],
},
{
"survey_key": "AIH001-03",
"starting_sap": 43,
"recommended_measures": [
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 1,
"ending_sap": 44,
},
{
"measure": "Solar PV",
"description": "4kWp Solar PV system",
"config": [
{
"size": "4kWp",
"orientation": "East",
"elavation": 30,
"overshading": "None or little",
},
],
"sap_points": 10,
"ending_sap": 54
},
{
"measure": "Air Source Heat Pump",
"description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
"sap_points": 20,
"ending_sap": 74
},
{
"measure": "Tariff Review",
"description": "Switch to 24-hour tariff",
"sap_points": 15,
"ending_sap": 89
}
],
"notes": "Unclear if the loft is accessible"
},
{
"survey_key": "AIH001-04",
"starting_sap": 48,
"recommended_measures": [
{
"measure": "Flat Roof Insulation",
"description": "100mm flat roof insulation",
"floor_area": 39.1482, # based on area of top floor
"sap_points": 4,
"ending_sap": 52
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 55
},
{
"measure": "Solar PV",
"description": "4kWp Solar PV system",
"config": [
{
"size": "4kWp",
"orientation": "South",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 15,
"ending_sap": 70
}
],
"notes": "Roof is flat, PV array should be installed south facing with elevation"
},
{
"survey_key": "AIH001-05",
"starting_sap": 54,
"recommended_measures": [
{
"measure": "Flat Roof Insulation",
"description": "100mm flat roof insulation",
"floor_area": 49.48, # based on area of top floor
"sap_points": 5,
"ending_sap": 59,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 2,
"ending_sap": 61,
},
{
"measure": "Solar PV",
"description": "4kWp Solar PV system",
"config": [
{
"size": "4kW",
"orientation": "Horizontal",
"elavation": 30,
"overshading": "Modest",
}
],
"sap_points": 9,
"ending_sap": 70
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 73
}
],
"notes": ""
},
{
"survey_key": "AIH001-06",
"starting_sap": 62,
"recommended_measures": [
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 2,
"ending_sap": 64,
},
{
"measure": "Solar PV",
"description": "2kWp Solar PV system",
"config": [
{
"size": "2kW",
"orientation": "South",
"elavation": 30,
"overshading": "Modest",
}
],
"sap_points": 6,
"ending_sap": 70
}
]
},
{
"survey_key": "AIH001-07",
"starting_sap": 74,
"recommended_measures": [],
"notes": "Is EPC C"
},
{
"survey_key": "AIH001-08",
"starting_sap": 56,
"recommended_measures": [
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 54.2864, # Based on area of top floor
"sap_points": 2,
"ending_sap": 58,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 4,
"ending_sap": 62,
},
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"hlp": 24.13 * 2.63,
"sap_points": 7,
"ending_sap": 69,
},
{
"measure": "Ventilation",
"description": "2x DMEV fans",
"sap_points": 0,
"ending_sap": 69,
}
]
},
{
"survey_key": "AIH001-09",
"starting_sap": 44,
"recommended_measures": [
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"hlp": (22.35 * 3.24) + (22.13 * 2.53),
"sap_points": 8,
"ending_sap": 52,
},
{
"measure": "Cavity Wall Insulation",
"description": "CWI to rdSAP default standard",
"hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39), # 1st & 2nd extension
"sap_points": 1,
"ending_sap": 53,
},
{
"measure": "Ventilation",
"description": "2x DMEV fans",
"sap_points": 0,
"ending_sap": 53,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 56,
},
{
"measure": "Solar PV",
"description": "1.6kWp Solar PV system",
"config": [
{
"size": "1.6W",
"orientation": "South-East",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 6,
"ending_sap": 62
},
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 63.59 + 12.31, # Based on area of main building and 1st extension
"sap_points": 8,
"ending_sap": 70,
"notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
"which is also owned by AIHA"
}
],
"notes": "This property is a house split into 2 flats. We can install a PV array for both units (one array"
"per unit). Area on south-east part of roof is ~22m2 with no overshadowing. Flat roof area is 8m2"
"with modest overshadowing. We suggest a 3.2kWp system, across two units"
},
{
"survey_key": "AIH001-11",
"starting_sap": 59,
"recommended_measures": [
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 4,
"ending_sap": 63,
},
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"hlp": (18.50 * 3.12) + (19.00 * 2.75),
"sap_points": 5,
"ending_sap": 68,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 1,
"ending_sap": 69,
}
]
},
{
"survey_key": "AIH001-12",
"starting_sap": 46,
"recommended_measures": [
{
"measure": "Double Glazing",
"description": "Installation of double glazing",
"n_windows": 20, # Counted the bay windows each as 3
"windows_area": 10.66,
"sap_points": 3,
"ending_sap": 49,
},
# {
# "measure": "Solar PV",
# "description": "3.2kWp Solar PV system",
# "config": [
# {
# "size": "3.2W",
# "orientation": "East",
# "elavation": 30,
# "overshading": "Little or none",
# }
# ],
# "sap_points": 9,
# "ending_sap": 58
# },
{
"measure": "Air Source Heat Pump",
"description": "Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)",
"sap_points": 15,
"ending_sap": 65
},
{
"measure": "Tariff Review",
"description": "Switch to 24-hour tariff",
"sap_points": 15,
"ending_sap": 80
}
]
},
{
"survey_key": "AIH001-13",
"starting_sap": 53,
"recommended_measures": [
{
"measure": "Roof Insulation",
"description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
"floor_area": 39.75, # based on the floor area of the RIR
"sap_points": 6,
"ending_sap": 59,
},
{
"measure": "Flat Roof Insulation",
"description": "100mm flat roof insulation",
"floor_area": 33.06, # Based on area of the extension
"sap_points": 2,
"ending_sap": 61,
},
{
"measure": "Cavity Wall Insulation",
"description": "CWI to rdSAP default standard",
"hlp": (35.40 * 2.65) + (26.70 * 2.73) + (16.30 * 2.71), # 1st & 2nd extension
"sap_points": 6,
"ending_sap": 67,
},
{
"measure": "Ventilation",
"description": "2x DMEV fans",
"sap_points": 0,
"ending_sap": 67,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 2,
"ending_sap": 69,
},
{
"measure": "Solar PV",
"description": "4kWp Solar PV system",
"config": [
{
"size": "4kW",
"orientation": "Horizontal",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 9,
"ending_sap": 78
}
]
},
{
"survey_key": "AIH001-14",
"starting_sap": 63,
"recommended_measures": [
{
"measure": "Cavity Wall Insulation",
"description": "CWI to rdSAP default standard",
"hlp": (11.00 * 2.6) + (11.00 * 2.65) + (4.60 * 2.7),
"sap_points": 5,
"ending_sap": 68,
},
{
"measure": "Ventilation",
"description": "2x DMEV fans",
"sap_points": 0,
"ending_sap": 68,
},
{
"measure": "Loft Insulation",
"description": "300mm loft insulation", # Based on area of main building
"floor_area": 59.20,
"sap_points": 1,
"ending_sap": 69,
},
{
"measure": "Solar PV",
"description": "3.2kWp Solar PV system",
"sap_points": 10,
"ending_sap": 79,
}
]
},
{
"survey_key": "AIH001-15",
"starting_sap": 60,
"recommended_measures": [
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 73.81, # Based on area of main building
"sap_points": 1,
"ending_sap": 61,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 64,
},
{
"measure": "Solar PV",
"description": "3.2kWp Solar PV system",
"config": [
{
"size": "3.2W",
"orientation": "North-West",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 7,
"ending_sap": 71,
"notes": "The array is North-west facing and therefore will be slightly less efficient than south"
"facing, however the impact is not so severe as to make the installation not worthwhile."
"Ground mounted"
}
]
},
{
"survey_key": "AIH001-16",
"starting_sap": 60,
"recommended_measures": [
{
"measure": "Cavity Wall Insulation",
"description": "CWI to rdSAP default standard",
"hlp": (21.56 * 2.60) + (26.79 * 2.8) + (6.74 * 2.60),
"sap_points": 4,
"ending_sap": 64,
},
{
"measure": "Ventilation",
"description": "2x DMEV fans",
"sap_points": 0,
"ending_sap": 64,
},
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 20.92, # Based on floor area of RIR
"sap_points": 1,
"ending_sap": 65,
},
{
"measure": "Solar PV",
"description": "2.4kWp Solar PV system",
"config": [
{
"size": "2.4W",
"orientation": "South-East",
"elavation": 30,
"overshading": "Modest",
}
],
"sap_points": 5,
"ending_sap": 70,
}
]
},
{
"survey_key": "AIH001-17",
"starting_sap": 62,
"recommended_measures": [
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 1,
"ending_sap": 63,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 66,
},
{
"measure": "Solar PV",
"description": "4kWp Solar PV system",
"config": [
{
"size": "3.2kW",
"orientation": "East",
"elavation": 30,
"overshading": "None or little",
},
{
"size": "0.8kW",
"orientation": "West",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 12,
"ending_sap": 78,
}
]
},
{
"survey_key": "AIH001-18",
"starting_sap": 58,
"recommended_measures": [
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 37.52, # Based on area of main building and 1st extension
"sap_points": 7,
"ending_sap": 65,
},
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 1,
"ending_sap": 66,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 2,
"ending_sap": 68,
},
{
"measure": "Solar PV",
"description": "3.2kWp Solar PV system",
"config": [
{
"size": "3.2W",
"orientation": "North-East",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 7,
"ending_sap": 75,
}
],
},
{
"survey_key": "AIH001-19",
"starting_sap": 76,
"recommended_measures": []
},
{
"survey_key": "AIH001-20",
"starting_sap": 82,
"recommended_measures": []
},
{
"survey_key": "AIH001-21",
"starting_sap": 53,
"recommended_measures": [
{
"measure": "Cylinder Insulation",
"description": "80mm cylinder insulation",
"sap_points": 2,
"ending_sap": 55,
},
{
"measure": "Roof Insulation",
"description": "100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)",
"floor_area": 22.80, # Based on floor area of RIR
"sap_points": 7,
"ending_sap": 62,
},
{
"measure": "Solar PV",
"description": "2.4kWp Solar PV system",
"config": [
{
"size": "1.6kWp",
"orientation": "Horizontal",
"elavation": 30,
"overshading": "None or little",
},
{
"size": "0.8kWp",
"orientation": "South-East",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 9,
"ending_sap": 71,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 74,
}
]
},
{
"survey_key": "AIH001-SIMULATED-01",
"elmhurst_reference": "000020",
"starting_sap": None,
"recommended_measures": [
{
"measure": "Internal Wall Insulation",
"description": "100mm internal wall insulation",
"hlp": (22.35 * 3.24) + (22.13 * 2.53),
"sap_points": 8,
"ending_sap": 52,
},
{
"measure": "Cavity Wall Insulation",
"description": "CWI to rdSAP default standard",
"hlp": (2.68 * 2.39) + (5.93 * 2.63) + (6.13 * 2.39), # 1st & 2nd extension
"sap_points": 1,
"ending_sap": 53,
},
{
"measure": "Ventilation",
"description": "2x DMEV fans",
"sap_points": 0,
"ending_sap": 53,
},
{
"measure": "TTZC",
"description": "Smart Thermostat",
"sap_points": 3,
"ending_sap": 56,
},
{
"measure": "Solar PV",
"description": "1.6kWp Solar PV system",
"config": [
{
"size": "1.6W",
"orientation": "South-East",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 6,
"ending_sap": 62
},
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 63.59 + 12.31, # Based on area of main building and 1st extension
"sap_points": 8,
"ending_sap": 70,
"notes": "Loft is inaccessible from this unit - would need to be accessed from the other unit, "
"which is also owned by AIHA"
}
],
"notes": "This was cloned from 80A. There is no existing data for 80B"
},
{
"survey_key": "AIH001-SIMULATED-05",
"starting_sap": 68,
"recommended_measures": [
{
"measure": "Loft Insulation",
"description": "300mm loft insulation",
"floor_area": 42.5,
"sap_points": 1,
"ending_sap": 69,
},
{
"measure": "Solar PV",
"description": "3.2kWp Solar PV system",
"config": [
{
"size": "3.2W",
"orientation": "North-East",
"elavation": 30,
"overshading": "None or little",
}
],
"sap_points": 8,
"ending_sap": 77,
}
]
}
]
scaffolding_data = [
{
"number_of_floors": 2,
"price": 841,
},
{
"number_of_floors": 3,
"price": 1077,
}
]
# TODO - Need an update cost for cylinder insulation
pricing_data = [
{'item': '80mm cylinder insulation', 'unit_price': 50, 'unit': 'unit'},
{'item': '100mm internal wall insulation', 'unit_price': 244.8, 'unit': 'hlp_m2'},
{'item': 'CWI to rdSAP default standard', 'unit_price': 14.21, 'unit': 'hlp_m2'},
{'item': 'Window draught proofing improvements', 'unit_price': 63, 'unit': 'window'},
{'item': '100mm flat roof insulation', 'unit_price': 195, 'unit': 'floor_m2'},
{'item': 'Switch to 24-hour tariff', 'unit_price': 0, 'unit': None},
{'item': 'Installation of double glazing', 'unit_price': 1074, 'unit': 'window'},
{'item': 'Ecoforest ecoAIR EVI 4-20 20kW air source heat pump (+TTZC)', 'unit_price': 21189 + 1200,
'unit': 'unit'},
{'item': '100mm+ RIR insulation on all surfaces (ceiling u=0.16, walls u=0.3)', 'unit_price': 244.80,
'unit': 'floor_m2'},
{'item': '300mm loft insulation', 'unit_price': 16.07, 'unit': 'floor_m2'},
{'item': 'Smart Thermostat', 'unit_price': 1200, 'unit': 'unit'},
{'item': '2x DMEV fans', 'unit_price': 1070, 'unit': 'unit'},
{'item': '1.6kWp Solar PV system', 'unit_price': 3040, 'unit': 'unit_needs_scaffolding'},
{'item': '2kWp Solar PV system', 'unit_price': 3201, 'unit': 'unit_needs_scaffolding'},
{'item': '2.4kWp Solar PV system', 'unit_price': 3363, 'unit': 'unit_needs_scaffolding'},
{'item': '3.2kWp Solar PV system', 'unit_price': 3686, 'unit': 'unit_needs_scaffolding'},
{'item': '4kWp Solar PV system', 'unit_price': 4009, 'unit': 'unit_needs_scaffolding'},
{'item': '5.6kWp Solar PV system', 'unit_price': 5015, 'unit': 'unit_needs_scaffolding'},
]
pricing_data = pd.DataFrame(pricing_data)
# Price every recommended measure and store the per-measure cost ("Total Cost") and the per-property
# total ("total_cost") back onto the recommendation dictionaries.
scaffolding_prices = {row["number_of_floors"]: row["price"] for row in scaffolding_data}
for recommendation in recommended_measures:
    survey_key = recommendation["survey_key"]
    # Only consulted for measures that need scaffolding (number of floors drives the scaffolding price)
    property_rows = measures_data[measures_data["survey_key"] == survey_key]
    total_cost = 0
    for measure in recommendation["recommended_measures"]:
        # Pricing is looked up by the measure's description, which must match an "item" exactly
        measure_pricing = pricing_data[pricing_data["item"] == measure["description"]]
        if measure_pricing.empty:
            raise KeyError(f"No pricing found for '{measure['description']}' ({survey_key})")
        unit_price = float(measure_pricing["unit_price"].values[0])
        measure_unit = measure_pricing["unit"].values[0]

        # A missing unit (None or NaN, depending on how pandas stored it) is priced as a flat unit price
        if pd.isna(measure_unit) or measure_unit == "unit":
            measure_cost = unit_price
        elif measure_unit == "unit_needs_scaffolding":
            # Exactly one property row is needed to read the number of floors unambiguously
            if len(property_rows) != 1:
                raise ValueError(
                    f"Expected exactly one property record for {survey_key} to price scaffolding, "
                    f"found {len(property_rows)}"
                )
            n_floors = property_rows["number_of_floors"].iloc[0]
            if n_floors not in scaffolding_prices:
                raise ValueError(f"No scaffolding price for {n_floors} floors ({survey_key})")
            measure_cost = unit_price + scaffolding_prices[n_floors]
        elif measure_unit == "floor_m2":
            measure_cost = unit_price * measure["floor_area"]
        elif measure_unit == "hlp_m2":
            measure_cost = unit_price * measure["hlp"]
        elif measure_unit == "window":
            measure_cost = unit_price * measure["n_windows"]
        else:
            raise Exception("Unknown unit type")

        measure["Total Cost"] = measure_cost
        total_cost += measure_cost
    recommendation["total_cost"] = total_cost
# Step 1: Flatten recommended_measures into one record per (survey, measure) pair.
# Survey-level fields (key, starting SAP, total cost) are repeated on every measure row;
# hlp and floor_area are carried through when the measure defines them, otherwise left as None.
normalized_measures = [
    {
        "survey_key": survey["survey_key"],
        "hlp": measure.get("hlp"),
        "floor_area": measure.get("floor_area"),
        "starting_sap": survey["starting_sap"],
        "measure": measure["measure"],
        "description": measure.get("description", ""),
        "sap_points": measure.get("sap_points", 0),
        "measure_cost": measure.get("Total Cost", 0),
        "total_cost": survey.get("total_cost", 0),
    }
    for survey in recommended_measures
    for measure in survey.get("recommended_measures", [])
]
# One row per recommended measure.
measures_df = pd.DataFrame(normalized_measures)
# Step 2: Pivot the measures_df to have a column for each measure type, using the description as values.
pivoted_measures = measures_df.pivot_table(
index="survey_key",
columns="measure",
values="description",
aggfunc=lambda x: ' '.join(x), # Concatenate descriptions if there are multiple entries.
fill_value=None
).reset_index()
measures_columns = [x for x in pivoted_measures.columns if x not in ["survey_key"]]
# We add a "Cost of" column for each measure
for measure in measures_columns:
pivoted_measures[f"Cost of {measure}"] = None
pivoted_floor_area = measures_df.pivot_table(
index="survey_key",
columns="measure",
values="floor_area",
aggfunc="first" # Use 'first' since each measure should only appear once per survey_key
).add_prefix("floor_area - ").reset_index()
pivoted_hlp = measures_df.pivot_table(
index="survey_key",
columns="measure",
values="hlp",
aggfunc="first"
).add_prefix("hlp - ").reset_index()
# Merge hlp and floor_area data
pivoted_measures = pivoted_measures.merge(pivoted_hlp, on="survey_key", how="left")
pivoted_measures = pivoted_measures.merge(pivoted_floor_area, on="survey_key", how="left")
# Step 3: Calculate the total sap points and total cost for each survey.
totals = measures_df.groupby("survey_key").agg(
total_sap_points=("sap_points", "sum"),
).reset_index()
# Merge total sap points into the pivoted measures.
pivoted_measures = pd.merge(pivoted_measures, totals, on="survey_key", how="left")
# pivoted_measures["Cost Contingency"] = pivoted_measures["total_cost_of_measures"] * CONTINGENCY_RATE
# pivoted_measures["Total Cost"] = pivoted_measures["total_cost_of_measures"] + pivoted_measures["Cost Contingency"]
# Step 4: Extract starting SAP for each survey key.
starting_sap_df = measures_df.drop_duplicates(subset=["survey_key"])[["survey_key", "starting_sap"]]
# Merge starting SAP back onto pivoted measures.
result_df = pd.merge(pivoted_measures, starting_sap_df, on="survey_key", how="left")
# Step 5: Calculate the ending SAP.
result_df["Ending SAP"] = result_df["starting_sap"] + result_df["total_sap_points"]
result_df["Ending EPC Rating"] = result_df["Ending SAP"].apply(sap_to_epc)
# Step 6: Merge the result with the measures_data to get the final DataFrame.
final_measures = measures_data.merge(
result_df, how="left", on="survey_key"
)
final_measures.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Measures packages.csv")
# Store costs
pricing_data.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Pricing data.csv")
# if __name__ == "__main__":
# main()

View file

@ -0,0 +1,71 @@
"""
Rough script to get the EPC data for Benyon
"""
import pandas as pd
import os
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from asset_list.utils import get_data
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/List of All Properties ecl Grd Rents in "
"Alphabetical Order.xlsx",
header=1
)
asset_list.columns = ["tennancy", "landlord_id", "landlord_address"]
# Get postcode as the last 2 parts of the address, split on space
asset_list["postcode"] = asset_list["landlord_address"].apply(lambda x: x.split(" ")[-2] + " " + x.split(" ")[-1])
asset_list["house_no"] = asset_list.apply(
lambda x: SearchEpc.get_house_number(address=x["landlord_address"], postcode=x["postcode"]), axis=1
)
epc_data, errors, no_epc = get_data(
df=asset_list,
manual_uprn_map={},
epc_auth_token=EPC_AUTH_TOKEN,
uprn_column=None,
fulladdress_column="landlord_address",
address1_column="house_no",
postcode_column="postcode",
property_type_column=None,
built_form_column=None,
epc_api_only=True,
row_id_name="landlord_id",
)
df = asset_list[asset_list["landlord_id"].isin(no_epc)]
epc_df = pd.DataFrame(epc_data)
epc_df["current-energy-rating"].value_counts()
epc_df["property-type"].value_counts()
epc_df["walls-description"].value_counts(normalize=True)
# EPC fields included in the slim asset list export
epc_summary_columns = [
    "landlord_id", "current-energy-rating", "property-type", "total-floor-area", "roof-description",
    "walls-description", "co2-emissions-current"
]

# Keep a handle on the un-enriched asset list. Merging the full EPC data onto the already-enriched
# frame would duplicate the summary columns (pandas adds _x / _y suffixes to overlapping names).
base_asset_list = asset_list

# Slim export: asset list plus the EPC summary fields
asset_list = base_asset_list.merge(
    epc_df[epc_summary_columns],
    how="left",
    on="landlord_id"
)
asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/asset_list.csv", index=False
)

# Full export: asset list plus every EPC field returned by the lookup
asset_list_big = base_asset_list.merge(
    epc_df,
    how="left",
    on="landlord_id"
)
asset_list_big.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Benyon Estate/asset_list_full_data.csv",
    index=False
)

View file

@ -0,0 +1,192 @@
"""
12th April 2025
This script attempts to clean up the various pieces of data we have for Bromford, with the intention of producing a
standardised asset list
"""
import pandas as pd
# Step 1
# The inspections data is spread across three different workbooks. We load each one and normalise the columns we
# need, so that they can be combined into one finalised asset list with comprehensive inspections.

# Primary asset list
asset_list = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford Asset "
    "List.xlsx",
    sheet_name="Asset List",
)

# Inspections workbook 1 (MDS): fold the fuel into the heating type as "<type> <fuel>"
inspections_1 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
    "MDS.xlsx",
    sheet_name="Data list",
)
heating_with_fuel = inspections_1["Heating Type"] + " " + inspections_1["Heating fuel"]
inspections_1["Heating Type"] = heating_with_fuel.str.strip()

# Inspections workbook 2 (Merlin Lane): the last word of PropertyType is the asset type, the rest is the
# property type description
inspections_2 = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
    "MERLIN LANE.xlsx",
    sheet_name="Report",
)
property_type_words = inspections_2["PropertyType"].str.split(" ")
inspections_2["AssetTypeDesc"] = property_type_words.str[-1]
inspections_2["PropTypeDesc"] = property_type_words.str[:-1].str.join(" ")

# Inspections workbook 3 (Severn Vale): the address is split across two columns
severn_vale_workbook = (
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Inspections/BROMFORD "
    "SEVERN VALE - KLARKE.xlsx"
)
inspections_3 = pd.read_excel(severn_vale_workbook, sheet_name="Asset report")
inspections_3["FullAddress"] = inspections_3["T1_Address1"] + ", " + inspections_3["T1_Address2"]

# On inspections 3, the heating is described by one sheet per heating system; each sheet lists the UPRNs
# that have that system, so the sheet name becomes the heating type
heating_sheets = (
    "Storage Heaters", "No Heating", "Underfloor Heating", "Rointe Electric Heating", "Air Source Heating",
    "Gas Central Heating", "Electric Boiler", "Oil Fired Central Heating",
    "Communal Boilers", "Panel Heaters",
)
heating_frames = [
    pd.read_excel(severn_vale_workbook, sheet_name=sheet)[["UPRN"]].assign(**{"Heating Type": sheet})
    for sheet in heating_sheets
]
heating_systems = pd.concat(heating_frames)

# Some UPRNs appear on more than one sheet and we have no clue which one is correct, so the first
# occurrence (in sheet order) is kept
heating_systems = heating_systems.drop_duplicates("UPRN").rename(columns={"UPRN": "Asset"})
heating_systems["Asset"] = heating_systems["Asset"].astype(int)
inspections_3 = inspections_3.merge(heating_systems, how="left", on="Asset")
# Create a consolidated inspections sheet: one row per inspection record across the three workbooks.
# Workbook 2 carries no heating type and workbook 3 no construction type, so those cells come out empty.
findings_columns = ["WFT Findings", "Eligibility (Red/Yellow/Green)"]
inspections = pd.concat(
    [
        inspections_1[["Asset", "Construction type", "Heating Type", *findings_columns]],
        inspections_2[["Asset", "Construction type", *findings_columns]],
        inspections_3[["Asset", "Heating Type", *findings_columns]],
    ]
)

# Address/attribute details for the inspected assets, with column names aligned to workbook 1
address_data_1 = inspections_1[
    ["Asset", "FullAddress", "PostCode", "ConYear", "Beds", "AssetTypeDesc", "PropTypeDesc", "ManAreaDesc"]
]
address_data_2 = inspections_2[
    ["Asset", "FullAddress", "AccomType", "AssetTypeDesc", "PropTypeDesc", "ConYear", "Postcode"]
].rename(columns={"Postcode": "PostCode"})
address_data_3 = inspections_3[
    ["Asset", "FullAddress", "T1_Postcode", "T1_Build Year", "T1_AssetType"]
].rename(
    columns={"T1_Postcode": "PostCode", "T1_Build Year": "ConYear", "T1_AssetType": "AssetTypeDesc"}
)
inspections_address_data = pd.concat([address_data_1, address_data_2, address_data_3])

# Remove some error values: free text that ended up in the Asset (ID) column
error_asset_values = [
    "They're all green partial fill they're all green this",
    "South Staffordshire District Council",
    "Blk Milton Crt F9-10, Perton, Wolverhampton",
]
is_error_row = inspections["Asset"].isin(error_asset_values)
inspections = inspections[~is_error_row]

# Compare asset IDs as strings everywhere so the frames can be matched against each other
for frame in (inspections, asset_list, inspections_address_data):
    frame["Asset"] = frame["Asset"].astype(str)

# Treat whitespace-only findings as missing
inspections["WFT Findings"] = inspections["WFT Findings"].replace(r'^\s*$', pd.NA, regex=True)

# We have some cases where the inspections data has dupes on Asset (the ID column). Sorting puts missing
# findings last, so keeping the first row per asset keeps an instance that is populated when there is one
inspections = (
    inspections
    .sort_values(by="WFT Findings", na_position="last")
    .drop_duplicates(subset="Asset", keep="first")
)

# We have dupes in the asset list
asset_list = asset_list.drop_duplicates("Asset")

# Inspected assets that are absent from the asset list are appended to it using their inspection address data
known_assets = asset_list["Asset"].values
missed_asset_ids = inspections.loc[~inspections["Asset"].isin(known_assets), "Asset"].values
missed_assets = inspections_address_data.loc[
    inspections_address_data["Asset"].isin(missed_asset_ids)
].drop_duplicates("Asset")

# We produce a larger asset list and attach the inspection findings to it
asset_list = pd.concat([asset_list, missed_assets]).merge(inspections, how="left", on="Asset")
asset_list["WFT Findings"] = asset_list["WFT Findings"].fillna("No Inspections Note")

# Store
# asset_list.to_excel(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
#     "data/asset_list.xlsx"
# )
# We now prepare outcomes into a single file. Each source is tagged with the measure it relates to so the
# rows can still be told apart once they are stacked.
pv_outcomes = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Bromford PV "
    "Outcomes.csv",
    encoding='cp1252'
)
pv_outcomes["measure_type"] = "solar"

other_outcomes = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/(Bromford) "
    "15.04.2024.xlsx",
    sheet_name="ECO4 & GBIS",
    header=1
)
other_outcomes["measure_type"] = "cwi"

# The ECO4 & GBIS sheet uses upper-case headers; rename them to match the PV export before stacking.
# "measure_type" is carried through explicitly: it was previously assigned above but then dropped by the
# column selection, which left the combined frame with no way to distinguish solar rows from cwi rows.
combined_outcomes = pd.concat(
    [
        other_outcomes[
            ["NO", "ADDRESS", "POSTCODE", "WEEK COMMENCING", "OUTCOMES", "NOTES", "measure_type"]
        ].rename(
            columns={
                "NO": "No", "ADDRESS": "Address", "POSTCODE": "Postcode", "WEEK COMMENCING": "Week Commencing",
                "OUTCOMES": "Outcome", "NOTES": "Notes"
            }
        ),
        pv_outcomes[['No', 'Address', 'Postcode', "Week Commencing", "Outcome", "Notes", "measure_type"]]
    ]
)
# Store
# combined_outcomes.to_excel(
#     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/Prepared "
#     "data/outcomes.xlsx"
# )
# Submissions sheets
# ECO 3: load the export and strip the columns pandas auto-named "Unnamed: N"
eco3_submissions = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 Submissions.csv",
    encoding="cp1252",
)
unnamed_columns = list(filter(lambda name: "Unnamed: " in name, eco3_submissions.columns))
eco3_submissions = eco3_submissions.drop(columns=unnamed_columns)

# Store the cleaned ECO 3 submissions
# NOTE(review): this path differs from the input path only by letter case ("Submissions" vs "submissions");
# on a case-insensitive filesystem it would overwrite the source export - confirm that is intended
eco3_submissions.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 3 submissions.csv",
    index=False,
)

# ECO 4: load the export and work out which of its columns also exist in the ECO 3 data
eco4_submissions = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Bromford/Apr 2025 Programme Rebuild/ECO 4 submissions.csv",
)
same_cols = [name for name in eco4_submissions.columns if name in eco3_submissions.columns]

View file

@ -0,0 +1,138 @@
import os
import time
from tqdm import tqdm
import pandas as pd
from dotenv import load_dotenv
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
from utils.s3 import save_csv_to_s3
# Load environment variables from backend/.env so the EPC API token can be read from the environment
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
# USER_ID and PORTFOLIO_ID form the S3 key prefix ("<USER_ID>/<PORTFOLIO_ID>/...") used in app()
USER_ID = 8
PORTFOLIO_ID = 122
def app():
    """
    Prepare the plan inputs for the hard-coded homes of this portfolio and print the request body.

    For every home the newest EPC is looked up with SearchEpc; homes for which none is found are skipped.
    The Find My EPC data for the matched address is then retrieved and its recommendations are kept per UPRN.
    Three CSVs (asset list, non-invasive recommendations, valuations) are written to the
    "retrofit-plan-inputs-dev" bucket under "<USER_ID>/<PORTFOLIO_ID>/".

    :return: None, the request body is only printed
    """
    homes = pd.DataFrame(
        [
            {
                "address": "12 Church Lane", "postcode": "CB23 8AF", "uprn": 100090136018,
                "property_type": "House", "built-form": "Semi-Detached"
            },
            {"address": "21 High Street", "postcode": "CB23 8AB", "uprn": 100090144815},
            {"address": "22 High Street", "postcode": "CB23 8AB", "uprn": 100090144816},
            {"address": "5 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078615},
            {"address": "6 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078616},
            {"address": "7 Bunkers Hill", "postcode": "CB3 0LY", "uprn": 10008078617},
            {"address": "32 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200075},
            {"address": "33 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200076},
            {"address": "35 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200078},
            {"address": "36 George Nuttall Close", "postcode": "CB4 1YE", "uprn": 200004200079},
        ]
    )

    # (uprn, valuation) pairs, expanded into the record layout that is written to the valuations CSV
    valuations = [
        {"uprn": uprn, "valuation": valuation}
        for uprn, valuation in (
            (100090136018, 586_000),
            (100090144815, 446_000),
            (100090144816, 448_000),
            (10008078615, 763_000),
            (10008078616, 616_000),
            (10008078617, 593_000),
            (200004200075, 450_000),
            (200004200076, 457_000),
            (200004200078, 304_000),
            (200004200079, 313_000),
        )
    ]

    # Pull the additional data
    epc_records = []
    for _, row in tqdm(homes.iterrows(), total=len(homes)):
        # Retrieve the EPC data
        searcher = SearchEpc(
            address1=row["address"],
            postcode=row["postcode"],
            uprn=row["uprn"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
        )
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            # Nothing on the register for this home
            continue

        latest_epc = searcher.newest_epc
        scraped = RetrieveFindMyEpc(
            address=latest_epc["address1"],
            postcode=latest_epc["postcode"],
        ).retrieve_newest_find_my_epc_data()
        time.sleep(0.5)  # pause between homes (presumably to throttle requests)

        # We need uprn
        epc_records.append({"uprn": row["uprn"], **scraped})

    recommendation_rows = [
        {"uprn": record["uprn"], "recommendations": record["recommendations"]}
        for record in epc_records
    ]

    def upload(frame, key):
        save_csv_to_s3(dataframe=frame, bucket_name="retrofit-plan-inputs-dev", file_name=key)

    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
    upload(pd.DataFrame(homes), filename)

    # Store the non-invasive recommendations in s3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    upload(pd.DataFrame(recommendation_rows), non_invasive_recommendations_filename)

    # Store the valuations data in s3
    valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv"
    upload(pd.DataFrame(valuations), valuations_filename)

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "B",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": valuations_filename,
        "scenario_name": "Wave 3 Packages",
        "multi_plan": True,
        "budget": None,
        "exclusions": [],
    }
    print(body)

View file

@ -0,0 +1,108 @@
import os
import time
from tqdm import tqdm
import pandas as pd
from dotenv import load_dotenv
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
from utils.s3 import save_csv_to_s3
# Load environment variables from backend/.env so the EPC API token can be read from the environment
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
# USER_ID and PORTFOLIO_ID form the S3 key prefix ("<USER_ID>/<PORTFOLIO_ID>/...") used in app()
USER_ID = 8
PORTFOLIO_ID = 123
def app():
    """
    Prepare the plan inputs for the hard-coded homes of this portfolio and print the request body.

    For every home the newest EPC is looked up with SearchEpc; homes for which none is found are skipped.
    The Find My EPC data for the matched address is then retrieved and its recommendations are kept per UPRN.
    Three CSVs (asset list, non-invasive recommendations, valuations) are written to the
    "retrofit-plan-inputs-dev" bucket under "<USER_ID>/<PORTFOLIO_ID>/".

    :return: None, the request body is only printed
    """
    homes = pd.DataFrame(
        [
            {"address": address, "postcode": postcode, "uprn": uprn}
            for address, postcode, uprn in (
                ("1 Raven Crescent", "WV11 2EX", 100071188496),
                ("13 Bayliss Avenue", "WV11 2EX", 100071136271),
                ("30 Southbourne Road", "WV10 6ET", 100071194376),
                ("96 Marsh Lane", "WV10 6RX", 100071176297),
            )
        ]
    )

    # (uprn, valuation) pairs, expanded into the record layout that is written to the valuations CSV
    valuations = [
        {"uprn": uprn, "valuation": valuation}
        for uprn, valuation in (
            (100071188496, 175_000),
            (100071136271, 183_000),
            (100071194376, 221_000),
            (100071176297, 208_000),
        )
    ]

    # Pull the additional data
    epc_records = []
    for _, row in tqdm(homes.iterrows(), total=len(homes)):
        # Retrieve the EPC data
        searcher = SearchEpc(
            address1=row["address"],
            postcode=row["postcode"],
            uprn=row["uprn"],
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
        )
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is None:
            # Nothing on the register for this home
            continue

        latest_epc = searcher.newest_epc
        scraped = RetrieveFindMyEpc(
            address=latest_epc["address1"],
            postcode=latest_epc["postcode"],
        ).retrieve_newest_find_my_epc_data()
        time.sleep(0.5)  # pause between homes (presumably to throttle requests)

        # We need uprn
        epc_records.append({"uprn": row["uprn"], **scraped})

    recommendation_rows = [
        {"uprn": record["uprn"], "recommendations": record["recommendations"]}
        for record in epc_records
    ]

    def upload(frame, key):
        save_csv_to_s3(dataframe=frame, bucket_name="retrofit-plan-inputs-dev", file_name=key)

    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
    upload(pd.DataFrame(homes), filename)

    # Store the non-invasive recommendations in s3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    upload(pd.DataFrame(recommendation_rows), non_invasive_recommendations_filename)

    # Store the valuations data in s3
    valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv"
    upload(pd.DataFrame(valuations), valuations_filename)

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "B",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": valuations_filename,
        "scenario_name": "Wave 3 Packages",
        "multi_plan": True,
        "budget": None,
        "exclusions": [],
    }
    print(body)

View file

@ -0,0 +1,64 @@
import re
import pandas as pd
from PyPDF2 import PdfReader
# Paths to the uploaded files: the first export has no numeric suffix, the remaining five are numbered 2-6
file_paths = [
    f"/Users/khalimconn-kowlessar/Downloads/Managed Properties List (dragged){suffix}.pdf"
    for suffix in ("", " 2", " 3", " 4", " 5", " 6")
]
# Function to extract text from PDFs
def extract_text_from_pdf_with_pypdf2(file_path):
    """
    Return the text of every page of the PDF at file_path, concatenated in page order.

    :param file_path: location of the PDF, passed straight to PyPDF2's PdfReader
    :return: a single string with the extracted text of all pages (no separator is inserted between pages)
    """
    page_texts = [page.extract_text() for page in PdfReader(file_path).pages]
    return "".join(page_texts)
# Accumulates the (code, address, management type) tuples parsed from every PDF
all_parsed_data = []

# Process each PDF individually
for file_number, pdf_path in enumerate(file_paths, start=1):
    raw_text = extract_text_from_pdf_with_pypdf2(pdf_path)

    # Step 1: Remove titles and repeated headers
    without_titles = re.sub(r"Managed Property Report as at \d+ \w+ \d+", "", raw_text)
    cleaned_text = re.sub(r"Code Property Address Management Type", "", without_titles)

    # Step 2: Extract rows ending with "Managed"
    rows = re.findall(r".*?Managed", cleaned_text)

    # Step 3: Parse rows into structured data. The first whitespace-free token is the property code and
    # everything between it and the trailing "Managed" is the address; rows that do not fit are dropped
    row_matches = (re.match(r"(\S+)\s+(.+?)\s+Managed", row.strip()) for row in rows)
    parsed_data = [
        (found.group(1).strip(), found.group(2).strip(), "Managed")
        for found in row_matches
        if found
    ]

    # Append parsed data to the global list
    all_parsed_data.extend(parsed_data)

    # Provide feedback for debugging
    print(f"File {file_number} processed: {len(parsed_data)} rows")

# Step 4: Create a unified DataFrame
final_df = pd.DataFrame(all_parsed_data, columns=["Code", "Property Address", "Management Type"])

# Step 5: Save the unified DataFrame to an Excel file
final_output_file_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Unified_Managed_Properties_List.xlsx"
final_df.to_excel(final_output_file_path, index=False)

# Provide feedback
print(f"All files processed and combined. Total rows: {len(final_df)}")
print(f"Unified file saved to: {final_output_file_path}")

View file

@ -0,0 +1,15 @@
import pandas as pd
# The workbook is read, enriched with address1/postcode columns and written back to the same location
asset_list_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cottons/Cottons Asset List.xlsx"
df = pd.read_excel(asset_list_path)

# Split up the address on commas. First section is address1, last section is postcode
address_parts = df["Property Address"].apply(lambda full_address: full_address.split(","))
df["address1"] = address_parts.apply(lambda parts: parts[0].strip())
df["postcode"] = address_parts.apply(lambda parts: parts[-1].strip())

# Re-save
df.to_excel(asset_list_path, index=False)

View file

@ -0,0 +1,124 @@
import os
import time
from tqdm import tqdm
import pandas as pd
from dotenv import load_dotenv
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
from utils.s3 import save_csv_to_s3
# Load environment variables from backend/.env so the EPC API token can be read from the environment
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
# USER_ID and PORTFOLIO_ID form the S3 key prefix ("<USER_ID>/<PORTFOLIO_ID>/...") used in app()
USER_ID = 8
PORTFOLIO_ID = 121
def app():
    """
    Prepares the inputs to produce the remote assessments for Cottons

    Reads the Cottons asset list (with EPC data and valuations), keeps the EPC D/E properties, looks up the
    newest EPC and the Find My EPC data for each one, and writes the asset list, non-invasive recommendations
    and valuations CSVs to the "retrofit-plan-inputs-dev" bucket under "<USER_ID>/<PORTFOLIO_ID>/". The request
    body for the plan run is printed at the end.

    Homes for which no EPC can be found are skipped (and reported) rather than aborting the whole run, in line
    with the other portfolio preparation scripts.

    :return: None
    """
    # Read in the asset list
    cottons_asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Cottons/Cottons Asset List EPC Data Pull with "
        "valuations.xlsx"
    )
    # A number are missing EPCs due to the space in the postcode
    # Breakdowns:
    # C 119
    # D 106
    # E 26
    # B 5
    #
    # Take the EPC D/E properties
    asset_list = cottons_asset_list[
        cottons_asset_list["EPC rating on register"].isin(["D", "E"])
    ]
    asset_list = asset_list.reset_index(drop=True)
    asset_list["row_id"] = asset_list.index
    asset_list["uprn"] = asset_list["uprn"].astype(int)

    extracted_data = []
    model_asset_list = []
    skipped_uprns = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        add1 = home["address1"]
        pc = home["postcode"]
        # Retrieve the EPC data
        epc_searcher = SearchEpc(
            address1=add1,
            postcode=pc, uprn=home["uprn"], auth_token=EPC_AUTH_TOKEN, os_api_key=""
        )
        epc_searcher.find_property(skip_os=True)
        newest_epc = epc_searcher.newest_epc
        if newest_epc is None:
            # Without an EPC there is no address to query Find My EPC with; record the home and move on
            # instead of failing on the subscript below
            skipped_uprns.append(home["uprn"])
            continue

        find_epc_searcher = RetrieveFindMyEpc(address=newest_epc["address1"],
                                              postcode=newest_epc["postcode"])
        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
        time.sleep(0.5)
        # We need uprn
        extracted_data.append(
            {
                "uprn": home["uprn"],
                **find_epc_data,
            }
        )
        # The model asset list uses the address as held on the EPC, not the customer's spelling
        model_asset_list.append(
            {
                "uprn": home["uprn"],
                "address": newest_epc["address1"],
                "postcode": newest_epc["postcode"],
            }
        )

    if skipped_uprns:
        print(f"No EPC found for {len(skipped_uprns)} home(s), skipped UPRNs: {skipped_uprns}")

    non_invasive_recommendations = [
        {
            "uprn": r["uprn"],
            "recommendations": r["recommendations"]
        } for r in extracted_data
    ]

    # Only homes that actually have a Zoopla valuation are sent through
    valuations_data = asset_list[["uprn", "Zoopla Valuation"]].copy().rename(columns={"Zoopla Valuation": "valuation"})
    valuations_data = valuations_data[~pd.isnull(valuations_data["valuation"])]

    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(model_asset_list),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )
    # Store the non-invasive recommendations in s3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename
    )
    # Store the valuations data in s3
    valuations_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuations.csv"
    save_csv_to_s3(
        dataframe=valuations_data,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=valuations_filename
    )
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increasing EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": valuations_filename,
        "scenario_name": "Wave 3 Packages",
        "multi_plan": True,
        "budget": None,
        "exclusions": ['air_source_heat_pump', 'boiler_upgrade', 'floor_insulation']
    }
    print(body)

View file

@ -0,0 +1,77 @@
import inspect
import pandas as pd
from pathlib import Path
from tqdm import tqdm
from etl.epc.settings import EARLIEST_EPC_DATE
from etl.spatial.OpenUprnClient import OpenUprnClient
# File in which the lambda is defined, i.e. this script (not used further down)
src_file_path = inspect.getfile(lambda: None)

# Root of the bulk EPC download; each sub-directory holds a certificates.csv
EPC_DIRECTORY = Path("/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates")
epc_directories = [candidate for candidate in EPC_DIRECTORY.iterdir() if candidate.is_dir()]

# One per-directory summary of home counts by postal region
aggregation = []
for epc_dir in tqdm(epc_directories):
    certs = pd.read_csv(epc_dir / "certificates.csv", low_memory=False)

    # Rename the columns to the same format as the api returns
    certs.columns = [name.replace("_", "-").lower() for name in certs.columns]

    in_london = certs["posttown"].str.contains("London", case=False, na=False)
    certs = certs[in_london]
    if certs.empty:
        continue

    # Keep only certificates lodged on or after the date threshold, and only those with a UPRN
    certs = certs[certs["lodgement-date"] >= EARLIEST_EPC_DATE]
    certs = certs[certs["uprn"].notna()]
    certs["uprn"] = certs["uprn"].astype(int)

    # Take just the newest EPC per uprn, based on lodgement-date
    certs = certs.sort_values("lodgement-date", ascending=False).drop_duplicates("uprn")

    # Take EPC D and below
    certs = certs[certs["current-energy-rating"].isin(["D", "E", "F", "G"])]

    # The postal region is the part of the postcode before the space
    certs["postal_region"] = certs["postcode"].str.split(" ").str[0]

    # Take homes whose main fuel is not mains gas
    uses_mains_gas = certs["main-fuel"].str.contains("mains gas", case=False, na=False)
    off_gas = certs[~uses_mains_gas]
    if off_gas.empty:
        continue

    # Remove properties with conservation area issues: fetch the spatial flags for these UPRNs
    spatial = OpenUprnClient.get_spatial_data(off_gas["uprn"].unique(), "retrofit-data-dev")
    spatial_flags = spatial[
        ["UPRN", "conservation_status", "is_listed_building", "is_heritage_building"]
    ].rename(columns={"UPRN": "uprn"})
    off_gas = off_gas.merge(spatial_flags, how="left", on="uprn")

    # Remove any restricted units. The comparison is deliberately "!= True" so that homes with no
    # spatial match (missing flags after the left merge) are kept
    unrestricted = (
        (off_gas["conservation_status"] != True)
        & (off_gas["is_listed_building"] != True)
        & (off_gas["is_heritage_building"] != True)
    )
    off_gas = off_gas[unrestricted]

    # Private rented and owner-occupied homes only (both spellings occur in the data)
    wanted_tenures = ["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]
    off_gas = off_gas[off_gas["tenure"].isin(wanted_tenures)]

    aggregation.append(off_gas.groupby("postal_region").size().reset_index(name="count"))

# Re-aggregate: the same postal region can show up in several directories, so sum the counts,
# order by size and give the columns presentation names
postal_region_aggregation = (
    pd.concat(aggregation)
    .groupby("postal_region")["count"]
    .sum()
    .reset_index()
    .sort_values("count", ascending=False)
    .rename(columns={"postal_region": "Postcode Region", "count": "Number of Homes"})
)
postal_region_aggregation.to_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/GLA/Off Gas EPC D-G Postal Regions - without conservation "
    "area.xlsx",
    index=False,
)

View file

@ -0,0 +1,425 @@
import os
import time
import re
from etl.epc.settings import EARLIEST_EPC_DATE
from dotenv import load_dotenv
from tqdm import tqdm
import pandas as pd
import numpy as np
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from etl.spatial.OpenUprnClient import OpenUprnClient
from backend.SearchEpc import SearchEpc
from utils.s3 import save_csv_to_s3
# Load environment variables from backend/.env so the EPC API token can be read from the environment
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")  # None when the variable is not set
# USER_ID and PORTFOLIO_ID form the S3 key prefix ("<USER_ID>/<PORTFOLIO_ID>/...") used in hornsey()
USER_ID = 8
PORTFOLIO_ID = 117
# Presumably the portfolio used for the CAHA asset list - confirm against caha()
CAHA_PORTFOLIO_ID = 118
def hornsey():
    """
    This script prepares the asset lists for the additional housing associations, CAHA and Hornsey Housing Trust,
    that are forming a consortium led by AIHA. This function handles the Hornsey Housing Trust units.

    It reads the Hornsey EOI workbook, retrieves the Find My EPC data and the newest EPC for every unit that has
    an EPC band recorded, uploads the asset list and the non-invasive recommendations to the
    "retrofit-plan-inputs-dev" bucket under "<USER_ID>/<PORTFOLIO_ID>/", and prints the request body.

    :raises Exception: if the EPC found for a unit does not agree with the band recorded in the workbook
    :raises KeyError: if a unit without an EPC band is not present in the manual UPRN map
    :return: None
    """
    hornsey_asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/SHDF - Template - EOI - Hornsey Housing "
        "Trust.xlsx",
        sheet_name="Ksquared-All units information",
        header=3
    )
    # We don't need the first row
    hornsey_asset_list = hornsey_asset_list.iloc[1:]
    # Fill NA values with empty strings
    hornsey_asset_list = hornsey_asset_list.fillna("")

    # Normalise the address columns: cast to text, trim, and collapse any run of spaces to a single space
    # (a literal single-space-for-single-space replace would leave double spaces in place)
    for col in ["Address letter or number", "Street address", "Postcode"]:
        hornsey_asset_list[col] = (
            hornsey_asset_list[col].astype(str).str.strip().str.replace(r" {2,}", " ", regex=True)
        )

    hornsey_asset_list = hornsey_asset_list[hornsey_asset_list["Address letter or number"] != ""]

    # Row-wise test on the wall type text. `"Cavity" in series` would test the index labels rather than the
    # values and evaluate to a single False, marking every unit as "Solid"
    hornsey_asset_list["Wall Type Cleaned"] = np.where(
        hornsey_asset_list["Wall type"].astype(str).str.contains("Cavity", regex=False),
        "Cavity",
        "Solid"
    )

    # UPRNs supplied by hand for the units that have no EPC band in the workbook
    missed_uprns = {
        "Flat 13A Stowell House": 100021213098,
        "Flat 24 Stowell House": 100021213110,
        "Flat 1 36 Haringey Park": None
    }

    extracted_data = []
    asset_list = []
    hornsey_asset_list["row_id"] = hornsey_asset_list.index
    for _, home in tqdm(hornsey_asset_list.iterrows(), total=len(hornsey_asset_list)):
        # This unit has no known UPRN, so it is left out entirely
        if home["Address letter or number"] == "Flat 1 36 Haringey Park":
            continue

        # Some properties do not have an epc
        if not home["Energy starting band (EPC)"]:
            asset_list.append(
                {
                    "uprn": missed_uprns[home["Address letter or number"]],
                    "address": home["Address letter or number"],
                    "postcode": home["Postcode"],
                    "property_type": "Flat",  # They're all flats
                }
            )
            continue

        unit_number = home["Address letter or number"]
        street = home["Street address"]
        postcode = home["Postcode"]
        address = ", ".join([x for x in [unit_number, street] if x])

        find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode)
        find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data()
        time.sleep(0.5)

        # We need uprn
        searcher = SearchEpc(
            address1=address,
            postcode=postcode,
            auth_token=EPC_AUTH_TOKEN,
            os_api_key="",
            full_address=address,
        )
        searcher.find_property(skip_os=True)
        newest_epc = searcher.newest_epc

        # Sanity check that the EPC we matched is the one the workbook refers to
        if newest_epc["current-energy-efficiency"] != home["Energy starting band (EPC)"].split("-")[1]:
            raise Exception("Something went wrong with the EPC data")

        extracted_data.append(
            {
                "uprn": newest_epc["uprn"],
                **find_epc_data,
                "hotwater-description": newest_epc["hotwater-description"],
            }
        )
        asset_list.append(
            {
                "uprn": newest_epc["uprn"],
                "row_id": home["row_id"],
                "address": home["Address letter or number"],
                "postcode": home["Postcode"],
                "property_type": "Flat",  # They're all flats
            }
        )

    # Get conservation area data
    # uprns = [x["uprn"] for x in extracted_data]
    # conservation_area_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")
    #
    # addresses = pd.DataFrame(asset_list)
    # addresses["uprn"] = addresses["uprn"].astype(int)
    # conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
    # conservation_area_df.to_csv(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/hornsey_conservation_area_data.csv"
    # )

    # We format the extracted data so that it has the same structure as non-intrusive recommendations.
    # Each home's recommendations are filtered against that same home's hot water description (working from
    # the extracted record directly avoids looking the description up on the wrong home).
    non_invasive_recommendations = []
    for extracted in extracted_data:
        has_no_cylinder = (
            extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat"
        )
        new_recommendations = []
        for rec in extracted["recommendations"]:
            # Cylinder measures are dropped for homes with this hot water description
            if has_no_cylinder and rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]:
                continue
            rec["survey"] = False
            new_recommendations.append(rec)
        non_invasive_recommendations.append(
            {
                "uprn": extracted["uprn"],
                "recommendations": new_recommendations,
            }
        )

    # Store the asset list in s3
    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(asset_list),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )
    # Store the non-invasive recommendations in s3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename
    )
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Social",
        "goal": "Increasing EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": "",
        "scenario_name": "Wave 3 Packages",
        "multi_plan": True,
        "budget": None,
        "exclusions": ["boiler_upgrade"]
    }
    print(body)
def caha():
# Prepare and upload the modelling inputs for the CAHA properties in the AIHA Wave 3 consortium workbook.
# Steps: read the workbook, derive a postcode and a searchable address for each row, look every home up
# through RetrieveFindMyEpc and SearchEpc, write a conservation-area CSV locally, upload the asset lists
# and the non-invasive recommendations to S3, and print the request bodies for the modelling runs.
caha_asset_list = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/Copy of AIHA - WHSHF Wave 3 bid - Consortium "
"member properties - CAHA.xlsx",
sheet_name="Ksquared-All units information",
header=3
)
# Drop the first row below the header row
caha_asset_list = caha_asset_list.iloc[1:]
# Fill NA values with empty strings
caha_asset_list = caha_asset_list.fillna("")
caha_asset_list["Address letter or number"] = caha_asset_list["Address letter or number"].astype(
str
).str.strip()
# The postcode wasn't populated, so derive it from the street address: split on spaces and re-join the
# last two tokens with a space
caha_asset_list["Street address"] = caha_asset_list["Street address"].str.strip()
caha_asset_list["Postcode"] = caha_asset_list["Street address"].str.split(" ").str[-2:].str.join(" ")
# Take just the columns we need
caha_asset_list = caha_asset_list[["Address letter or number", "Street address", "Postcode"]]
# NOTE(review): both arguments below render identically in this view; presumably the first is a
# non-breaking or doubled space being normalised to a single space - confirm against the raw file
for col in ["Address letter or number", "Street address", "Postcode"]:
caha_asset_list[col] = caha_asset_list[col].str.replace(" ", " ")
# Manual address overrides, keyed on the joined "unit, street" string (postcode still attached)
remap = {
"Flat A, 50 Talbot Road N6 4QP": "50a Talbot Road",
"Flat A, 51 First Avenue EN1 1BN": "51a, First Avenue",
"Flat B, 51 First Avenue EN1 1BN": "51b, First Avenue"
}
# Helper: rewrite 'Flat <letter>, <number> <street>' as '<number><letter> <street>'
def remap_address(address):
# Match patterns like 'Flat A, 30 Grove Park Road'
match = re.match(r'Flat (\w), (\d+) (.+)', address)
if match:
flat_letter = match.group(1) # e.g., 'A'
number = match.group(2) # e.g., '30'
rest_of_address = match.group(3) # e.g., 'Grove Park Road'
# Format the new address as '30A Grove Park Road'
return f"{number}{flat_letter} {rest_of_address}"
# If pattern doesn't match, return original address
return address
caha_asset_list["row_id"] = caha_asset_list.index
# extracted_data holds the Find My EPC output per home; asset_list holds the rows sent for modelling
extracted_data = []
asset_list = []
# Pull the data from Find My EPC and the EPC search for every home
for _, home in tqdm(caha_asset_list.iterrows(), total=len(caha_asset_list)):
# These specific rows are deliberately skipped (the reason is not recorded here)
if home["Street address"] == "35 Stanford road N11 3HY" and home["Address letter or number"] == "":
continue
if home["Street address"] == "29 Victoria Avenue N3 1BD" and home["Address letter or number"] == "":
continue
if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat A":
continue
if home["Street address"] == "11 Victoria Avenue N3 1BD" and home["Address letter or number"] == "Flat C":
continue
if home["Street address"] == "10 Forest Gardens N17 6XA" and home["Address letter or number"] == "Flat C":
continue
if home["Street address"] == "219 Cann Hall Road E11 3NJ" and home["Address letter or number"] == "Flat B":
continue
unit_number = home["Address letter or number"]
street = home["Street address"]
postcode = home["Postcode"]
# Build "unit, street" (unit omitted when empty), apply manual overrides, then strip the postcode
address = ", ".join([x for x in [unit_number, street] if x])
address = remap.get(address, address)
address = address.replace(postcode, "").strip()
# Victoria Avenue addresses keep their 'Flat X, ...' form; everything else goes through remap_address
if "Victoria Avenue" not in address:
address = remap_address(address)
find_epc_searcher = RetrieveFindMyEpc(address=address, postcode=postcode)
find_epc_data = find_epc_searcher.retrieve_newest_find_my_epc_data(sap_2012_date=EARLIEST_EPC_DATE)
# Pause between homes
time.sleep(0.5)
# We need uprn
searcher = SearchEpc(
address1=address,
postcode=postcode,
auth_token=EPC_AUTH_TOKEN,
os_api_key="",
full_address=address,
)
searcher.find_property(skip_os=True)
newest_epc = searcher.newest_epc
# NOTE(review): newest_epc is subscripted without a None check; if the search can return no EPC
# this line raises - confirm
uprn = newest_epc["uprn"]
# The UPRN found for these two flats is discarded
if address in ["Flat D, 11 Victoria Avenue", "Flat B, 11 Victoria Avenue"]:
uprn = None
extracted_data.append(
{
"uprn": uprn,
**find_epc_data,
}
)
asset_list.append(
{
"row_id": home["row_id"],
"uprn": uprn,
"address": address,
"postcode": home["Postcode"],
"property_type": newest_epc["property-type"],
"wall_type": newest_epc["walls-description"],
"built_form": newest_epc["built-form"],
"flat_storey_count": newest_epc['flat-storey-count'],
}
)
# Row ids that did not make it into the asset list, and rows without a UPRN.
# Neither `missed` nor `no_data` is used again below.
missed = [r for r in caha_asset_list["row_id"].tolist() if r not in [x["row_id"] for x in asset_list]]
no_data = [x for x in asset_list if x["uprn"] in [None, ""]]
no_data = pd.DataFrame(no_data)
# Get conservation area data
# NOTE(review): `uprns` is built here but never used; the call below passes the literal [36284]
# instead - confirm whether that is a debugging leftover
uprns = [x["uprn"] for x in extracted_data if x["uprn"] not in ["", None]]
conservation_area_data = OpenUprnClient.get_spatial_data([36284], "retrofit-data-dev")
addresses = pd.DataFrame(asset_list)
# Cast both keys to str so the merge compares like with like
addresses["uprn"] = addresses["uprn"].astype(str)
conservation_area_data["UPRN"] = conservation_area_data["UPRN"].astype(str)
conservation_area_df = conservation_area_data.merge(addresses, how="left", right_on="uprn", left_on="UPRN")
conservation_area_df.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/AIHA/caha_conservation_area_data.csv"
)
# One entry per home: the UPRN plus the recommendations returned by Find My EPC
non_invasive_recommendations = [
{
"uprn": r["uprn"],
"recommendations": r["recommendations"]
} for r in extracted_data
]
# for r in non_invasive_recommendations:
# new_recommendations = []
# extracted = [r for r in extracted_data if r["uprn"] == r["uprn"]][0]
# for rec in r["recommendations"]:
# if extracted["hotwater-description"] == "Gas boiler/circulator, no cylinder thermostat":
# if rec["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]:
# continue
# rec["survey"] = False
# new_recommendations.append(rec)
# r["recommendations"] = new_recommendations
# We model the two properties separately
asset_list = pd.DataFrame(asset_list)
# asset_list1 is everything except Flat D, 11 Victoria Avenue; asset_list2 is that flat on its own
asset_list1 = asset_list[asset_list["address"] != "Flat D, 11 Victoria Avenue"]
asset_list2 = asset_list[asset_list["address"] == "Flat D, 11 Victoria Avenue"]
# Store the asset list in s3
filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list1.csv"
save_csv_to_s3(
dataframe=asset_list1,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
filename2 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list2.csv"
save_csv_to_s3(
dataframe=asset_list2,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename2
)
# Store the non-invasive recommendations in s3
non_invasive_recommendations_filename = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/non_invasive_recommendations.csv"
save_csv_to_s3(
dataframe=pd.DataFrame(non_invasive_recommendations),
bucket_name="retrofit-plan-inputs-dev",
file_name=non_invasive_recommendations_filename
)
# Request body for the main asset list (asset_list1); it is only printed here
body = {
"portfolio_id": str(CAHA_PORTFOLIO_ID),
"housing_type": "Social",
"goal": "Increasing EPC",
"goal_value": "C",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": "",
"scenario_name": "Wave 3 Packages",
"multi_plan": True,
"budget": None,
"exclusions": ["boiler_upgrade"]
}
print(body)
# Same request for asset_list2 (Flat D, 11 Victoria Avenue); only the trigger file differs
body2 = {
"portfolio_id": str(CAHA_PORTFOLIO_ID),
"housing_type": "Social",
"goal": "Increasing EPC",
"goal_value": "C",
"trigger_file_path": filename2,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": "",
"scenario_name": "Wave 3 Packages",
"multi_plan": True,
"budget": None,
"exclusions": ["boiler_upgrade"]
}
print(body2)
# A single hand-entered property (10b Forest Gardens), uploaded as its own asset list
asset_list3 = [
{
"address": "10b Forest Gardens", "postcode": "N17 6XA", "uprn": 100021180197
}
]
filename3 = f"{USER_ID}/{CAHA_PORTFOLIO_ID}/asset_list3.csv"
save_csv_to_s3(
dataframe=pd.DataFrame(asset_list3),
bucket_name="retrofit-plan-inputs-dev",
file_name=filename3
)
# NOTE(review): portfolio_id is the literal 119 here while the file path above uses
# CAHA_PORTFOLIO_ID - confirm they refer to the same portfolio.
# No non-invasive recommendations file is supplied for this run.
body3 = {
"portfolio_id": str(119),
"housing_type": "Social",
"goal": "Increasing EPC",
"goal_value": "C",
"trigger_file_path": filename3,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"valuation_file_path": "",
"scenario_name": "Wave 3 Packages",
"multi_plan": True,
"budget": None,
"exclusions": ["boiler_upgrade"]
}
print(body3)

View file

@ -0,0 +1,166 @@
"""
This script prepares the asset list for modelling the properties from the L&G dataset, for their January IC
"""
import pandas as pd
import numpy as np
from etl.route_march_data_pull.app import get_data
from utils.s3 import save_csv_to_s3
PORTFOLIO_ID = 124
USER_ID = 8
def app():
    """
    Build and upload the modelling inputs for the L&G Basildon portfolio.

    Reads the "Basildon" sheet of the MDS workbook, looks every address up through ``get_data``,
    back-fills the homes that came back without an EPC using a separately prepared UPRN file,
    uploads the asset list and the non-invasive recommendations to S3, and prints the request body.
    """
    asset_data = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon information for Domna/Basildon MDS v1.4 "
        "(1).xlsx",
        sheet_name="Basildon",
        header=5,
    )
    # Drop the last three rows of the sheet
    asset_data = asset_data.iloc[:-3]

    # When "Address 1" is empty the address starts at "Address 2"
    address_1_missing = pd.isnull(asset_data["Address 1"])
    asset_data["address1"] = np.where(address_1_missing, asset_data["Address 2"], asset_data["Address 1"])
    asset_data["full_address"] = np.where(
        address_1_missing,
        asset_data["Address 2"] + ", " + asset_data["Address 3"],
        asset_data["Address 1"] + ", " + asset_data["Address 2"] + ", " + asset_data["Address 3"],
    )

    asset_list = asset_data[["address1", "PostCode", "full_address", "Bedrooms"]].reset_index(drop=True)
    asset_list["row_id"] = asset_list.index

    # L&G's focus:
    # Measures: loft and cavity insulation, replacement thermally efficient windows, PV cells, AS heat pumps.
    epc_data, _errors, no_epc = get_data(
        asset_list=asset_list,
        fulladdress_column="full_address",
        address1_column="address1",
        postcode_column="PostCode",
        manual_uprn_map={},
    )

    # Homes with no EPC: attach the customer's property type where we have it
    property_types = asset_data[["address1", "PostCode", "Property Type"]]
    missed = asset_list[asset_list["row_id"].isin(no_epc)].merge(
        property_types, how="left", on=["address1", "PostCode"]
    )
    # Remap "Block: Residential" to "Flat"
    is_residential_block = missed["Property Type"] == "Block: Residential"
    missed["Property Type"] = np.where(is_residential_block, "Flat", missed["Property Type"])

    # Homes with an EPC: keep the identifying columns and bring the bedroom count across by row id
    epc_df = pd.DataFrame(epc_data)
    fetched_asset_list = (
        epc_df[["address1", "postcode", "uprn", "row_id"]]
        .merge(asset_list[["row_id", "Bedrooms"]], how="left", on=["row_id"])
        .drop(columns=["row_id"])
    )

    # Some properties genuinely never had an EPC; their UPRNs come from a separate file
    missed = missed.rename(columns={"PostCode": "postcode"}).drop(columns=["row_id"])
    # missed.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/missed_epcs.csv")
    missed_uprns = pd.read_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/missed_epcs_uprn.csv",
    )
    uprn_lookup = missed_uprns[["address1", "postcode", "UPRN"]].rename(columns={"UPRN": "uprn"})
    missed = missed.merge(uprn_lookup, how="left", on=["address1", "postcode"])

    # Stack both sets of homes and switch to the column names used for modelling
    final_asset_list = pd.concat(
        [fetched_asset_list, missed[["address1", "postcode", "Property Type", "Bedrooms", "uprn"]]]
    ).rename(
        columns={
            "address1": "address",
            "Property Type": "property_type",
            "Bedrooms": "n_bedrooms",
        }
    )

    # Extract the non-invasive recommendations
    non_invasive_recommendations = [
        {
            "uprn": epc["uprn"],
            "recommendations": epc["find_my_epc_data"]["recommendations"],
        }
        for epc in epc_data
    ]

    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(final_asset_list),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename,
    )

    # Store the non-invasive recommendations in s3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename,
    )

    # No valuations file is uploaded for this portfolio, so "valuation_file_path" stays empty
    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "A",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": "",
        "scenario_name": "Retrofit Packages",
        "multi_plan": True,
        "budget": None,
        "inclusions": [
            "cavity_wall_insulation",
            "loft_insulation",
            "windows",
            "solar_pv",
            "air_source_heat_pump",
        ],
    }
    print(body)

View file

@ -0,0 +1,246 @@
import pandas as pd
from backend.app.utils import sap_to_epc
# Exploratory breakdowns of the Basildon portfolio; the value counts recorded in the comments
# below were captured from earlier runs.
data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/property_202501170837.csv"
)
data["year_built"].value_counts()
# 1950-1966 26
# 1967-1975 37
# 1976-1982 37
# 1983-1990 33
# 1991-1995 139
# 1996-2002 42
# 2003-2006 50

# Combined label, e.g. "House: Mid-Terrace"
data["full_property_type"] = data["property_type"] + ": " + data["built_form"]
is_house_or_bungalow = data["property_type"].isin(["House", "Bungalow"])
houses = data[is_house_or_bungalow]
houses["built_form"].value_counts()
data["property_type"].value_counts()
data["full_property_type"].value_counts()
# House: Mid-Terrace 136
# House: End-Terrace 83
# House: Semi-Detached 55
# Flat: Semi-Detached 24
# Flat: End-Terrace 19
# House: Detached 10
# Flat: Mid-Terrace 9
# Maisonette: Mid-Terrace 9
# Maisonette: Semi-Detached 8
# Maisonette: End-Terrace 6
# Flat: Detached 4
# Bungalow: Detached 1

epc_data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/basildon_age_breakdowns/basildon EPC Data.csv"
)


def _floor_area_bracket(area):
    """Classify a floor area as "<73", "73-98", "99-200" or "200+"."""
    if area < 73:
        return "<73"
    if area < 99:
        return "73-98"
    if area < 200:
        return "99-200"
    return "200+"


# Classify floor area in <73m2, 73-98, 99-200, 200+
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(_floor_area_bracket)
# 73-98 185
# <73 156
# 99-200 23

# The wall type is whatever precedes the first comma of the walls description
epc_data["wall_type"] = epc_data["walls"].str.split(",").str.get(0)
epc_data["wall_type"].value_counts()
# Cavity wall 343
# Timber frame 15
# System built 6
# we pull some additional data
# We want:
# 1) The list of properties included in the portfolio, with uprn
# 2) The recommendations against each property with costs, and whether or not the recommendation was defaulted
# 3) The properties without recommendations and why
from tqdm import tqdm
import pandas as pd
import numpy as np
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
def _columns_as_dict(instance, model):
    """Return ``{column name: value}`` for every table column of ``model``, read from ``instance``."""
    return {col.name: getattr(instance, col.name) for col in model.__table__.columns}


def get_data(portfolio_id, scenario_ids):
    """
    Fetch a portfolio's properties plus the plans and default recommendations of the given scenarios.

    :param portfolio_id: Portfolio whose properties (joined to their EPC details) are returned.
    :param scenario_ids: Scenario ids whose plans, and the default recommendations on those plans, are returned.
    :return: Tuple ``(properties_data, plans_data, recommendations_data)``. Each element is a list of dicts
        keyed by column name; every recommendation dict also carries a ``"Scenario ID"`` key.
    """
    session = sessionmaker(bind=db_engine)()
    # Close the session even when a query raises, so the connection is always released
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
        ).all()

        # Flatten each (property, details) row into one dict; where both tables share a column name
        # the EPC details value overwrites the property value
        properties_data = [
            {
                **_columns_as_dict(prop.PropertyModel, PropertyModel),
                **_columns_as_dict(prop.PropertyDetailsEpcModel, PropertyDetailsEpcModel),
            }
            for prop in properties_query
        ]

        # Get the plans of the requested scenarios (not restricted to the properties fetched above)
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
        plans_data = [_columns_as_dict(plan, Plan) for plan in plans_query]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that are default
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            Recommendation.default == True  # noqa: E712 - SQL expression, not a Python comparison
        ).all()

        # Flatten each recommendation and tag it with the scenario it belongs to; fall back to the
        # row itself if it does not expose a ``Recommendation`` attribute
        recommendations_data = [
            {
                **_columns_as_dict(getattr(rec, 'Recommendation', rec), Recommendation),
                "Scenario ID": rec.scenario_id,
            }
            for rec in recommendations_query
        ]
    finally:
        session.close()

    return properties_data, plans_data, recommendations_data
# Build the Basildon export: one row per property with its current EPC attributes, the cost and
# presence of each recommended measure, and the predicted post-works SAP score and EPC band.
properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)

# Only default recommendations count towards the export
default_recommendations = recommendations_df[recommendations_df["default"]]
recommended_measures_df = default_recommendations[["property_id", "measure_type", "estimated_cost"]]

# Sum up the sap points by property id
post_install_sap = default_recommendations.groupby("property_id")[["sap_points"]].sum().reset_index()

# Measure type -> label used in the exported "Cost: ..." / "Recommendation: ..." column names
measure_labels = {
    "air_source_heat_pump": "Air Source Heat Pump",
    "cavity_wall_insulation": "Cavity Wall Insulation",
    "double_glazing": "Double Glazing",
    "loft_insulation": "Loft Insulation",
    "mechanical_ventilation": "Ventilation",
    "solar_pv": "Solar PV",
}

# One row per property, one cost column per measure type; a missing measure costs 0
recommendations_measures_pivot = (
    recommended_measures_df.pivot(index='property_id', columns='measure_type', values='estimated_cost')
    .reset_index()
    .rename(columns={measure: f"Cost: {label}" for measure, label in measure_labels.items()})
    .fillna(0)
)
# A measure is recommended exactly when it has a positive cost
for label in measure_labels.values():
    recommendations_measures_pivot[f"Recommendation: {label}"] = (
        recommendations_measures_pivot[f"Cost: {label}"] > 0
    )

property_columns = [
    "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
    "current_epc_rating",
    "current_sap_points", "total_floor_area", "number_of_rooms",
]
df = properties_df[property_columns]
df = df.merge(recommendations_measures_pivot, how="left", on="property_id")
df = df.merge(post_install_sap, how="left", on="property_id")
df = df.drop(columns=["property_id"])
# Properties without recommendations gain no SAP points
df["sap_points"] = df["sap_points"].fillna(0)
df = df.rename(
    columns={
        "uprn": "UPRN",
        "address": "Address",
        "postcode": "Postcode",
        "walls": "Walls",
        "roof": "Roof",
        "heating": "Heating",
        "windows": "Windows",
        "current_epc_rating": "Current EPC Rating",
        "current_sap_points": "Current SAP Points",
        "total_floor_area": "Total Floor Area",
        "number_of_rooms": "Number of Habitable Rooms",
        "floor_height": "Floor Height",
    }
)

# After the left merge the cost is only missing for properties absent from the pivot
df["Has Recommendations"] = df["Cost: Air Source Heat Pump"].notna()

# We fill missings:
for label in measure_labels.values():
    df[f"Recommendation: {label}"] = df[f"Recommendation: {label}"].fillna(False)
for label in measure_labels.values():
    df[f"Cost: {label}"] = df[f"Cost: {label}"].fillna(0)

# Calculate post SAP
df["Predicted Post Works SAP"] = (df["Current SAP Points"] + df["sap_points"]).round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(sap_to_epc)

df["Recommendation: Air Source Heat Pump"].sum()
df["Cost: Air Source Heat Pump"].sum()

df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)

View file

@ -0,0 +1,23 @@
import pandas as pd
# Exploratory look at the Lambeth re-knock outcomes recorded on the "Possible Route" sheet.
notes_column = (
    "Notes (If 'no answer' under outcomes, have you checked around the property for access issues where "
    "possible?)"
)
access_issue_note = (
    'Access to rear of property only through number 10. Overgrown athe rear of property '
    'installer wont be able to access'
)

data = pd.read_excel(
    "/Users/khalimconn-kowlessar/Downloads/Lambeth Reknocks.xlsx",
    sheet_name="Possible Route",
    header=1,
)
data["Outcomes"].value_counts()

# Keep only the rows whose outcome points at the notes column
has_notes = data["Outcomes"] == "See notes"
df = data[has_notes]

# Frequency of each distinct note
notes_df = df[notes_column].value_counts().to_frame()

# Pull out the rows carrying one specific access-issue note
example = df[df[notes_column] == access_issue_note]
# 18 did not attend
#

View file

@ -0,0 +1,225 @@
import os
import time
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every row of ``asset_list``.

    :param asset_list: DataFrame with "Postcode", "Number", "Full Address" and "row_id" columns.
    :return: Tuple ``(epc_data, errors)``. ``epc_data`` is a list of dicts holding the row id, the fields
        of the newest EPC and a "recommendations" list; rows for which no EPC was found are left out.
        ``errors`` lists the row ids whose lookup raised.
    """
    epc_data = []
    errors = []

    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            searcher = SearchEpc(
                address1=str(home["Number"]),
                postcode=home["Postcode"],
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=home["Full Address"],
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations; a failed lookup is treated as "no recommendations".
            # ``except Exception`` (rather than a bare ``except``) keeps Ctrl-C working during a long run.
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                property_recommendations = {"rows": []}

            epc_data.append(
                {
                    "row_id": home["row_id"],
                    **searcher.newest_epc,
                    "recommendations": property_recommendations["rows"],
                }
            )
        except Exception:
            # Best effort: record the row so the caller can retry it, then back off before carrying on
            errors.append(home["row_id"])
            time.sleep(5)

    return epc_data, errors
def app():
    """
    Pull EPC data for a list of properties owned by Livewest and write it to an Excel workbook.

    Data request contents:
    Date of last EPC
    Reason for EPC
    SAP score on register
    Property Type
    Property Area
    Property Age
    Any Dimensions (HLP,PW,RH)
    Property Wall Construction
    Heating Type
    Secondary Heating
    Loft Insulation Depth
    Additional if possible:
    Heat loss calculations
    EPC recommendations
    Property UPRN
    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Downloads/LIVEWEST 3578 ECO4 ECO PLUS GBIS.xlsx", header=0
    )
    asset_list["row_id"] = asset_list.index

    epc_data, errors = get_data(asset_list)

    # Retry the rows that errored once, and append whatever the second pass finds
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(asset_list_failed)
    epc_data.extend(epc_data_failed)

    epc_df = pd.DataFrame(epc_data)

    # Expand the recommendations into one boolean column per distinct recommendation text.
    # Sorting keeps the column order identical from run to run (set iteration order is not).
    unique_recommendations = sorted(
        {rec["improvement-summary-text"] for recs in epc_df["recommendations"] for rec in recs}
    )
    transformed_data = []
    for row_id, recs in zip(epc_df["row_id"], epc_df["recommendations"]):
        present = {rec["improvement-summary-text"] for rec in recs}
        row_data = {"row_id": row_id}
        row_data.update({text: text in present for text in unique_recommendations})
        transformed_data.append(row_data)
    transformed_df = pd.DataFrame(transformed_data)

    # Drop the column for recommendations with an empty summary; it is not always present
    transformed_df = transformed_df.drop(columns=[""], errors="ignore")

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )
    asset_list = asset_list.drop(columns=["row_id"])

    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)"
    })

    # Rows without an EPC have no property type, so no floor count is estimated for them
    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Blank out "" before the float cast. An explicit NaN is used because ``replace("", None)``
    # forward-fills the previous row's value on older pandas versions instead of blanking the cell.
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", float("nan"))
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)

    # Floor area and room count are divided by the floor count to get per-floor figures
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    # A falsy floor height (e.g. "") falls back to 2.5
    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    filename = "livewest EPC Data pull - 29 Oct.xlsx"
    asset_list.to_excel(filename, index=False)

View file

@ -0,0 +1,205 @@
import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from etl.spatial.OpenUprnClient import OpenUprnClient
from asset_list.utils import get_data
from utils.s3 import save_csv_to_s3
PORTFOLIO_ID = 139
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
    """
    Prepare and upload the modelling inputs for the MOD pilot sample, then print the scenario bodies.

    Restricts the sample to England and Wales, attaches each building's UPRN, fetches the newest EPC
    per UPRN, joins the customer attribution and spatial data, runs ``get_data`` for the non-invasive
    recommendations, uploads the asset list and recommendations to S3 and prints three request bodies.

    :return: None
    """
    folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme"
    sample_list = pd.read_excel(f"{folder_path}/20250227_DIO_Accommodation_Sample_Properties.xlsx")
    asset_data = pd.read_excel(f"{folder_path}/20250303_DIO_Accommodation_Property_Attribution.xlsx")

    in_england_or_wales = sample_list["BLDNG_COUNTRY_NAME"].isin(["ENGLAND", "WALES"])
    sample_list = sample_list[in_england_or_wales]

    # Merge on the UPRN
    uprn_by_building = asset_data[["BLDNG_ID", "BLNDG_GOVERMENT_UPRN"]].drop_duplicates()
    sample_list = sample_list.merge(uprn_by_building, how="left", on="BLDNG_ID")
    sample_list["BLNDG_GOVERMENT_UPRN"] = sample_list["BLNDG_GOVERMENT_UPRN"].astype("Int64")

    # Use the EPC API to get corrected postcodes
    model_asset_list = []
    missed = []
    for _, sample in tqdm(sample_list.iterrows(), total=len(sample_list)):
        government_uprn = sample["BLNDG_GOVERMENT_UPRN"]
        # Nothing to search on without a UPRN
        if pd.isnull(government_uprn):
            continue

        searcher = SearchEpc(
            address1="",
            postcode="",
            uprn=government_uprn,
            auth_token=EPC_AUTH_TOKEN,
            os_api_key=""
        )
        searcher.find_property(skip_os=True)
        newest_epc = searcher.newest_epc
        if newest_epc is None:
            missed.append(government_uprn)
        else:
            model_asset_list.append(newest_epc)

    model_asset_list = pd.DataFrame(model_asset_list)
    model_asset_list["uprn"] = model_asset_list["uprn"].astype(int)

    # Spatial data is used to tell whether the building is listed, heritage or in a conservation area
    spatial_data = OpenUprnClient.get_spatial_data(
        uprns=model_asset_list["uprn"].tolist(), bucket_name="retrofit-data-dev"
    )
    spatial_df = pd.DataFrame(spatial_data).rename(columns={"UPRN": "uprn"})

    # Merge on the property features
    features = asset_data.drop(
        columns=["BUILDING_SYSTEM_ITEM_NAME", "OBSERVED_CONDITION_DESCRIPTION"]
    ).drop_duplicates()
    df = features.merge(model_asset_list, how="inner", right_on="uprn", left_on="BLNDG_GOVERMENT_UPRN")
    df = df.merge(spatial_df, how="left", on="uprn")

    # Store data locally
    # df.to_csv(folder_path + "/MOD property data.csv", index=False)

    # Produce an asset list for analysis
    df["row_id"] = df.index
    epc_data, errors, no_epc = get_data(
        df=df,
        manual_uprn_map={},
        epc_auth_token=EPC_AUTH_TOKEN,
        uprn_column="uprn",
        fulladdress_column="address",
        address1_column="address1",
        postcode_column="postcode",
        property_type_column=None,
        built_form_column=None,
        epc_api_only=False,
        row_id_name="row_id",
    )

    non_invasive_recommendations = [
        {
            "uprn": epc["uprn"],
            "recommendations": epc["find_my_epc_data"]["recommendations"],
        }
        for epc in epc_data
    ]

    # Also include the bedroom count, storey count and floor area supplied by the customer
    modelling_columns = {
        "uprn": "uprn",
        "address1": "address",
        "postcode": "postcode",
        "NUMBER_OF_BEDROOMS": "n_bedrooms",
        "BLDNG_STOREYS_QTY": "number_of_floors",
        "BLDNG_MSRMNT_VAL": "floor_area",
    }
    asset_list = df[list(modelling_columns)].rename(columns=modelling_columns)

    filename = f"{USER_ID}/{PORTFOLIO_ID}/asset_list.csv"
    save_csv_to_s3(
        dataframe=asset_list,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    # Store the non-invasive recommendations in s3
    non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
    save_csv_to_s3(
        dataframe=pd.DataFrame(non_invasive_recommendations),
        bucket_name="retrofit-plan-inputs-dev",
        file_name=non_invasive_recommendations_filename
    )

    def scenario_body(scenario_name, goal_value, **extra):
        # The scenarios differ only in name, target band and optional extras.
        # No "inclusions" are set, so no measure filter is sent.
        return {
            "portfolio_id": str(PORTFOLIO_ID),
            "housing_type": "Private",
            "goal": "Increasing EPC",
            "goal_value": goal_value,
            "trigger_file_path": filename,
            "already_installed_file_path": "",
            "patches_file_path": "",
            "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
            "valuation_file_path": "",
            "scenario_name": scenario_name,
            "multi_plan": True,
            "budget": None,
            **extra,
        }

    # Scenario 1 - EPC C
    body = scenario_body("Hit EPC C", "C")
    print(body)

    # Scenario 2 - EPC B
    body = scenario_body("Hit EPC B", "B")
    print(body)

    # Scenario 3 - EPC B, 3.5 COP ASHP
    body = scenario_body("Hit EPC B - 3.5 COP ASHP", "B", ashp_cop=3.5)
    print(body)

View file

@ -0,0 +1,652 @@
from pprint import pprint
import pandas as pd
import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
def get_data(portfolio_id, scenario_ids):
    """
    Load properties, plans and default recommendations from the database.

    :param portfolio_id: Portfolio whose properties are fetched (matched on PropertyModel.portfolio_id).
    :param scenario_ids: Scenario IDs; plans are selected where Plan.scenario_id is one of these.
    :return: Tuple ``(properties_data, plans_data, recommendations_data)``. Each element is a list of
        dicts keyed by column name. Property dicts merge the PropertyModel and PropertyDetailsEpcModel
        columns (on a name clash the PropertyDetailsEpcModel value wins); recommendation dicts carry
        an additional "Scenario ID" key.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()

        # Get properties and their details for a specific portfolio
        properties_query = session.query(
            PropertyModel,
            PropertyDetailsEpcModel
        ).join(
            PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
        ).filter(
            PropertyModel.portfolio_id == portfolio_id  # Filter by portfolio ID
        ).all()

        # Transform properties data to include all fields dynamically
        properties_data = [
            {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
             **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
                PropertyDetailsEpcModel.__table__.columns}}
            for prop in properties_query
        ]

        # Get the plans for the requested scenarios (not filtered by the properties fetched above)
        plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()

        # Transform plans data to include all fields dynamically
        plans_data = [
            {col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
            for plan in plans_query
        ]

        # Extract plan IDs for filtering recommendations through PlanRecommendations
        plan_ids = [plan['id'] for plan in plans_data]

        # Get recommendations through PlanRecommendations for those plans and that are default
        recommendations_query = session.query(
            Recommendation,
            Plan.scenario_id
        ).join(
            PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
        ).join(
            Plan, Plan.id == PlanRecommendations.plan_id  # Join with Plan to access scenario_id
        ).filter(
            PlanRecommendations.plan_id.in_(plan_ids),
            # "== True" is intentional: SQLAlchemy needs the operator to build the SQL expression
            Recommendation.default == True  # noqa: E712
        ).all()

        # Transform recommendations data to include all fields dynamically and include scenario_id
        recommendations_data = [
            {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
                else getattr(rec, col.name) for
                col in Recommendation.__table__.columns},
             "Scenario ID": rec.scenario_id}
            for rec in recommendations_query
        ]
    finally:
        # Always release the connection, even when one of the queries raises
        session.close()

    return properties_data, plans_data, recommendations_data
def app():
"""
Build the analysis figures for one portfolio and a set of scenarios.

Loads properties, plans and default recommendations via get_data, joins the MOD property CSV,
derives archetypes and banded breakdowns, then computes and prints per-scenario costs and savings.
All inputs (portfolio ID, scenario IDs and file paths) are hard-coded in the body.
"""
# Set the inputs:
portfolio_id = 139
scenario_ids = [237, 238]
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=portfolio_id, scenario_ids=scenario_ids
)
properties_df = pd.DataFrame(properties_data)
# NOTE(review): plans_df is not referenced again in the visible part of this function
plans_df = pd.DataFrame(plans_data)
recommendations_df = pd.DataFrame(recommendations_data)
# Merge on the original data
mod_property_data = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD property data.csv"
)
# Left join on uprn; address/postcode/tenure are dropped from the CSV side so the database values are kept
property_asset_data = properties_df.merge(
mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
)
# NOTE(review): str.contains yields NaN for a missing roof description, and np.where below treats
# NaN as truthy ("Pitched roof") - confirm "roof" is never null, or pass na=False
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
# Wall type is the first space-separated word of the walls description
property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
# Insulated if the 2nd comma-separated part is one of the listed phrases, or the 3rd part is "insulated"
property_asset_data["is_insulated"] = (
property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
["filled cavity", "with external insulation", "filled cavity and external insulation"]
) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
)
# Replace the boolean flags with display labels; these labels become part of the archetype ID
property_asset_data["is_insulated"] = np.where(
property_asset_data["is_insulated"], "Insulated", "Uninsulated"
)
property_asset_data["is_pitched"] = np.where(
property_asset_data["is_pitched"], "Pitched roof", "Not Pitched Roof"
)
property_asset_data["pre_1970"] = np.where(
property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
)
archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
# One row per combination of archetype variables, largest group first
assigned_archetypes = (
property_asset_data.groupby(
archetype_variables
).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
)
# Make the archetype ID a concatenation of the variables
assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
lambda x: "_".join(x.astype(str)), axis=1
)
# Most prominent archetypes
prominent_archetypes = assigned_archetypes.head(6)
# tail(-6) returns every row except the first six, i.e. everything not in prominent_archetypes
other_archetypes = assigned_archetypes.tail(-6)
# 2 or fewer properties in the other archetypes
property_asset_data = property_asset_data.merge(
assigned_archetypes[archetype_variables + ["archetype_id"]],
how="left",
on=archetype_variables
)
# Create age bands:
# 1960-1969
# 1970-1979
# 1980-1989
# 1990-1999
# 2000+
# NOTE(review): build years outside 1960-2022 fall outside every bin and become NaN
property_asset_data["age_band"] = pd.cut(
property_asset_data["BUILD_YEAR"],
bins=[1959, 1969, 1979, 1989, 1999, 2022],
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
)
# Create floor area bands
# 0-73
# 74-97
# 98-199
# 200+
property_asset_data["floor_area_band"] = pd.cut(
property_asset_data["total_floor_area"],
bins=[0, 73, 97, 199, 10000],
labels=["0-73", "74-97", "98-199", "200+"]
)
# archetype_group keeps the ID for the six prominent archetypes and collapses the rest into "other"
property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
property_asset_data["archetype_group"] = np.where(
property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
"other",
property_asset_data["archetype_group"]
)
# For colour
wall_types = (
property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
columns={"wall_type": "Wall Type"}
)
)
# Group into age bands
ages = (
property_asset_data[["age_band"]].value_counts()
.to_frame()
.reset_index().sort_values("age_band", ascending=True)
.rename(columns={"age_band": "Age Band"})
)
floor_area_bands = (
property_asset_data[["floor_area_band"]].value_counts()
.to_frame()
.reset_index().sort_values("floor_area_band", ascending=True)
.rename(columns={"floor_area_band": "Floor Area Band"})
)
archetype_counts = (
property_asset_data[["archetype_group"]].
value_counts().
to_frame().
reset_index()
.rename(columns={"archetype_group": "Archetype"})
)
# NOTE(review): the "index" / 0 keys in this rename depend on how the installed pandas names the
# value_counts output columns - confirm the rename actually takes effect
property_types = (
(property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
value_counts().
to_frame().
reset_index()
.rename(columns={"index": "Property Type", 0: "Count"})
)
# epc breakdown
# presumably current_epc_rating holds Enum members (hence .value) - verify against the model
epc_breakdown = (
property_asset_data["current_epc_rating"]
.apply(lambda x: x.value)
.value_counts()
.to_frame()
.reset_index()
)
# Figures for the deck
# Carbon per property
totals = property_asset_data[
[
"Total_household_members",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
]
].copy()
# Total bill is the sum of the four usage costs plus both standing charges
totals["total_cost"] = (
totals["heating_cost_current"] +
totals["hot_water_cost_current"] +
totals["lighting_cost_current"] +
totals["appliances_cost_current"] +
totals["gas_standing_charge"] +
totals["electricity_standing_charge"]
)
# Portfolio-wide averages per property
print(
totals[
[
"Total_household_members",
"co2_emissions",
"current_energy_demand",
"total_cost",
]
].mean()
)
# Store these to an excel
# with pd.ExcelWriter(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/MOD archetype breakdowns.xlsx"
# ) as writer:
# wall_types.to_excel(writer, sheet_name="Wall Types", index=False)
# ages.to_excel(writer, sheet_name="Ages", index=False)
# floor_area_bands.to_excel(writer, sheet_name="Floor Area Bands", index=False)
# archetype_counts.to_excel(writer, sheet_name="Archetype Counts", index=False)
# epc_breakdown.to_excel(writer, sheet_name="EPC Rating", index=False)
# Contingency is applied as a flat fraction (26%) of each recommendation's estimated cost
contingency = 0.26
# We prepare the outputs, by scenario
# scenario_data maps scenario ID -> one-row-per-property DataFrame built in the loop below
scenario_data = {}
for scenario in scenario_ids:
scenario_recommendations_df = recommendations_df[
recommendations_df["Scenario ID"] == scenario
].copy()
scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
scenario_recommendations_df["total_cost"] = (
scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
)
recommended_measures_df = scenario_recommendations_df[
["property_id", "measure_type", "estimated_cost", "default"]
]
# Keep only the default recommendations, then drop the flag column
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
# Metrics by property ID
aggregated_metrics = scenario_recommendations_df[
[
"property_id", "type", "default", "sap_points",
"energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
"total_cost"
]
]
aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
# Sum the savings and costs of all default recommendations for each property
aggregated_metrics = aggregated_metrics.groupby("property_id")[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].sum().reset_index()
# Wide table: one row per property, one column per measure type, cell = estimated cost
# NOTE(review): DataFrame.pivot raises ValueError if a property has two default recommendations
# with the same measure_type - confirm that combination is unique
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
# A measure that is not recommended for a property gets a cost of 0
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
# We flag with boolean if the measure is recommended
for c in recommendations_measures_pivot.columns:
if c == "property_id":
continue
recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
# We now create a final output
df = properties_df[
[
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
"current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
aggregated_metrics, how="left", on="property_id"
)
df["bills_total_cost"] = (
df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
)
df = df.drop(columns=["property_id"])
# Properties with no recommendations come out of the left merges as NaN; treat their savings as zero.
# Only these four columns are filled - the cost columns and the per-measure columns keep NaN.
for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
df[c] = df[c].fillna(0)
# "floor_height" is not among the columns selected above, so that entry of the mapping has no effect
df = df.rename(
columns={
"uprn": "UPRN",
"address": "Address",
"postcode": "Postcode",
"walls": "Walls",
"roof": "Roof",
"heating": "Heating",
"windows": "Windows",
"current_epc_rating": "Current EPC Rating",
"current_sap_points": "Current SAP Points",
"total_floor_area": "Total Floor Area",
"number_of_rooms": "Number of Habitable Rooms",
"floor_height": "Floor Height",
}
)
# Calculate post SAP
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
# Calculate the relative savings on carbon, kwh, and bills
df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
# Add on the archetype
# The differing key names mean the result carries both "UPRN" and "uprn"; the valuation step
# later in this function filters on the lower-case "uprn" column
df = df.merge(
property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
)
# For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
# the bills go up recommending HHRSH, so it doesn't make it to EPC B
# For mid-terrace units, use the ordnance survey API to check if there is space for a heat pump?
# DO it manually???
# Doesn't make it
# misses = df[df["Predicted Post Works EPC"] == "C"]
# # 5 of them are flats and so are difficult to get to EPC B without renewables. Possibly not worth it from an
# # ROI perspective
#
# misses[["UPRN", "Address", "Postcode", "property_type"]]
# UPRN Address Postcode property_type
# 2 100120988937 13 Sidbury Circular Road SP9 7HX Flat No further action
# 3 100120988998 74 Sidbury Circular Road SP9 7JA Flat No further action
# 4 100120989416 47 Zouch Avenue SP9 7LR Flat No further action
# 6 100060585002 42, Muscott Close, Shipton Bellinger SP9 7TX House Can probably take a heat pump
# 37 10000801072 34 Luffenham Place, Chicksands SG17 5XH House Already surveyed as having
# an ASHP - should be looked at
# 121 100120988259 8, Karachi Close SP9 7LW Flat
# 122 100121101217 599, Pepper Place BA12 0DW Flat
# 140 100021455241 33 Blenheim Crescent, Ruislip HA4 7HA House - Solar isnt recommended
# due to bug
# 149 100120915656 10 Bower Green, Shrivenham SN6 8TU House - Solar isn't recommended
# due to bug
scenario_data[scenario] = df
# Print headline figures for the first scenario only
printing_scenario_id = scenario_ids[0]
# EPC breakdown
print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
# Cost
# Total cost
print(scenario_data[printing_scenario_id]["total_cost"].sum())
# Base cost
print(scenario_data[printing_scenario_id]["estimated_cost"].sum())
# Contingency
print(scenario_data[printing_scenario_id]["contingency"].sum())
# Costs averaged per unit
# NOTE(review): the cost columns are NaN for properties without recommendations and mean() skips
# NaN, so these look like averages over properties that have at least one measure - confirm intent
print(scenario_data[printing_scenario_id]["total_cost"].mean())
print(scenario_data[printing_scenario_id]["estimated_cost"].mean())
print(scenario_data[printing_scenario_id]["contingency"].mean())
# Average relative savings
print(scenario_data[printing_scenario_id]["relative_carbon_savings"].mean())
print(scenario_data[printing_scenario_id]["relative_kwh_savings"].mean())
print(scenario_data[printing_scenario_id]["relative_bill_savings"].mean())
# measure_details maps scenario ID -> {"count": {...}, "cost_per_measure": {...}}
measure_details = {}
for scenario in scenario_ids:
measure_details[scenario] = {}
recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
# Summing the boolean flag columns gives the number of properties each measure is recommended for
measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
# Get average cost per measure
# Strip the "Recommendation: " prefix to recover the name of the matching cost column
measure_columns = [
c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
]
# Take the mean, drop zero columns
measure_costs = {}
for m in measure_columns:
measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
measure_details[scenario]["cost_per_measure"] = measure_costs
pprint(measure_details[scenario_ids[0]]["count"])
pprint(measure_details[scenario_ids[1]]["count"])
# Cost per measures
pprint(measure_details[scenario_ids[0]]["cost_per_measure"])
pprint(measure_details[scenario_ids[1]]["cost_per_measure"])
# Do not get to EPC B:
# 5 are flats
# 1) 34 Luffenham Place, Chicksands SG17 5XH, has been surveyed as having a low performing heat pump -
# should be looked at but several surrounding properties have been surveyed in a similar fashion
# 2) 42, Muscott Close, Shipton Bellinger SP9 7TX, has an oil boiler and the bills go up recommending HHRSH.
# we could non-intrusively recommend a heat pump.
# 3) 33 Blenheim Crescent, Ruislip, HA4 7HA, 100021455241 Solar potential modelling returned nothing -
# manual review indicates that there are multiple trees surrounding the south facing side of the property
# 4) 10 Bower Green, Shrivenham, SN6 8TU - Solar isn't recommended without further survey due to the local
# area being surrounded by trees
# Scenario adjustments:
# Exclude: boiler_upgrade
# Make ASHP COP 3.5
# Metrics we need by scenario:
# Cost
# contingency
# Carbon
# kwh
# bill savings
# scenario_metrics maps scenario ID -> dict of per-property averages plus two cost-effectiveness ratios
scenario_metrics = {}
for scenario in scenario_ids:
df = scenario_data[scenario].copy()
avg_savings = df[
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
"total_cost", "contingency"]
].mean().to_dict()
# NOTE(review): both ratios divide by an average that is zero when a scenario has no savings
avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
scenario_metrics[scenario] = avg_savings
pprint(scenario_metrics[scenario_ids[0]])
pprint(scenario_metrics[scenario_ids[1]])
# The two expressions below compute the mean non-zero cost of a measure but the result is neither
# printed nor assigned, so they have no effect when run as a script (useful only in a REPL)
scenario_data[scenario_ids[0]]["loft_insulation"][
scenario_data[scenario_ids[0]]["loft_insulation"] > 0
].mean()
scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
].mean()
# Testing checking flood risk
import requests
def get_flood_risk(lat, lon, radius_km=1, timeout=30):
    """
    Fetch and print the flood warnings reported near a location.

    Queries the environment.data.gov.uk flood-monitoring "floods" endpoint and prints a short
    summary of every item returned.

    :param lat: Latitude of the location.
    :param lon: Longitude of the location.
    :param radius_km: Search radius passed to the API as ``dist``.
    :param timeout: Seconds to wait for the API before giving up; without it a stalled
        connection would block forever.
    :return: List of warning dicts taken from the "items" key of the response (empty if none).
    :raises requests.HTTPError: If the API answers with an error status.
    """
    url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
    params = {
        'lat': lat,
        'long': lon,
        'dist': radius_km  # search radius in km
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    data = response.json()
    flood_warnings = data.get("items", [])
    if not flood_warnings:
        print("No active flood warnings near this location.")
    else:
        print(f"{len(flood_warnings)} warning(s) found near the location:")
        for warning in flood_warnings:
            print(f"- Area: {warning.get('description')}")
            print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
            print(f" Message changed at: {warning.get('timeMessageChanged')}")
            print()
    return flood_warnings
from shapely.geometry import shape, Point
def get_flood_areas_near_point(lat, lon, radius_km=2, timeout=30):
    """
    Return the flood areas the flood-monitoring API reports around a location.

    :param lat: Latitude of the location.
    :param lon: Longitude of the location.
    :param radius_km: Search radius passed to the API as ``dist``.
    :param timeout: Seconds to wait for the API before giving up; without it a stalled
        connection would block forever.
    :return: List of flood-area dicts taken from the "items" key of the response (empty if none).
    :raises requests.HTTPError: If the API answers with an error status.
    """
    url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
    params = {
        'lat': lat,
        'long': lon,
        'dist': radius_km
    }
    response = requests.get(url, params=params, timeout=timeout)
    response.raise_for_status()
    return response.json().get("items", [])
def point_in_flood_area(lat, lon, timeout=30):
    """
    Find the first nearby flood area whose polygon contains the given point.

    Flood areas within 1 km are fetched with get_flood_areas_near_point; for each one that
    exposes a "polygon" URL the GeoJSON is downloaded and its first feature is tested.

    :param lat: Latitude of the point.
    :param lon: Longitude of the point.
    :param timeout: Seconds to wait for each polygon download before giving up.
    :return: The flood-area dict containing the point, or None when no area contains it.
    :raises requests.HTTPError: If a polygon download answers with an error status.
    """
    flood_areas = get_flood_areas_near_point(lat, lon, radius_km=1)
    point = Point(lon, lat)  # GeoJSON uses (lon, lat) format
    for area in flood_areas:
        polygon_url = area.get("polygon")
        if not polygon_url:
            continue
        polygon_response = requests.get(polygon_url, timeout=timeout)
        polygon_response.raise_for_status()
        polygon_geojson = polygon_response.json()
        features = polygon_geojson.get("features", [])
        if not features:
            continue
        flood_polygon = shape(features[0]['geometry'])
        try:
            is_inside = flood_polygon.contains(point)
        except Exception:
            # Best effort: a geometry that cannot be tested is treated as not containing the
            # point. Catching Exception (not a bare except) keeps Ctrl-C / SystemExit working.
            is_inside = False
        if is_inside:
            print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
            return area
    return None
from tqdm import tqdm
# Collect every property whose coordinates fall inside a flood area polygon
floor_warnings_data = []
for _, asset_row in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
    # warnings = floor_warnings_data.extend(
    # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
    # )
    matched_area = point_in_flood_area(lat=asset_row["LATITUDE"], lon=asset_row["LONGITUDE"])
    if not matched_area:
        # Nothing to record for properties outside every flood area
        continue
    flood_record = {
        "uprn": asset_row["uprn"],
        "address": asset_row["address"],
        "postcode": asset_row["postcode"],
        "area": matched_area
    }
    floor_warnings_data.append(flood_record)
import plotly.graph_objects as go
# One entry per treemap tile: (archetype label, number of properties, measures shown on hover)
archetype_tiles = [
    ("House_Cavity_Insulated_Pitched roof_Pre 1970", 62, "Loft insulation, draft proofing"),
    ("House_Cavity_Insulated_Pitched roof_Post 1970", 36, "Top-up loft insulation"),
    ("House_Cavity_Uninsulated_Pitched roof_Pre 1970", 21, "Cavity wall insulation, loft insulation"),
    ("House_Cavity_Uninsulated_Pitched roof_Post 1970", 16, "Cavity wall insulation, ventilation"),
    ("other", 16, "Bespoke retrofit measures"),
    ("House_System_Uninsulated_Pitched roof_Pre 1970", 4, "External wall insulation, roof insulation"),
    ("House_Solid_Uninsulated_Not Pitched Roof_Pre 1970", 2, "Flat roof insulation, internal wall insulation"),
]
labels = [tile[0] for tile in archetype_tiles]
values = [tile[1] for tile in archetype_tiles]
hovertext = [tile[2] for tile in archetype_tiles]

# Flat treemap: every tile has an empty parent, so there is no root node
archetype_treemap = go.Treemap(
    labels=labels,
    parents=["" for _ in labels],
    values=values,
    hovertext=hovertext,
    hoverinfo="text",
    textinfo="none",
    marker={
        "line": {"color": "white", "width": 4},
        # Tiles are shaded by their property count
        "colors": values,
        "colorscale": "Blues",
    },
)
fig = go.Figure(archetype_treemap)
fig.update_layout(
    margin={"t": 10, "l": 10, "r": 10, "b": 10},
    plot_bgcolor="white",
    paper_bgcolor="white",
)
fig.show()
# Get the recommended measures by scenario id
# Counts come from the second scenario only: per archetype group, the number of properties flagged
# for each measure (sum of the boolean "Recommendation: ..." columns)
recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
recommendation_cols
].sum().reset_index()
measure_counts_by_scenario.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
)
# Estimate average valuation improvement by scenarios
valuation_data = pd.read_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/property_valuation.csv"
)
from backend.ml_models.Valuation import PropertyValuation
# One dict per valued property: {"uprn": ..., <scenario id>: average uplift, ...}
uplift = []
for _, x in valuation_data.iterrows():
uprn = x["uprn"]
to_append = {"uprn": uprn}
for _id in scenario_ids:
# NOTE(review): squeeze() only yields a row-like Series when exactly one row matches the UPRN;
# zero or several matches would break the lookups below - confirm UPRNs are unique and present
scenario = scenario_data[_id][
scenario_data[_id]["uprn"] == uprn
].squeeze()
val = PropertyValuation.estimate_valuation_improvement(
current_value=x["valuation"],
current_epc=scenario["Current EPC Rating"].value,
target_epc=scenario["Predicted Post Works EPC"],
total_cost=None
)
to_append[_id] = val["average_increase"]
uplift.append(to_append)
# The list of dicts is replaced by a DataFrame with one column per scenario ID
uplift = pd.DataFrame(uplift)
print(uplift[scenario_ids[0]].mean())
# £8,161
print(uplift[scenario_ids[1]].mean())
# £16,938

View file

@ -0,0 +1,76 @@
import pandas as pd
# Wave 2 costing data: split the wide cost-study workbook into one tidy table per measure
costs = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/Measure cost study for MOD.xlsx",
    header=2
)


def _costs_for_measure(tag, model_column, invoiced_column):
    """
    Extract one measure's invoiced costs from the cost study.

    :param tag: Prefix prepended to the model name (e.g. "CWI - ").
    :param model_column: Name of the workbook column holding the model for this measure.
    :param invoiced_column: Name of the workbook column holding the invoiced total for this measure.
    :return: DataFrame with columns "Model" and "Total invoiced (including VAT)", restricted to rows
        that have an invoiced total.
    """
    measure_frame = costs[[model_column, invoiced_column]].copy()
    measure_frame[model_column] = tag + measure_frame[model_column]
    measure_frame = measure_frame[measure_frame[invoiced_column].notnull()]
    # Every measure ends up with the same two column names so the tables can be stacked
    measure_frame.columns = ["Model", "Total invoiced (including VAT)"]
    return measure_frame


# Get the EPC data for these
# Cavity
cwi_costs = _costs_for_measure("CWI - ", 'Model', 'Total invoiced (including VAT)')
# Loft
li_costs = _costs_for_measure("LI - ", 'Model.2', 'Total invoiced (including VAT).2')
# Windows
windows_costs = _costs_for_measure("Windows - ", 'Model.3', 'Total invoiced (including VAT).3')
# Doors
doors_costs = _costs_for_measure("Doors - ", 'Model.4', 'Total invoiced (including VAT).4')
# ASHP
ashps_costs = _costs_for_measure("ASHP - ", 'Model.5', 'Total invoiced (including VAT).5')
# Solar
solar_costs = _costs_for_measure("Solar - ", 'Model.6', 'Total invoiced (including VAT).6')

# Fabric measures and openings are each reported as one combined table
fabric_costing_data = pd.concat([cwi_costs, li_costs])
windows_doors_costing_data = pd.concat([windows_costs, doors_costs])

windows_doors_costing_data.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/windows_doors_costs.csv"
)
fabric_costing_data.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/fabric_costing_data.csv"
)
ashps_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/ashps_costs.csv")
solar_costs.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/solar_costs.csv")

# Mean total cost of works for each property age
age_and_total_cost = costs[["Property age ", "TOTAL Cost of Works"]]
project_cost_by_age = age_and_total_cost.groupby("Property age ").mean().reset_index()

View file

@ -0,0 +1,61 @@
import os
import pandas as pd
from dotenv import load_dotenv
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.route_march_data_pull.app import get_data
# Credentials come from the backend .env file
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# (address, postcode) of every acquisition to look up
_address_postcode_pairs = [
    ("3 Willis Road", "CB1 2AQ"),
    ("22 Catharine Street", "CB1 3AW"),
    ("332 Mill Road", "CB1 3NN"),
    ("330 Mill Road", "CB1 3NN"),
    ("328 Mill Road", "CB1 3NN"),
    ("71 Mill Road", "CB1 2AS"),
    ("78 Argyle Street", "CB1 3LZ"),
    ("9 Graham Road", "CB4 2ZE"),
    ("217 Mill Road", "CB1 3BE"),
    ("374 Mill Road", "CB1 3NN"),
    ("174 Thoday Street", "CB1 3AX"),
    ("37 Abbey Road", "CB5 8HH"),
    ("18 Upper Gwydir Street", "CB1 2LR"),
    ("21 Fulbourn Road Fulbourn", "CB1 9JL"),
    ("108 Argyle Street", "CB1 3LS"),
    ("115 Victoria Road", "CB4 3BS"),
    ("55 Ross Street", "CB1 3BP"),
    ("16 Kingston Street", "CB1 2NU"),
    ("13 Thoday Street", "CB1 3AS"),
    ("103 York Street", "CB1 2PZ"),
]
addresses = [
    {"address": street_address, "postcode": postcode}
    for street_address, postcode in _address_postcode_pairs
]

asset_list = pd.DataFrame(addresses)
# row_id is the join key between the asset list and the EPC results
asset_list["row_id"] = asset_list.index

# Only the first element of the returned triple (the EPC records) is used
epc_data, _, _ = get_data(
    asset_list=asset_list,
    fulladdress_column="address",
    postcode_column="postcode",
    address1_column="address",
    manual_uprn_map={},
    epc_api_only=True,
)
epc_df = pd.DataFrame(epc_data)

# Attach the EPC columns, then restore the display names of the clashing input columns
asset_list = (
    asset_list
    .merge(epc_df, how="left", on="row_id")
    .rename(columns={"address_x": "Address", "postcode_x": "Postcode"})
)
asset_list["uprn"] = asset_list["uprn"].astype(str)

# Spatial data is keyed by UPRN; both sides are cast to str so the join keys compare equal
epc_uprns = [epc_record["uprn"] for epc_record in epc_data]
spatial_data = OpenUprnClient.get_spatial_data(epc_uprns, bucket_name="retrofit-data-dev")
spatial_data["UPRN"] = spatial_data["UPRN"].astype(str)
asset_list = asset_list.merge(
    spatial_data, how="left", left_on="uprn", right_on="UPRN"
)

asset_list.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Panacap/Acquisitions EPC Data.csv",
    index=False,
)

View file

@ -1,9 +1,15 @@
import os
import pandas as pd
from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData
PORTFOLIO_ID = 111
PORTFOLIO_ID = 141
USER_ID = 8
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
"""
@ -13,10 +19,21 @@ def app():
asset_list = [
{
"uprn": 100050770761,
"address": "12 Sheardown Street",
"postcode": "DN4 0BH"
}
"address": "196 Merrow Street",
"postcode": "SE17 2NP",
"uprn": 200003423454,
"patch": True
},
{
"address": "65 Liverpool Grove",
"postcode": "SE17 2HP",
"uprn": 200003423194
},
{
"address": "2 Brettell Street",
"postcode": "SE17 2NZ",
"uprn": 200003423607
},
]
asset_list = pd.DataFrame(asset_list)
@ -28,30 +45,46 @@ def app():
file_name=filename
)
non_invasive_recommendations = [
{
"uprn": 100050770761,
"recommendations": [
{
"type": "extension_cavity_wall_insulation",
"sap_points": 2,
}
]
}
]
# Pull the non-invasive recommendations automatically
asset_list_epc_client = AssetListEpcData(
asset_list=asset_list,
epc_auth_token=EPC_AUTH_TOKEN
)
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
asset_list_epc_client.get_patch()
# Store non-invasive recommendations in S3
non_invasive_recommendations_filename = f"{USER_ID}/{PORTFOLIO_ID}/non_invasive_recommendations.csv"
save_csv_to_s3(
dataframe=pd.DataFrame(non_invasive_recommendations),
dataframe=pd.DataFrame(asset_list_epc_client.non_invasive_recommendations),
bucket_name="retrofit-plan-inputs-dev",
file_name=non_invasive_recommendations_filename
)
# Store patches in S3
patches_filename = ""
if asset_list_epc_client.patches:
patches_filename = f"{USER_ID}/{PORTFOLIO_ID}/patches.csv"
save_csv_to_s3(
dataframe=pd.DataFrame(asset_list_epc_client.patches),
bucket_name="retrofit-plan-inputs-dev",
file_name=patches_filename
)
valuation_data = [
{
"uprn": 100050770761,
"value": 67_000
}
"valuation": 339_000,
"uprn": 200003423454,
},
{
"valuation": 374_000,
"uprn": 200003423194
},
{
"valuation": 719_000,
"uprn": 200003423607
},
]
# Store valuation data to s3
valuation_filename = f"{USER_ID}/{PORTFOLIO_ID}/valuation.csv"
@ -68,7 +101,7 @@ def app():
"goal_value": "C",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"patches_file_path": patches_filename,
"non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
"valuation_file_path": valuation_filename,
"scenario_name": "Full package remote assessment",

View file

@ -0,0 +1,226 @@
import os
import time
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every row of an asset list.

    :param asset_list: DataFrame with "Postcode", "AddressLine1", "AddressLine4", "AddressLine5"
        and "row_id" columns.
    :return: Tuple ``(epc_data, errors)``. ``epc_data`` is a list of dicts holding the row_id, the
        fields of the newest EPC and a "recommendations" list; rows for which no EPC is found are
        left out. ``errors`` lists the row_id of every row whose lookup raised, so the caller
        can retry them.
    """
    epc_data = []
    errors = []
    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            house_number = home["AddressLine1"]
            full_address = ", ".join([home["AddressLine1"], home["AddressLine4"], home["AddressLine5"]])
            searcher = SearchEpc(
                address1=str(house_number),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)
            if searcher.newest_epc is None:
                continue
            # Look for EPC recommendations
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                # Best effort: an EPC without retrievable recommendations is still kept.
                # Catching Exception (not a bare except) keeps Ctrl-C working in this long loop.
                property_recommendations = {"rows": []}
            epc = {
                "row_id": home["row_id"],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"]
            }
            epc_data.append(epc)
        except Exception:
            # Record the failing row for a later retry and back off before the next lookup
            errors.append(home["row_id"])
            time.sleep(5)
    return epc_data, errors
def app():
    """
    Pull EPC data for every property in a customer asset list and write the enriched list to Excel.

    NOTE(review): the original summary said the properties are owned by Livewest, but the input and
    output paths below point at a "Settle" folder - confirm which customer this script serves.

    Data request contents:
        Date of last EPC
        Reason for EPC
        SAP score on register
        Property Type
        Property Area
        Property Age
        Any Dimensions (HLP,PW,RH)
        Property Wall Construction
        Heating Type
        Secondary Heating
        Loft Insulation Depth
    Additional if possible:
        Heat loss calculations
        EPC recommendations
        Property UPRN

    :return: None. The result is written to a hard-coded local Excel file.
    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/SETTLE FULL PROPOSED PROGRAMME.xlsx",
        header=0
    )
    # row_id is the join key between the asset list and everything pulled from the EPC register
    asset_list["row_id"] = asset_list.index

    epc_data, errors = get_data(asset_list)

    # We now retrieve any failed properties (a single retry; second-pass failures are dropped)
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(asset_list_failed)

    # Append the failed data to the main data
    epc_data.extend(epc_data_failed)

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per distinct recommendation text
    recommendations_df = epc_df[["row_id", "recommendations"]]
    unique_recommendations = set()
    for _, row in recommendations_df.iterrows():
        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])

    columns = ["row_id"] + list(unique_recommendations)
    transformed_data = []
    for _, row in recommendations_df.iterrows():
        # Initialize a dictionary for this row with False for all recommendations
        row_data = {col: False for col in columns}
        row_data["row_id"] = row["row_id"]

        # Set True for each recommendation present in this row
        for rec in row["recommendations"]:
            recommendation_text = rec["improvement-summary-text"]
            row_data[recommendation_text] = True

        # Append the row data to transformed_data
        transformed_data.append(row_data)

    transformed_df = pd.DataFrame(transformed_data)
    # Drop the column that is "". The column only exists when at least one recommendation had an
    # empty summary text, so a missing column must not raise.
    transformed_df = transformed_df.drop(columns=[""], errors="ignore")

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )

    asset_list = asset_list.drop(columns=["row_id"])
    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)"
    })

    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Replace "" value with None
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)

    # The perimeter is estimated per floor, so area and room count are divided by the floor count
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            # Falls back to 2.5 when the EPC floor height is empty/falsy
            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    filename = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Settle/Settle EPC Data pull - 08 Nov 2024.xlsx"
    asset_list.to_excel(filename, index=False)

View file

@ -0,0 +1,231 @@
import os
import time
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
# Load environment variables from backend/.env, then read the EPC API token from the environment.
# os.getenv returns None when EPC_AUTH_TOKEN is not set.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every row of the asset list.

    :param asset_list: DataFrame with one property per row. The columns "Postcode", "address1",
        "Address" and "row_id" are read from each row.
    :return: Tuple (epc_data, errors). epc_data is a list of dicts holding the newest EPC record plus
        "row_id" and "recommendations"; errors is the list of "row_id" values whose lookup raised.
        Rows for which no EPC is found appear in neither list.
    """
    epc_data = []
    errors = []

    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            # Only the text before the first comma is used as the first address line
            address1 = home["address1"].split(",")[0]
            full_address = home["Address"]

            searcher = SearchEpc(
                address1=str(address1),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations. A failed lookup is treated as "no recommendations" rather
            # than failing the whole property. Only Exception is caught so that Ctrl-C / SystemExit
            # still stop the run (the previous bare except swallowed them).
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                property_recommendations = {"rows": []}

            epc = {
                "row_id": home["row_id"],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"]
            }
            epc_data.append(epc)
        except Exception:
            # Record the failure so the caller can retry this row, then back off before continuing.
            # NOTE(review): the sleep is assumed to belong to this error path - confirm.
            errors.append(home["row_id"])
            time.sleep(5)

    return epc_data, errors
def app():
    """
    Pull EPC data for every property in a customer asset list and write the enriched list to Excel.

    NOTE(review): the original summary said the properties are owned by Livewest, but the input and
    output paths below point at a "southend" folder - confirm which customer this script serves.

    Data request contents:
        Date of last EPC
        Reason for EPC
        SAP score on register
        Property Type
        Property Area
        Property Age
        Any Dimensions (HLP,PW,RH)
        Property Wall Construction
        Heating Type
        Secondary Heating
        Loft Insulation Depth
    Additional if possible:
        Heat loss calculations
        EPC recommendations
        Property UPRN

    :return: None. The result is written to a hard-coded local Excel file.
    """
    asset_list = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/Southend Planned programme.xlsx",
        header=0,
        sheet_name="Planned RM"
    )
    # row_id is the join key between the asset list and everything pulled from the EPC register
    asset_list["row_id"] = asset_list.index
    # The first comma-separated part of the address is used as the first address line for the search
    asset_list["address1"] = asset_list["Address"].str.split(",").str[0]

    epc_data, errors = get_data(asset_list)

    # We now retrieve any failed properties (a single retry; second-pass failures are dropped)
    asset_list_failed = asset_list[asset_list["row_id"].isin(errors)]
    epc_data_failed, _ = get_data(asset_list_failed)

    # Append the failed data to the main data
    epc_data.extend(epc_data_failed)

    epc_df = pd.DataFrame(epc_data)

    # We expand out the recommendations into one boolean column per distinct recommendation text
    recommendations_df = epc_df[["row_id", "recommendations"]]
    unique_recommendations = set()
    for _, row in recommendations_df.iterrows():
        unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])

    columns = ["row_id"] + list(unique_recommendations)
    transformed_data = []
    for _, row in recommendations_df.iterrows():
        # Initialize a dictionary for this row with False for all recommendations
        row_data = {col: False for col in columns}
        row_data["row_id"] = row["row_id"]

        # Set True for each recommendation present in this row
        for rec in row["recommendations"]:
            recommendation_text = rec["improvement-summary-text"]
            row_data[recommendation_text] = True

        # Append the row data to transformed_data
        transformed_data.append(row_data)

    transformed_df = pd.DataFrame(transformed_data)
    # Drop the column that is "". The column only exists when at least one recommendation had an
    # empty summary text, so a missing column must not raise.
    transformed_df = transformed_df.drop(columns=[""], errors="ignore")

    # Retrieve just the data we need
    epc_df = epc_df[
        [
            "row_id",
            "uprn",
            "property-type",
            "built-form",
            "inspection-date",
            "current-energy-rating",
            "current-energy-efficiency",
            "roof-description",
            "walls-description",
            "transaction-type",
            # New fields needed
            "secondheat-description",
            "total-floor-area",
            "construction-age-band",
            "floor-height",
            "number-habitable-rooms",
            "mainheat-description",
            #
            "energy-consumption-current",  # kwh/m2
            "photo-supply",
        ]
    ]

    asset_list = asset_list.merge(
        epc_df,
        how="left",
        on="row_id"
    ).merge(
        transformed_df,
        how="left",
        on="row_id"
    )

    asset_list = asset_list.drop(columns=["row_id"])
    # Rename the columns
    asset_list = asset_list.rename(columns={
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)",
        "photo-supply": "% of the Roof with PV"
    })

    asset_list["Estimated Number of Floors"] = asset_list.apply(
        lambda x: estimate_number_of_floors(property_type=x["Property Type"]) if not pd.isnull(
            x["Property Type"]) else None, axis=1
    )

    asset_list["Property Floor Area"] = asset_list["Property Floor Area"].astype(float)
    # Replace "" value with None
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].replace("", None)
    asset_list["Number of Habitable Rooms"] = asset_list["Number of Habitable Rooms"].astype(float)

    # The perimeter is estimated per floor, so area and room count are divided by the floor count
    asset_list["Estimated Perimeter (m)"] = asset_list.apply(
        lambda x: estimate_perimeter(
            floor_area=x["Property Floor Area"] / x["Estimated Number of Floors"],
            num_rooms=x["Number of Habitable Rooms"] / x["Estimated Number of Floors"],
        ), axis=1
    )

    asset_list["Estimated Heat Loss Perimeter (m2)"] = asset_list.apply(
        lambda x: estimate_external_wall_area(
            num_floors=x["Estimated Number of Floors"],
            # Falls back to 2.5 when the EPC floor height is empty/falsy
            floor_height=float(x["Property Floor Height"]) if x["Property Floor Height"] else 2.5,
            perimeter=x["Estimated Perimeter (m)"],
            built_form=x["Archetype"]
        ),
        axis=1
    )

    asset_list["Roof Insulation Thickness"] = asset_list.apply(
        lambda x: RoofAttributes(description=x["Roof Construction"]).process()["insulation_thickness"] if not pd.isnull(
            x["Roof Construction"]) else None,
        axis=1
    )

    # Store as an excel
    filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/southend/southend EPC Data pull - 14 Nov "
                "2024.xlsx")
    asset_list.to_excel(filename, index=False)

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,155 @@
import os
import shutil
from tqdm import tqdm
from etl.access_reporting.app import SharePointClient
def delete_large_files():
    """
    This function deletes photos, designs and other files which we don't need

    It walks <folder_path>/<subfolder>/<property folder> and, for each property folder:
      * removes a fixed set of named sub-folders together with their contents
      * removes .MOV and .jpg files, and any "Property Pics" folder, from the coordinator info folder

    A property folder that has neither "2. RA Coordinator Info" nor "1. RA Coordinator Info" is
    skipped for the second step. Previously this raised FileNotFoundError and aborted the run
    part-way through the deletions.

    :return: None
    """
    folder_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys"

    folders_to_delete = [
        "1. RA Property Pics", "4. Air Tightness Tests", "5. RD Design Info",
        "1. RA Property PIcs", "Post EPC Photos", "4. RD Design Info",
        "5. Installer Info", "6. Trustmark lodgement", "7.Post Install Inspection Photos",
        "6. Trustmark Lodgement", "7. Post Inspection Photos",
    ]

    # List the contents of this folder since in each sub-folder we have the property folders
    for subfolder in os.listdir(folder_path):
        subfolder_path = os.path.join(folder_path, subfolder)
        if not os.path.isdir(subfolder_path):
            continue

        for property_name in tqdm(os.listdir(subfolder_path)):
            property_path = os.path.join(subfolder_path, property_name)
            # Check if it's a directory
            if not os.path.isdir(property_path):
                continue

            property_contents = os.listdir(property_path)

            for folder_to_delete in folders_to_delete:
                if folder_to_delete not in property_contents:
                    continue
                folder_to_delete_path = os.path.join(property_path, folder_to_delete)
                if os.path.isdir(folder_to_delete_path):
                    # Delete the folder, even if it's not empty
                    shutil.rmtree(folder_to_delete_path)

            # We now check the coordinator info folder, which is numbered either "2." or "1."
            if "2. RA Coordinator Info" not in property_contents:
                coordinator_folder = "1. RA Coordinator Info"
            else:
                coordinator_folder = "2. RA Coordinator Info"

            coordinator_info_path = os.path.join(property_path, coordinator_folder)
            if not os.path.isdir(coordinator_info_path):
                # Neither naming variant exists for this property - nothing more to clean up
                continue
            coordinator_info_contents = os.listdir(coordinator_info_path)

            # Look for .MOV files and .jpg files (the match is case-sensitive)
            for file_name in coordinator_info_contents:
                if file_name.endswith((".MOV", ".jpg")):
                    os.remove(os.path.join(coordinator_info_path, file_name))

            if "Property Pics" in coordinator_info_contents:
                # Delete folder and contents
                shutil.rmtree(os.path.join(coordinator_info_path, "Property Pics"))
def download_data_from_sharepoint():
    """
    Download the retrofit assessment / mid-term folders for each property from SharePoint.

    For every region folder listed in folders_to_keep, each property folder is listed and any
    sub-folder whose name contains one of the assessment keywords is downloaded to a hard-coded local
    directory, excluding MOV and jpg files.

    The SharePoint credentials are read from the SHAREPOINT_CLIENT_ID, SHAREPOINT_CLIENT_SECRET,
    SHAREPOINT_TENANT_ID and OSMOSIS_SHAREPOINT_SITE_ID environment variables.

    :return: None
    """
    SHAREPOINT_CLIENT_ID = os.getenv("SHAREPOINT_CLIENT_ID", None)
    SHAREPOINT_CLIENT_SECRET = os.getenv("SHAREPOINT_CLIENT_SECRET", None)
    SHAREPOINT_TENANT_ID = os.getenv("SHAREPOINT_TENANT_ID", None)
    OSMOSIS_SHAREPOINT_SITE_ID = os.getenv("OSMOSIS_SHAREPOINT_SITE_ID", None)

    sharepoint_client = SharePointClient(
        tenant_id=SHAREPOINT_TENANT_ID,
        client_id=SHAREPOINT_CLIENT_ID,
        client_secret=SHAREPOINT_CLIENT_SECRET,
        site_id=OSMOSIS_SHAREPOINT_SITE_ID
    )

    # Remote paths are always "/"-separated, so they are joined with "/" rather than os.path.join
    # (which would use the local OS separator).
    base_folder = "Osmosis ACD/Osmosis ACD Projects/Stonewater/Stonewater Property ID Folders"
    local_root = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Wave 2.1 Surveys - 2"
    drive_id = sharepoint_client.document_drive["id"]

    # Sub-folder names containing any of these (case-insensitive) are downloaded
    sub_folder_keywords = ("ra coordinator info", "retrofit assessment", "ra info", "mtp", "mid-term")

    # Retrieve the data from Sharepoint and write to local machine
    contents = sharepoint_client.list_folder_contents(
        drive_id=drive_id,
        folder_path=base_folder
    )

    folders_to_keep = [
        "1. Herefordshire", "2. Bedfordshire", "3. Wiltshire", "4. Bournemouth",
        "5. Coventry", "6. West Sussex", "7. Dorset", "8. Cambridgeshire",
        "9. Guildford", "10. Little Island", "11. CCS Dorset",
    ]

    folders_to_pull = [
        folder for folder in contents["value"] if folder["name"] in folders_to_keep
    ]

    for folder_to_pull in folders_to_pull:
        region_path = base_folder + "/" + folder_to_pull["name"]

        # Get the contents
        folder_contents = sharepoint_client.list_folder_contents(
            drive_id=drive_id,
            folder_path=region_path,
            page_size=100
        )

        for property_folder in folder_contents["value"]:
            # We go into each property folder and get the contents
            property_folder_contents = sharepoint_client.list_folder_contents(
                drive_id=drive_id,
                folder_path=region_path + "/" + property_folder["name"]
            )

            if not property_folder_contents.get("value"):
                continue

            # We look for the retrofit assessment folder or mtp folders:
            property_sub_folders = [
                f for f in property_folder_contents["value"]
                if any(keyword in f["name"].lower() for keyword in sub_folder_keywords)
            ]

            for property_sub_folder in property_sub_folders:
                # if we have this, we download the folder and store it locally
                property_folder_path = "/".join([
                    base_folder,
                    folder_to_pull["name"],
                    property_folder["name"],
                    property_sub_folder["name"],
                ])

                download_dir = os.path.join(
                    local_root,
                    folder_to_pull["name"],
                    property_folder["name"],
                    property_sub_folder["name"]
                )

                # We download the folder
                sharepoint_client.download_sharepoint_folder(
                    drive_id=drive_id,
                    folder_path=property_folder_path,
                    download_dir=download_dir,
                    excluded_file_types=["MOV", "jpg"]
                )

View file

@ -0,0 +1,542 @@
import os
import time
import json
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
from utils.s3 import read_from_s3, read_pickle_from_s3
import msoffcrypto
from io import BytesIO
# Load environment variables from backend/.env, then read the EPC API token from the environment.
# os.getenv returns None when EPC_AUTH_TOKEN is not set.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def get_data(asset_list):
    """
    Look up the newest EPC, and its recommendations, for every row of the asset list.

    :param asset_list: DataFrame with one property per row. The columns "Postcode", "Number",
        "Full Address" and "row_id" are read from each row.
    :return: Tuple (epc_data, errors). epc_data is a list of dicts holding the newest EPC record plus
        "row_id" and "recommendations"; errors is the list of "row_id" values whose lookup raised.
        Rows for which no EPC is found appear in neither list.
    """
    epc_data = []
    errors = []

    for _, home in tqdm(asset_list.iterrows(), total=len(asset_list)):
        try:
            postcode = home["Postcode"]
            house_number = home["Number"]
            full_address = home["Full Address"]

            searcher = SearchEpc(
                address1=str(house_number),
                postcode=postcode,
                auth_token=EPC_AUTH_TOKEN,
                os_api_key="",
                property_type=None,
                fast=True,
                full_address=full_address,
                max_retries=5
            )
            # Force the skipping of estimating the EPC
            searcher.ordnance_survey_client.property_type = None
            searcher.ordnance_survey_client.built_form = None
            searcher.find_property(skip_os=True)

            if searcher.newest_epc is None:
                continue

            # Look for EPC recommendations. A failed lookup is treated as "no recommendations" rather
            # than failing the whole property. Only Exception is caught so that Ctrl-C / SystemExit
            # still stop the run (the previous bare except swallowed them).
            try:
                property_recommendations = searcher.client.domestic.recommendations(searcher.newest_epc["lmk-key"])
            except Exception:
                property_recommendations = {"rows": []}

            epc = {
                "row_id": home["row_id"],
                **searcher.newest_epc.copy(),
                "recommendations": property_recommendations["rows"]
            }
            epc_data.append(epc)
        except Exception:
            # Record the failure so the caller can retry this row, then back off before continuing.
            # NOTE(review): the sleep is assumed to belong to this error path - confirm.
            errors.append(home["row_id"])
            time.sleep(5)

    return epc_data, errors
def app():
    """
    This code creates a list of cavity properties, for review

    Inputs (all hard-coded local paths): a password-protected ECO master workbook, the archetyped
    property list, the costed retrofit packages workbook and the Parity feature download.
    Output: a CSV of non-ECO3 properties in postal regions that contain suspected CWI need.

    :return: None
    """
    # Read in the password protected master
    # TODO: This file should be deleted!
    # Path to the password-protected Excel file
    file_path = ("/Users/khalimconn-kowlessar/Downloads/STONEWATER MASTER SHEET - UPDATED 20.5.24 - K- PASSWORD "
                 "PROTECTED.xlsx")
    # NOTE(review): the workbook password is committed in source - consider moving it to an
    # environment variable / secret store.
    password = "STONE123"  # Replace with the actual password

    # Open the file and decrypt it into an in-memory buffer
    with open(file_path, "rb") as f:
        decrypted_file = BytesIO()
        office_file = msoffcrypto.OfficeFile(f)
        office_file.load_key(password=password)
        office_file.decrypt(decrypted_file)

    # Read the decrypted file into a DataFrame
    eco_rolling_master = pd.read_excel(decrypted_file, sheet_name="Sheet1", engine="openpyxl")

    # Drop rows whose install/cancellation field mentions "CANCELLED"
    # NOTE(review): str.contains yields NaN for non-string cells, which would break the "~" mask -
    # presumably this column is all text; confirm.
    eco_rolling_master = eco_rolling_master[
        ~eco_rolling_master['INSTALL/CANCELLATION DATE'].str.contains("CANCELLED")
    ]

    archetyped_properties = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 - "
        "Archetyped V3.1.xlsx",
        header=4
    )

    # Wall type labels that are treated as "as built" cavity walls
    cavity_descriptions = [
        "Cavity: AsBuilt (1983-1995)",
        "Cavity: AsBuilt (Post 1995)",
        "Cavity: AsBuilt (Pre 1976)",
        "Cavity: AsBuilt (1976-1982)",
    ]

    archetyped_properties["Is Cavity Property"] = archetyped_properties["Wall Type"].isin(cavity_descriptions)

    # We also identify any properties where properties were found to need cavity wall insulation
    costed_packages = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater - Costed Retrofit Packages "
        "20241030 (WIP) Single Model V2.xlsx",
        sheet_name="Modelled Packages",
        header=13
    )

    needs_cwi = costed_packages[
        costed_packages["Main Wall Insulation"].isin(
            [
                "Poss Extract CWI & Refill (issues identified)",
                "CWI RdSAP Default"
            ]
        )
    ][["Address ID", "Address", "Current SAP Rating", "Current EPC Band", "Postcode", "Archetype ID",
       "Main Wall Insulation",
       "Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]]

    # We flag these properties: every property sharing an archetype with a surveyed CWI finding
    archetyped_properties["Survey shows CWI needed for Archetype"] = archetyped_properties["Archetype ID"].isin(
        needs_cwi["Archetype ID"]
    )

    # Remove rows without an Address ID and rows that repeat the header text
    archetyped_properties = archetyped_properties[~pd.isnull(archetyped_properties["Address ID"])]
    archetyped_properties = archetyped_properties[archetyped_properties["Address ID"] != "Address ID"]

    # this is the big list!!!
    features = pd.read_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
        "master sheet.csv",
        encoding='latin1'
    )
    features["Address ID"] = features["Address ID"].astype(str)

    features_to_merge = features[
        [
            "Address ID", "Organisation Reference", "Age", "Property Type", "Walls", "Roofs", "Glazing", "Heating",
            "Main Fuel",
            "Hot Water",
            "Renewables", "Total Floor Area"
        ]
    ]

    stonewater_cavity_properties = archetyped_properties[
        ["Name", "Postcode", "Osm. ID", "Org. ref.", "Address ID", "UPRN", "UDPRN", "Archetype ID", "House no",
         "Street name",
         "Address line 2", "City/Town", "Is Cavity Property", "Survey shows CWI needed for Archetype"]
    ].merge(
        features_to_merge, how="left", on="Address ID"
    )

    # We filter this down to the properties that are cavity properties
    stonewater_cavity_properties = stonewater_cavity_properties[
        stonewater_cavity_properties["Is Cavity Property"] |
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"]
    ]

    # "Reason Included" is assigned in stages; each later np.where overrides the earlier value for the
    # rows it matches, so the order of the following statements matters.
    stonewater_cavity_properties["Reason Included"] = "As Built Cavity Property"
    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
        ~stonewater_cavity_properties["Is Cavity Property"],
        "Survey revealed potential need for CWI or extract and re-fill",
        stonewater_cavity_properties["Reason Included"]
    )
    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Survey shows CWI needed for Archetype"] &
        stonewater_cavity_properties["Is Cavity Property"],
        "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
        stonewater_cavity_properties["Reason Included"]
    )

    # We indicate the exact properties that need CWI, based on survey findings
    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Address ID"].isin(
            needs_cwi[needs_cwi["Main Wall Insulation"] == "CWI RdSAP Default"]["Address ID"].astype(int).astype(
                str).values
        ),
        "Survey showed this property needs CWI",
        stonewater_cavity_properties["Reason Included"]
    )

    stonewater_cavity_properties["Reason Included"] = np.where(
        stonewater_cavity_properties["Address ID"].isin(
            needs_cwi[needs_cwi["Main Wall Insulation"] == "Poss Extract CWI & Refill (issues identified)"][
                "Address ID"].astype(int).astype(str).values
        ),
        "Survey showed this property could need extract and re-fill",
        stonewater_cavity_properties["Reason Included"]
    )

    # We flag units that were installed under ECO3
    # Keep only master-sheet rows with a usable (non-null, on-asset-list) Stonewater UPRN
    numeric_ids = eco_rolling_master[eco_rolling_master["STONEWATER UPRN"] != "NOT ON ASSET LIST"]
    numeric_ids = numeric_ids[~pd.isnull(numeric_ids["STONEWATER UPRN"])]
    numeric_ids["STONEWATER UPRN"] = numeric_ids["STONEWATER UPRN"].astype(int)

    stonewater_cavity_properties["Installed under ECO3"] = stonewater_cavity_properties["Org. ref."].isin(
        numeric_ids['STONEWATER UPRN'].values
    )

    # Which postcodes were installed under ECO3
    priority_list_eco3 = stonewater_cavity_properties[
        stonewater_cavity_properties["Installed under ECO3"]
    ]["Postcode"].unique()

    # These are properties that were not installed under ECO3, that have the same postcodes as properties
    # installed under ECO3
    # These are 66 properties we might want to start with as an immediate priority
    stonewater_cavity_properties["Same Postcode as Installed under ECO3"] = (
        ~stonewater_cavity_properties["Installed under ECO3"] & (
            stonewater_cavity_properties["Postcode"].isin(priority_list_eco3)
        )
    )

    stonewater_cavity_properties["UPRN"] = stonewater_cavity_properties["UPRN"].astype("Int64").astype(str)

    # Find the postcodes where an Osmosis survey revealed a need for CWI
    postcodes_found_needing_cwi = stonewater_cavity_properties[
        stonewater_cavity_properties["Reason Included"].isin(
            [
                "Survey revealed potential need for CWI or extract and re-fill",
                "Surveyed revealed potential need for CWI or extract and re-fill and is an as built cavity property",
                "Survey showed this property needs CWI",
                "Survey showed this property could need extract and re-fill"
            ]
        )
    ]["Postcode"].unique()

    # Properties in one of those postcodes whose own "Reason Included" is not a survey finding
    stonewater_cavity_properties["Suspected Needs CWI - not surveyed"] = (
        (
            stonewater_cavity_properties[
                "Postcode"].isin(
                postcodes_found_needing_cwi)
        ) & (
            ~stonewater_cavity_properties[
                "Reason Included"].isin(
                [
                    "Survey revealed potential need "
                    "for CWI or extract and re-fill",
                    "Surveyed revealed potential "
                    "need for CWI or extract and "
                    "re-fill and is an as built "
                    "cavity property",
                    "Survey showed this property "
                    "needs CWI",
                    "Survey showed this property "
                    "could need extract and re-fill"
                ]
            )
        )
    )

    # Merge the EPCs on, with the data we need
    # Prefix the Parity-sourced columns so their origin is clear in the output
    stonewater_cavity_properties = stonewater_cavity_properties.rename(
        columns={
            "Age": "Parity - Build Age",
            "Property Type": "Parity - Property Type",
            "Walls": "Parity - Wall Construction",
            "Roofs": "Parity - Roof Construction",
            "Glazing": "Parity - Glazing Type",
            "Heating": "Parity - Heating Type",
            "Main Fuel": "Parity - Main Fuel",
            "Hot Water": "Parity - Hot Water",
            "Renewables": "Parity - Renewables",
            "Total Floor Area": "Parity - Total Floor Area"
        }
    )

    # We now flag the additional properties in the as built list
    # (properties in the Parity download that are not in the archetyped list)
    additional_properties = features[
        ~features["Address ID"].isin(archetyped_properties["Address ID"].values)
    ]
    # Filter on as built cavity properties
    additional_properties = additional_properties[
        additional_properties["Walls"].isin(cavity_descriptions)
    ]

    additional_properties["Full Address"] = additional_properties["Address"].copy()
    house_numbers = []
    for _, x in tqdm(additional_properties.iterrows(), total=len(additional_properties)):
        house_no = SearchEpc.get_house_number(x["Address"].split(",")[0], x["Postcode"])
        if house_no is None:
            # Fall back to the first comma-separated part of the address
            house_no = x["Address"].split(",")[0]
        # If we end up with a number like "01" we need to remove the leading zero
        # NOTE(review): indentation was lost in the diff view; the strip is assumed to apply to every
        # house number, not only the fallback - confirm.
        house_no = house_no.lstrip("0")
        house_numbers.append(
            {
                "Address ID": x["Address ID"],
                "Number": house_no
            }
        )

    house_numbers = pd.DataFrame(house_numbers)
    additional_properties = additional_properties.merge(house_numbers, how="left", on="Address ID")
    additional_properties["row_id"] = additional_properties["Address ID"].copy()

    # Flag any units in this list that were installed under ECO3
    additional_properties["Installed under ECO3"] = additional_properties["Organisation Reference"].isin(
        numeric_ids['STONEWATER UPRN'].values
    )

    # Additional list ECO3
    additional_list_eco3 = additional_properties[additional_properties["Installed under ECO3"]]["Postcode"].unique()

    # These are properties that were not installed under ECO3, that have the same postcodes as properties
    # installed under ECO3
    # These are 297 properties we might want to start with as an immediate priority
    additional_properties["Same Postcode as Installed under ECO3"] = (
        ~additional_properties["Installed under ECO3"] & (
            additional_properties["Postcode"].isin(additional_list_eco3)
        )
    )

    # We do some additional manual checks, for ECO3 properties that were installed that didn't get matched to either
    # dataset
    # NOTE(review): astype(int) raises on missing values - presumably both reference columns are fully
    # populated; confirm.
    numeric_ids["In asset list"] = numeric_ids["STONEWATER UPRN"].isin(
        stonewater_cavity_properties['Org. ref.'].astype(int).values
    )
    numeric_ids["In asset list"] = numeric_ids["In asset list"] | (
        numeric_ids["STONEWATER UPRN"].isin(
            additional_properties['Organisation Reference'].astype(int).values
        )
    )
    # eco3_installs_not_in_asset_list = numeric_ids[~numeric_ids["In asset list"]]
    # # We now take samples of properties randomly and manually check the ID against the asset list
    # print(eco3_installs_not_in_asset_list.sample(1)[["STONEWATER UPRN", "Post Code", "NO ", "Street / Block Name", ]])
    # # Checked STONEWATER UPRN
    # # 9862, BH15 1NR, 33, THE QUAY FOYER [x]
    # # 12785, S01 66PN, 57, SEACOLE GARDENS [x]
    # # 26071, MK42 0TE, 51, De Havilland Avenue, Shortstown [x]
    # # 18213, HR6 9UW, 20 Ford Street [x]
    # # 24344, LU4 9FF, 6 SEAL CLOSE [x]
    # # 31222, SN14 0QZ, 7 HARDBROOK COURT [x]
    # # 9343, SP4 7XL, 10 OAK PLACE [x]
    # # 34730, LU5 5TN, 4 TUDOR DRIVE [x]
    # # 7021, BN27 2BZ, 32 BUTTS FIELD []
    #
    # stonewater_cavity_properties[stonewater_cavity_properties['Org. ref.'] == 7021]
    # stonewater_cavity_properties[stonewater_cavity_properties['Postcode'] == "BN27 2BZ"]["Name"]
    #
    # additional_properties[additional_properties['Organisation Reference'] == 7021]
    # additional_properties[additional_properties['Postcode'] == "BN27 2BZ"][["Address"]]

    # Pull the EPCs for these properties
    # additional_properties_epcs, errors = get_data(additional_properties)
    # Save this data as a pickle
    # import pickle
    # with open("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/additional_properties_epcs.pkl",
    # "wb") as f:
    # pickle.dump(additional_properties_epcs, f)

    additional_properties["Suspected Needs CWI - not surveyed"] = (
        (
            additional_properties["Postcode"].isin(postcodes_found_needing_cwi) &
            ~additional_properties["Installed under ECO3"]
        )
    )

    # We drop Full Address
    additional_properties = additional_properties.drop(columns=["Full Address"])

    # NOTE(review): "Suspected Needs CWI - not surveyed" is computed above but is not among the
    # columns selected here, so it is absent for these rows after the concat below - confirm this is
    # intended.
    additional_properties2 = additional_properties[[
        "Address", "Postcode", "Address ID", "SAP", "SAP Band", "Property Type", "Walls", "Roofs", "Glazing",
        "Heating", "Main Fuel", "Hot Water", "Renewables", "Total Floor Area", 'Installed under ECO3',
        'Same Postcode as Installed under ECO3', "Organisation Reference",
    ]].rename(
        columns={
            "Organisation Reference": "Org. ref.",
            "SAP": "Parity - Predicted SAP",
            "SAP Band": "Parity - Predicted SAP Band",
            "Age": "Parity - Build Age",
            "Property Type": "Parity - Property Type",
            "Walls": "Parity - Wall Construction",
            "Roofs": "Parity - Roof Construction",
            "Glazing": "Parity - Glazing Type",
            "Heating": "Parity - Heating Type",
            "Main Fuel": "Parity - Main Fuel",
            "Hot Water": "Parity - Hot Water",
            "Renewables": "Parity - Renewables",
            "Total Floor Area": "Parity - Total Floor Area"
        }
    )

    # Combine the data:
    # Bring the Parity address text onto the archetyped rows before stacking both lists
    stonewater_cavity_properties2 = stonewater_cavity_properties.merge(
        features[["Address", "Organisation Reference"]], how="left", on="Organisation Reference"
    )

    full_dataset = pd.concat([stonewater_cavity_properties2, additional_properties2])
    full_dataset = full_dataset.drop(columns=['Osm. ID'])

    # We now define the priority list for non-intrusives
    # "Postal Region" is the first two characters of the outward code; "Postal Region 2" is the full
    # outward code (the part of the postcode before the space)
    full_dataset["Postal Region"] = full_dataset["Postcode"].str.split(" ").str[0].str[0:2]
    full_dataset["Postal Region 2"] = full_dataset["Postcode"].str.split(" ").str[0]

    # Strip out anything we definitely don't want
    full_dataset = full_dataset[~full_dataset["Installed under ECO3"]]

    areas = full_dataset[full_dataset["Suspected Needs CWI - not surveyed"] == True]["Postal Region 2"].unique()

    priorities = full_dataset[
        full_dataset["Postal Region 2"].isin(areas)
    ]

    # Keep only regions with more than 100 candidate properties
    # NOTE(review): the "count" column name relies on how value_counts() names its result, which
    # differs between pandas versions - confirm against the installed version.
    region_prevalance = priorities["Postal Region 2"].value_counts().to_frame().reset_index()
    region_prevalance = region_prevalance[region_prevalance["count"] > 100]

    df = priorities[priorities["Postal Region 2"].isin(region_prevalance["Postal Region 2"].values)]
    # The two value_counts() calls below discard their result (interactive inspection leftovers)
    df["Postal Region"].value_counts()
    df["Postal Region 2"].value_counts()

    # Sanity check: ECO3 installs were filtered out above, so none should remain
    if df["Installed under ECO3"].sum():
        raise ValueError("There are properties in the priority list that were installed under ECO3")

    df.to_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/10022025 Non-Intrusives - "
        "revised list.csv",
        index=False
    )

    # We save the data locally
    # stonewater_cavity_properties.to_csv(
    # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Cavity Properties - priority "
    # "postcodes.csv",
    # index=False
    # )
    # additional_properties2.to_csv(
    # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Additional Cavity Properties - "
    # "non-priority postcodes.csv",
    # index=False
    # )
    # # Save the survey findings
    # needs_cwi.to_csv(
    # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater Properties Needing CWI -
    # WIP.csv",
    # index=False
    # )
def cross_reference_epc_programme():
    """
    Cross-reference the ECO3 fallout address list against the modelled Stonewater master sheet.

    Both inputs are read from hard-coded local paths. A house number is derived for every address in
    both datasets; each ECO3 fallout address is then matched to the master-sheet rows that share its
    house number and whose address string has a fuzzy similarity ratio above 90.

    :return: A list with one entry per ECO3 fallout row that produced a match; each entry is the
        DataFrame of matching master-sheet rows
    """
    # Imported once here rather than on every iteration of the matching loop
    from fuzzywuzzy import fuzz

    eco3_fallout = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/STONEWATER LIST OF ADDRESSES TO BE "
        "SURVEYED - ECO3 NOT COMPLETED.xlsx"
    )

    def _house_number(row):
        # Fall back to the first comma-separated part of the address when no house number is parsed
        house_no = SearchEpc.get_house_number(row["ADDRESS"], "")
        if house_no is None:
            house_no = row["ADDRESS"].split(",")[0]
        return house_no

    # Rows yielded by iterrows() are copies, so assigning to them never reaches the frame. The fallback
    # is therefore applied through the column assignment itself.
    eco3_fallout["house_number"] = eco3_fallout.apply(_house_number, axis=1)

    stonewater_modelled_above_c = pd.read_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
        "master sheet.csv",
        encoding='latin1'
    )
    stonewater_modelled_above_c["house_number"] = stonewater_modelled_above_c.apply(
        lambda x: SearchEpc.get_house_number(x["Address"], x["Postcode"]), axis=1
    )

    eco3_fallout_matched_to_above_c = []
    for _, fallout_row in eco3_fallout.iterrows():
        # Match on house number first to narrow the candidates
        candidates = stonewater_modelled_above_c[
            stonewater_modelled_above_c["house_number"] == fallout_row["house_number"]
        ]
        if candidates.empty:
            continue

        # Then keep the candidates whose address is a close fuzzy match
        # NOTE(review): combining both criteria is an assumption about the intent - previously the
        # house-number match was computed and then discarded in favour of the fuzzy match alone
        is_close = candidates["Address"].apply(lambda x: fuzz.ratio(x, fallout_row["ADDRESS"]) > 90)
        match = candidates[is_close]
        if not match.empty:
            eco3_fallout_matched_to_above_c.append(match)

    return eco3_fallout_matched_to_above_c
def finalise_list_for_non_intrusives():
    """
    Build the final Stonewater non-intrusives list and write it to Excel.

    Properties installed under ECO3 and properties present in the Osmosis bid packages are removed,
    the organisation reference is attached from the master sheet, and the list is then reduced to
    rows whose postcode appears among the rows flagged "Include in non-intrusives".
    All inputs and the output use hard-coded local paths.
    """
    candidates = pd.read_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/20250207 Stonewater "
        "Non-Intrusives.xlsx"
    )
    # Anything installed under ECO3 is out of scope
    installed_under_eco3 = candidates["Installed under ECO3"]
    candidates = candidates[~installed_under_eco3]

    # Flag the properties that were already surveyed by Osmosis
    osmosis_packages = pd.read_excel(
        "/Users/khalimconn-kowlessar/Downloads/Stonewater - Bid Packages WIP 14.11.20 V2 "
        "(1).xlsx",
        sheet_name="Modelled Packages",
        header=13,
    )
    surveyed_address_ids = osmosis_packages["Address ID"].values
    candidates["Surveyed by Osmosis"] = candidates["Address ID"].isin(surveyed_address_ids)

    # Removed 54 addresses
    shortlist = candidates.loc[~candidates["Surveyed by Osmosis"]]

    # Add on the organisation reference
    master_sheet = pd.read_csv(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Osmosis Reviewed - Parity Download 18.7 - "
        "master sheet.csv",
        encoding='latin1'
    )
    reference_lookup = master_sheet[["Organisation Reference", "Address ID"]]
    shortlist = shortlist.merge(reference_lookup, on="Address ID", how="left")

    shortlist["Postal Region"] = shortlist["Postcode"].str.split(" ").str[0].str[0:2]

    # NOTE(review): the selection below works on full postcodes even though the names refer to
    # regions and "Postal Region" is computed just above - confirm which level is intended
    flagged_rows = shortlist[shortlist["Include in non-intrusives"]]
    selected_regions = flagged_rows["Postcode"].unique()
    shortlist["Is in region"] = shortlist["Postcode"].isin(selected_regions)

    # Filter down:
    shortlist = shortlist[shortlist["Is in region"]]

    shortlist.to_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Non-intrusives/10022025 Non-Intrusives "
        "List - final.xlsx")

View file

@ -0,0 +1,11 @@
PyPDF2
pandas
tqdm
openpyxl
boto3
epc-api-python==1.0.2
usaddress==0.5.11
fuzzywuzzy==0.18.0
python-dotenv
scipy

View file

@ -0,0 +1,73 @@
import os
import pandas as pd
import numpy as np
from asset_list.utils import get_data
from backend.SearchEpc import SearchEpc
from etl.spatial.OpenUprnClient import OpenUprnClient
from dotenv import load_dotenv
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
def app():
    """
    Pull UPRNs and spatial restriction flags for the United Living GMCA property list.

    The spreadsheet is read from a hard-coded local path, UPRNs are looked up via ``get_data``,
    three UPRNs that the lookup did not return are filled in by hand, and the conservation / listed /
    heritage flags from ``OpenUprnClient`` are merged on before the result is written to Excel.
    """
    source_path = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03.xlsx"
    df = pd.read_excel(source_path)
    df["row_id"] = df.index
    df["house_number"] = df.apply(
        lambda row: SearchEpc.get_house_number(row["Address"], row["Postcode"]),
        axis=1
    )

    # The third return value is used below as the collection of row ids that came back without data
    properties_data, _, missing_row_ids = get_data(
        df=df,
        manual_uprn_map={},
        epc_auth_token=EPC_AUTH_TOKEN,
        row_id_name="row_id",
        uprn_column=None,
        fulladdress_column="Address",
        address1_column="house_number",
        postcode_column="Postcode",
        property_type_column=None,
        built_form_column=None,
        epc_api_only=True,
    )

    # Kept for interactive inspection of the rows that returned nothing
    no_data = df[df["row_id"].isin(missing_row_ids)]
    no_data[["Address", "Postcode"]]
    # 53 108 Alexandra Street OL6 9QP 100011536830
    # 56 301 Whiteacre Road OL6 9QF 100011557437
    # 65 97 Princess Street OL6 9QJ 100011551813

    uprn_lookup = pd.DataFrame(properties_data)[["uprn", "row_id"]]
    data = df.merge(uprn_lookup, how="left", left_on="row_id", right_on="row_id")

    # Fill missing UPRNS
    manual_uprns = {
        "108 Alexandra Street": 100011536830,
        "301 Whiteacre Road": 100011557437,
        "97 Princess Street": 100011551813,
    }
    for address, uprn in manual_uprns.items():
        data["uprn"] = np.where(data["Address"] == address, uprn, data["uprn"])

    # We now get whether the property is listed, heritage or in a conservation area
    spatial_data = OpenUprnClient.get_spatial_data(uprns=data["uprn"].tolist(), bucket_name="retrofit-data-dev")
    spatial_data = spatial_data.rename(columns={"UPRN": "uprn"})

    data["uprn"] = data["uprn"].astype(int)
    merged = data.merge(spatial_data, on="uprn", how="left")

    # fill NAs
    for flag_column in ['conservation_status', 'is_listed_building', 'is_heritage_building']:
        merged[flag_column] = merged[flag_column].fillna(False)

    merged.to_excel(
        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/United Living/Potential GMCA props 05.03 - data "
        "pulled.xlsx",
        index=False
    )

View file

@ -0,0 +1,85 @@
"""
This is the list of properties, based on the EPC data, that look eligible for WHLG
"""
import pandas as pd
from etl.epc.settings import EARLIEST_EPC_DATE
from etl.spatial.OpenUprnClient import OpenUprnClient
# Load the Waltham Forest certificates and switch the column names to the lower-case, hyphenated form
epc_data = pd.read_csv(
    "/Users/khalimconn-kowlessar/Downloads/all-domestic-certificates/domestic-E09000031-Waltham-Forest/certificates.csv"
)
epc_data = epc_data.rename(columns=lambda name: name.replace("_", "-").lower())

# Keep recent certificates that carry a UPRN
epc_data = epc_data[epc_data["lodgement-date"] >= EARLIEST_EPC_DATE]
epc_data = epc_data[epc_data["uprn"].notna()]
epc_data["uprn"] = epc_data["uprn"].astype(int)

# Keep ratings D to G in private rental or owner-occupied tenure
target_ratings = ["D", "E", "F", "G"]
target_tenures = ["rental (private)", "Rented (private)", "owner-occupied", "Owner-occupied"]
epc_data = epc_data[epc_data["current-energy-rating"].isin(target_ratings)]
epc_data = epc_data[epc_data["tenure"].isin(target_tenures)]

whlg_eligible_postcodes = pd.read_excel(
    "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx",
    sheet_name="Eligible postcodes",
    header=1
)
# Format:
whlg_eligible_postcodes = whlg_eligible_postcodes[['Postcode', 'Local Authority']]

uprns = epc_data["uprn"].unique()

# Get data
ca_data = OpenUprnClient.get_spatial_data(uprns, "retrofit-data-dev")

restriction_flags = ["conservation_status", "is_listed_building", "is_heritage_building"]
spatial_lookup = ca_data[["UPRN", *restriction_flags]].rename(columns={"UPRN": "uprn"})
epc_data = epc_data.merge(spatial_lookup, how="left", on="uprn")

# A property is restricted when any of the three flags is True; missing values count as not restricted
epc_data["has_conservation_restrictions"] = epc_data[restriction_flags].eq(True).any(axis=1)

whlg_eligible_postcodes["Local Authority"].value_counts()
is_waltham_forest = whlg_eligible_postcodes["Local Authority"] == "Waltham Forest"
whlg_eligible_postcodes = whlg_eligible_postcodes[is_waltham_forest]

# Pathway 1:
# Match based on eligible postcodes
pathway1_columns = [
    "uprn", "address", "address1", "postcode", "current-energy-rating", "current-energy-efficiency",
    "lodgement-date",
    "has_conservation_restrictions", "walls-description", "roof-description", "mainheat-description"
]
pathway1_labels = {
    "current-energy-rating": "EPC Rating",
    "current-energy-efficiency": "SAP Score",
    "lodgement-date": "EPC Date",
    "has_conservation_restrictions": "Conservation Area Restrictions",
    "walls-description": "Wall Type",
    "roof-description": "Roof Type",
    "mainheat-description": "Main Heating",
}
pathway1 = epc_data[epc_data["postcode"].isin(whlg_eligible_postcodes["Postcode"].values)]
pathway1 = pathway1[pathway1_columns].rename(columns=pathway1_labels)

pathway1["EPC Date"] = pd.to_datetime(pathway1["EPC Date"]).dt.strftime("%Y-%m-%d")
# Create a year EPC was lodged
pathway1["EPC Year"] = pd.to_datetime(pathway1["EPC Date"]).dt.year

# Kept for interactive inspection of the lowest-rated properties
low_epc = pathway1[pathway1["EPC Rating"].isin(["F", "G"])]
low_epc["EPC Rating"].value_counts()
low_epc.tail(1)[["address", "postcode"]]

pathway1.to_csv(
    "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Waltham Forest WHLG - Pathway 1 Eligibility.csv",
    index=False
)

# Pathway 2 or 3
# The household will need to be means tested
pathway2 = epc_data[~epc_data["uprn"].isin(pathway1["uprn"].values)]

View file

@ -0,0 +1,123 @@
import pandas as pd
from utils.s3 import save_csv_to_s3
PORTFOLIO_ID = 115
USER_ID = 8
def app():
    """
    Used to set up the remote assessments for Warwick

    Uploads the asset list, the non-invasive recommendations and the valuation data for three
    properties to S3 as CSV files, then prints the request body that references those files.
    """
    bucket = "retrofit-plan-inputs-dev"
    s3_prefix = f"{USER_ID}/{PORTFOLIO_ID}"

    def upload(records, key):
        # Every dataset is stored in the same bucket as a CSV built from a DataFrame
        save_csv_to_s3(
            dataframe=pd.DataFrame(records),
            bucket_name=bucket,
            file_name=key
        )

    def measure(measure_type, sap_points):
        # All recommendations in this set-up are flagged as coming from a survey
        return {"type": measure_type, "sap_points": sap_points, "survey": True}

    # Store the asset list in s3
    properties = [
        (10033604792, "Flat 2, 3 Green Street", "W1K 6RN"),
        (10033604794, "Flat 4, 3 Green Street", "W1K 6RN"),
        (10033615515, "Apartment 4, 52 Green Street", "W1K 6RS"),
    ]
    asset_list = [
        {"uprn": uprn, "address": address, "postcode": postcode}
        for uprn, address, postcode in properties
    ]
    filename = f"{s3_prefix}/asset_list.csv"
    upload(asset_list, filename)

    # Store non-invasive recommendations in S3
    measures_by_uprn = {
        10033604792: [measure("internal_wall_insulation", 16)],
        10033604794: [measure("internal_wall_insulation", 14)],
        10033615515: [
            measure("room_roof_insulation", 12),
            measure("internal_wall_insulation", 2),
        ],
    }
    non_invasive_recommendations = [
        {"uprn": uprn, "recommendations": measures}
        for uprn, measures in measures_by_uprn.items()
    ]
    non_invasive_recommendations_filename = f"{s3_prefix}/non_invasive_recommendations.csv"
    upload(non_invasive_recommendations, non_invasive_recommendations_filename)

    # Store valuation data to s3
    values_by_uprn = {
        10033604792: 3_692_000,
        10033604794: 3_789_000,
        10033615515: 3_499_000,
    }
    valuation_data = [{"uprn": uprn, "value": value} for uprn, value in values_by_uprn.items()]
    valuation_filename = f"{s3_prefix}/valuation.csv"
    upload(valuation_data, valuation_filename)

    body = {
        "portfolio_id": str(PORTFOLIO_ID),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "C",
        "trigger_file_path": filename,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
        "valuation_file_path": valuation_filename,
        "scenario_name": "Full package remote assessment",
        "multi_plan": True,
        "budget": None,
    }
    print(body)

View file

@ -1,7 +1,7 @@
import os
import re
import openpyxl
import Levenshtein
from fuzzywuzzy import fuzz
from pathlib import Path
import msgpack
from datetime import datetime
@ -2771,7 +2771,8 @@ class DataLoader:
match_to = [x.replace(" ", "") for x in match_to]
# Perform matching between full key and match_to
distances = [Levenshtein.distance(matching_string, s) for s in match_to]
distances = [100 - fuzz.ratio(matching_string, s) for s in match_to]
best_match_index = distances.index(min(distances))
# We might want to consider a threshold for the distance, however for the moment
# we don't apply one
@ -2897,6 +2898,17 @@ class DataLoader:
# Merge onto the survey list
survey_list = survey_list.merge(matching_lookup, how='left', on="survey_list_row_id")
# TEMP FOR NEWER WORK
# matching_lookup = matching_lookup.merge(
# asset_list[["asset_list_row_id", "UPRN"]], how="left", on="asset_list_row_id"
# ).merge(
# survey_list[["survey_list_row_id", "NO.", "Street / Block Name", "Post Code"]],
# how="left", on="survey_list_row_id"
# )
# matching_lookup.to_csv(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Plus Dane/surveys_to_assets.csv"
# )
return survey_list
@staticmethod

View file

@ -203,11 +203,11 @@ class TrainingDataset(BaseDataset):
common_cols = [[col + "_starting", col + "_ending"] for col in common_cols]
self.df = self.df.loc[
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
:,
no_suffix_cols
+ only_ending_cols
+ [col for cols in common_cols for col in cols],
]
def _remove_abnormal_change_in_floor_area(self):
"""
@ -511,7 +511,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_sandstone_or_limestone"]
== expanded_df["is_sandstone_or_limestone_ending"]
)
]
]
elif component == "floor":
expanded_df = expanded_df[
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
@ -528,7 +528,7 @@ class TrainingDataset(BaseDataset):
expanded_df["is_to_external_air"]
== expanded_df["is_to_external_air_ending"]
)
]
]
elif component == "roof":
expanded_df = expanded_df[
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
@ -541,7 +541,7 @@ class TrainingDataset(BaseDataset):
expanded_df["has_dwelling_above"]
== expanded_df["has_dwelling_above_ending"]
)
]
]
return expanded_df

View file

@ -139,28 +139,22 @@ class EPCRecord:
self._clean_records_using_epc_records()
self._clean_with_data_processor()
self._expand_prepared_epc_to_attributes()
self._identify_delta_between_prepared_and_original_records()
# Process to create uvalues for the single epc record
# selff.df = self.epc_record_as_dataframe('prepared_epc')
# self.df = self.epc_record_as_dataframe('prepared_epc')
# self._feature_generation()
# self._drop_features()
return
self._expand_description_to_features()
self._expand_description_to_uvalues()
# self._expand_description_to_features()
# self._expand_description_to_uvalues()
#
# self._generate_uvalues()
# self._validate_expanded_description()
# self._validate_u_values()
# etc
pass
def _drop_features(self):
"""
@ -359,6 +353,8 @@ class EPCRecord:
self._clean_property_dimensions()
self._clean_number_lighting_outlets()
self._clean_floor_level()
self._clean_floor_height()
self._clean_constituency()
# self._clean_potential_energy_efficiency()
# self._clean_environment_impact_potential()
@ -387,6 +383,31 @@ class EPCRecord:
return df
def _clean_floor_height(self):
""" Remaps anomalies in floor height to the average floor height for the property type """
floor_height_data = self.cleaning_data[
(self.cleaning_data["property_type"] == self.prepared_epc["property-type"]) &
(self.cleaning_data["built_form"] == self.prepared_epc["built-form"])
]
average = floor_height_data["floor_height"].mean()
sd = floor_height_data["floor_height"].std()
# If we're in the top 0.5 percentile of floor heights, we'll set it to the average
if self.prepared_epc["floor-height"] > average + 10 * sd:
self.prepared_epc["floor-height"] = average
if self.prepared_epc["floor-height"] <= 1.665:
self.prepared_epc["floor-height"] = average
def _clean_constituency(self):
"""
We handle the single case of finding a missing constituency by using the local authority
"""
if pd.isnull(self.prepared_epc["constituency"]) or (self.prepared_epc["constituency"] == ""):
if self.prepared_epc["local-authority"] != "E06000044":
raise NotImplementedError(
"This function is only implemented for Portsmouth, in the single edgecase seen"
)
self.prepared_epc["constituency"] = "E14000883"
def _clean_floor_level(self):
"""
This method will clean the floor level, if empty or invalid

View file

@ -0,0 +1,133 @@
import time
import pandas as pd
from tqdm import tqdm
from etl.find_my_epc.RetrieveFindMyEpc import RetrieveFindMyEpc
from backend.SearchEpc import SearchEpc
from utils.logger import setup_logger
logger = setup_logger()
class AssetListEpcData:
    """
    Collects EPC-related data for every property in an asset list.

    ``get_data`` must be run first; ``get_non_invasive_recommendations`` and ``get_patch`` then
    reshape what it extracted.
    """

    def __init__(self, asset_list: pd.DataFrame, epc_auth_token: str):
        """
        This class handles pulling data associated to an asset list and performs common functions like
        getting EPC api data, retrieving data from the find my epc website and extracting non-intrusive
        recommendations
        :param asset_list: The properties to process; ``get_data`` reads the "address" and "postcode"
            columns and, when present, "uprn", "property_type", "built_form" and "patch"
        :param epc_auth_token: Auth token handed to ``SearchEpc``
        """
        # Check the asset list contains the correct columns
        self.asset_list = self.check_asset_list(asset_list)
        self.epc_auth_token = epc_auth_token

        # Populated by get_data / get_non_invasive_recommendations / get_patch respectively
        self.extracted_data = None
        self.non_invasive_recommendations = None
        self.patches = None

    @staticmethod
    def check_asset_list(asset_list):
        """Return the asset list unchanged; no validation is performed yet."""
        # TODO: Update this with pydantic
        return asset_list

    def get_non_invasive_recommendations(self):
        """
        Extracts non-invasive recommendations in a format that can be used by the engine

        The result is stored on ``self.non_invasive_recommendations``.
        :raises ValueError: If ``get_data`` has not been run
        """
        if self.extracted_data is None:
            raise ValueError("Please run get_data first")

        formatted = []
        for record in self.extracted_data:
            formatted.append(
                {
                    "uprn": record.get("uprn"),
                    "address": record["address"],
                    "postcode": record["postcode"],
                    "recommendations": record["recommendations"],
                }
            )
        self.non_invasive_recommendations = formatted

    def get_patch(self):
        """
        Collect the patch of every extracted record that has one, keyed with its uprn.

        The result is stored on ``self.patches``.
        :raises ValueError: If ``get_data`` has not been run
        """
        if self.extracted_data is None:
            raise ValueError("extracted data is missing - run get_data first")

        patches = []
        for record in self.extracted_data:
            patch = record.get("patch")
            if not patch:
                continue
            patches.append({"uprn": record.get("uprn"), **patch})
        self.patches = patches

    @staticmethod
    def _retrieve_find_my_epc(epc_searcher):
        """Fetch the find my epc data, first with address1 + address2 and then with address1 alone."""
        # Attempt both methods:
        try:
            full_address_searcher = RetrieveFindMyEpc(
                address=epc_searcher.newest_epc["address1"] + ", " + epc_searcher.newest_epc["address2"],
                postcode=epc_searcher.newest_epc["postcode"]
            )
            return full_address_searcher.retrieve_newest_find_my_epc_data()
        except Exception as e:
            logger.error(f"Error retrieving find my epc data: {e}")
            short_address_searcher = RetrieveFindMyEpc(
                address=epc_searcher.newest_epc["address1"],
                postcode=epc_searcher.newest_epc["postcode"]
            )
            return short_address_searcher.retrieve_newest_find_my_epc_data()

    def get_data(self):
        """
        Retrieve the EPC and find my epc data for every property in the asset list.

        Properties for which no EPC is found are skipped. The collected records are stored on
        ``self.extracted_data``.
        """
        logger.info("Retrieving data for given asset list")

        # Pull the additional data
        collected = []
        for _, home in tqdm(self.asset_list.iterrows(), total=len(self.asset_list)):
            address = home["address"]
            postcode = home["postcode"]

            # Retrieve the EPC data
            epc_searcher = SearchEpc(
                address1=address,
                postcode=postcode,
                uprn=home.get("uprn"),
                auth_token=self.epc_auth_token,
                os_api_key="",
            )
            os_client = epc_searcher.ordnance_survey_client
            os_client.property_type = home.get("property_type")
            os_client.built_form = home.get("built_form")

            epc_searcher.find_property(skip_os=True)
            if epc_searcher.newest_epc is None:
                continue

            has_patch = not pd.isnull(home.get("patch"))
            if has_patch:
                # Search find my epc with the address from the asset list
                epc_searcher.newest_epc["address1"] = address

            find_epc_data = self._retrieve_find_my_epc(epc_searcher)
            time.sleep(0.5)

            # We need uprn
            record = {
                "uprn": home.get("uprn"),
                "address": home["address"],
                "postcode": home["postcode"],
                **find_epc_data,
            }
            if has_patch:
                record["patch"] = {
                    "current-energy-rating": find_epc_data["current_epc_rating"],
                    "current-energy-efficiency": find_epc_data["current_epc_efficiency"],
                    "potential-energy-rating": find_epc_data["potential_epc_rating"],
                    "potential-energy-efficiency": find_epc_data["potential_epc_efficiency"],
                    **find_epc_data["epc_data"]
                }

            collected.append(record)

        self.extracted_data = collected
        logger.info("Data Extrction complete")

View file

@ -0,0 +1,480 @@
import re
import pandas as pd
import requests
from bs4 import BeautifulSoup
from datetime import datetime
from utils.logger import setup_logger
logger = setup_logger()
class RetrieveFindMyEpc:
SEARCH_POSTCODE_URL = (
"https://find-energy-certificate.service.gov.uk/find-a-certificate/search-by-postcode?postcode={postcode_input}"
)
BASE_ENERGY_URL = "https://find-energy-certificate.service.gov.uk"
HEADERS = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
'Chrome/111.0.0.0 Safari/537.36'
}
def __init__(self, address: str, postcode: str):
"""
This class is tasked with retrieving the latest EPC data from the find my epc website
:param address: The address of the property
:param postcode: The postcode of the property
"""
self.address = address
self.postcode = postcode
self.address_cleaned = self.address.replace(",", "").replace(" ", "").lower()
self.walls = []
@staticmethod
def extract_low_carbon_sources(soup):
# Find the section header
section_header = soup.find("h3", string="Low and zero carbon energy sources")
if not section_header:
return {}
# Locate the list following the header
energy_list = section_header.find_next("ul")
# Extract the list items
sources = {item.get_text(strip=True): True for item in energy_list.find_all("li")}
return sources
@staticmethod
def get_text(elem):
return elem.get_text(strip=True) if elem else None
def extract_epc_data(self, soup):
    """
    Pull the property-level EPC fields out of a parsed certificate page.

    The floor area, primary energy and CO2 elements are accessed without None checks, so a page
    that lacks them fails with an exception instead of returning partial data.

    :param soup: The parsed certificate page
    :return: Dictionary with the total floor area, the description (and, where shown, the energy
        efficiency rating) of each building feature, the current energy consumption and the current
        and potential CO2 emissions. Features that are not on the page come back as None.
    """
    results = {}

    # 1. Total floor area
    floor_area_cell = soup.find("dt", string="Total floor area").find_next_sibling("dd")
    results['total-floor-area'] = int(self.get_text(floor_area_cell).split(" ")[0])

    # Table with features
    rows = soup.select("table.govuk-table tbody tr")

    # Ratings on the page use sentence case; these two are remapped to title case
    rating_map = {
        "Very poor": "Very Poor",
        "Very good": "Very Good"
    }

    def get_feature_row_text(feature_name, index=0):
        # Rows are matched on the feature name being contained in the row header, so a name such as
        # "Main heating" also matches "Main heating control" rows; index selects among the matches
        matches = [row for row in rows if row.find("th") and feature_name in row.find("th").text]
        if len(matches) > index:
            cells = matches[index].find_all("td")
            description = self.get_text(cells[0])
            rating = self.get_text(cells[1])
            return description, rating_map.get(rating, rating)
        return None, None

    # 2-3. First wall description and rating
    results['walls-description'], results['walls-energy-eff'] = get_feature_row_text("Wall", 0)

    # 4-5. First roof description and rating
    results['roof-description'], results['roof-energy-eff'] = get_feature_row_text("Roof", 0)

    # 6-7. Windows description and rating
    results['windows-description'], results['windows-energy-eff'] = get_feature_row_text("Window")

    # 8-9. Main heating description and rating
    results['mainheat-description'], results['mainheat-energy-eff'] = get_feature_row_text("Main heating")

    # 10-11. Main heating control description and rating
    results['mainheatcont-description'], results['mainheatc-energy-eff'] = get_feature_row_text(
        "Main heating control"
    )

    # 12-13. Hot water description and rating
    # The rating key previously ended in "-ef"; it now carries the same "-eff" suffix as the others
    results['hotwater-description'], results['hot-water-energy-eff'] = get_feature_row_text("Hot water")

    # 14-15. Lighting description and rating
    results['lighting-description'], results['lighting-energy-eff'] = get_feature_row_text("Lighting")

    # 16. Floor description
    results['floor-description'], _ = get_feature_row_text("Floor")

    # 17. Secondary heating description
    results['secondheat-description'], _ = get_feature_row_text("Secondary heating")

    # 18. Primary energy use
    p_energy = soup.find(string=lambda t: "primary energy use for this property per year" in t.lower())
    # We should always have this
    match = re.search(r"(\d+)\s+kilowatt", p_energy)
    results['energy-consumption-current'] = int(match.group(1)) if match else None

    # 19. Current CO2 emissions
    co2_now = soup.find("dd", id="eir-property-produces")
    # We should always have this
    match = re.search(r"([\d.]+)", co2_now.text)
    results['co2-emissions-current'] = float(match.group(1)) if match else None

    # Need co2-emiss-curr-per-floor-area

    # 20. Potential CO2 emissions
    co2_pot = soup.find("dd", id="eir-potential-production")
    match = re.search(r"([\d.]+)", co2_pot.text)
    results['co2-emissions-potential'] = float(match.group(1)) if match else None

    return results
def retrieve_newest_find_my_epc_data(self, sap_2012_date=None):
    """
    For a post code and address, we pull out all the required data from the find my epc website

    The postcode search page is fetched first. Result rows whose cleaned address starts with
    ``self.address_cleaned`` are kept and the one with the latest expiry date is chosen. Its
    certificate page is then parsed for the ratings, bill estimates, recommendations, assessment
    details, wall types, low and zero carbon energy sources and the EPC feature data.

    As a side effect ``self.walls`` is reset and filled with the wall descriptions of the property.

    :param sap_2012_date: Passed through unchanged to ``format_recommendations``
    :return: Dictionary with the certificate id, current and potential rating and efficiency, the
        heating and hot water bill text (None when the page does not list them), the formatted
        recommendations, the EPC feature data, the assessment details and the low carbon sources
    :raises ValueError: If no search result matches the address, or an expected assessment field is
        missing from the certificate page
    """
    # Seconds to wait on each request before giving up, so a stalled connection cannot hang the run
    request_timeout = 30

    postcode_input = self.postcode.replace(" ", "+")
    postcode_search = self.SEARCH_POSTCODE_URL.format(postcode_input=postcode_input)
    postcode_response = requests.get(postcode_search, headers=self.HEADERS, timeout=request_timeout)
    postcode_res = BeautifulSoup(postcode_response.text, features="html.parser")
    rows = postcode_res.find_all('tr', class_='govuk-table__row')

    extracted_table = []
    for row in rows:
        # Extract the address and URL
        address_tag = row.find('a', class_='govuk-link')
        if address_tag is None:
            continue

        extracted_address = address_tag.text.strip()
        extracted_address_url = address_tag['href']

        extracted_address_cleaned = extracted_address.replace(",", "").replace(" ", "").lower()
        if not extracted_address_cleaned.startswith(self.address_cleaned):
            continue

        # If the address is a match, we can extract the data
        # Extract the expiry date
        expiry_date_tag = row.find('td', class_='govuk-table__cell date')
        if expiry_date_tag is not None:
            expiry_date = datetime.strptime(expiry_date_tag.parent.find('span').text.strip(), '%d %B %Y')
        else:
            # A row without a date cell sorts last instead of failing on strptime(None, ...)
            expiry_date = datetime.min

        extracted_table.append(
            {
                "extracted_address": extracted_address,
                "extracted_address_url": extracted_address_url,
                "expiry_date": expiry_date,
            }
        )

    if not extracted_table:
        raise ValueError("No EPC found")

    # We take the one with the most recent expiry date (the first such row when dates tie)
    newest_result = max(extracted_table, key=lambda x: x['expiry_date'])

    chosen_epc = self.BASE_ENERGY_URL + newest_result['extracted_address_url']
    epc_certificate = chosen_epc.split('/')[-1]

    address_response = requests.get(chosen_epc, headers=self.HEADERS, timeout=request_timeout)
    address_res = BeautifulSoup(address_response.text, features="html.parser")

    # Key data we want to retrieve:
    # 1) Rating
    # 2) Bills estimates
    # 3) Recommendations and SAP points
    # 4) Low and zero carbon energy sources
    # 5) The wall types of the property - used for determining if we have an extension wall insulation
    # recommendation

    # The description holds one sentence per rating; the SAP score is the last word of a sentence and
    # the EPC letter is the sixth word from the end
    ratings = address_res.find('desc', {'id': 'svg-desc'}).text
    current_rating = ratings.split(".")[0]
    potential_rating = ratings.split(".")[1]
    current_sap = int(current_rating.split(' ')[-1])

    # Retrieve the energy consumption
    # If there is nothing listed, it's usually because the EPC was very old. Early EPCs did not have
    # this information
    bills = address_res.find('div', {'id': 'bills-affected'})
    bills_list = bills.find_all('li') if bills is not None else []
    heating_text = bills_list[0].text if len(bills_list) > 0 else None
    hot_water_text = bills_list[1].text if len(bills_list) > 1 else None

    # Retrieve the recommendations and SAP points
    recommendations = []
    recommendations_div = address_res.find('div', class_='epb-recommended-improvements')
    if recommendations_div:
        # Find all h3 headers for each step and extract their related information
        step_headers = recommendations_div.find_all('h3', class_='govuk-heading-m')

        previous_sap_score = current_sap
        previous_epc = current_rating.split(' ')[-6]
        for step_num, step_header in enumerate(step_headers, start=1):
            # Extract the step title (the measure)
            measure_title = step_header.text.strip().replace(f"Step {step_num}: ", "")

            # Find the div containing the potential rating within the same section
            potential_rating_div = step_header.find_next(
                'div', class_='epb-recommended-improvements__potential-rating'
            )

            # Check if the potential rating div is found
            if potential_rating_div:
                # Extract the rating text within the SVG text element
                extracted_rating_text = potential_rating_div.find('text', class_='govuk-!-font-weight-bold')
                if extracted_rating_text is not None:
                    rating_text = extracted_rating_text.text.strip()
                else:
                    # No rating shown for this step: carry the previous score and letter forward
                    rating_text = " ".join([str(previous_sap_score), previous_epc])

                # Parse the rating text to separate the numeric rating and EPC letter
                new_rating = int(rating_text.split()[0])
                new_epc = rating_text.split()[1]

                # Append the information as a dictionary to the recommendations list
                recommendations.append({
                    "step": step_num,
                    "measure": measure_title,
                    "new_rating": new_rating,
                    "new_epc": new_epc,
                    "sap_points": new_rating - previous_sap_score
                })

                previous_sap_score = new_rating
                previous_epc = new_epc

    # Search for the assessment information
    assessment_information = address_res.find('div', {'id': 'information'})
    # Parse this information
    assessment_rows = assessment_information.find_all('div', class_='govuk-summary-list__row')

    # Create a dictionary to hold the parsed information
    assessment_data = {}
    for row in assessment_rows:
        key = row.find('dt').text.strip()
        if key == "Type of assessment":
            # We dont reliably extract this
            continue

        value_tag = row.find('dd')
        # Check if value contains a link (email)
        if value_tag.find('a'):
            value = value_tag.find('a').text.strip()
        elif value_tag.find('summary'):
            value = value_tag.find('span').text.strip()
        else:
            value = value_tag.text.strip()

        # These are keys that we have for both the surveyor and the accreditation scheme. Firstly, we'll
        # get the surveyor's name and email so we make that information clear
        if key in ["Telephone", "Email"]:
            if "Assessor's " + key not in assessment_data:
                assessment_data["Assessor's " + key] = value
            else:
                assessment_data["Accreditation Scheme's " + key] = value
            continue

        assessment_data[key] = value

    # NOTE(review): the unprefixed labels below must match the page text exactly, including any
    # apostrophe characters - verify against a live certificate page
    expected_keys = [
        'Assessors name',
        "Assessor's Telephone",
        "Assessor's Email",
        'Assessors ID',
        'Accreditation scheme',
        'Assessors declaration',
        "Accreditation Scheme's Telephone",
        "Accreditation Scheme's Email",
        'Date of assessment',
        'Date of certificate'
    ]
    # Check we have all the expected keys
    for key in expected_keys:
        if key not in assessment_data:
            raise ValueError(f"Missing key: {key}")

    # The wall types of the property
    property_features_table = address_res.find("tbody", class_="govuk-table__body")
    property_features_table = property_features_table.find_all("tr")

    # Extract wall types
    self.walls = []
    for row in property_features_table:
        cells = row.find_all("td")
        if row.find("th").text.strip() == "Wall":
            self.walls.append(cells[0].text.strip())

    # Finally, we format the recommendations
    recommendations = self.format_recommendations(recommendations, assessment_data, sap_2012_date)

    # 4) Low and zero carbon energy sources
    low_carbon_energy_sources = self.extract_low_carbon_sources(address_res)

    # 5) Pull out the EPC data
    epc_data = self.extract_epc_data(address_res)

    resulting_data = {
        'epc_certificate': epc_certificate,
        'current_epc_rating': current_rating.split(' ')[-6],
        'current_epc_efficiency': current_sap,
        'potential_epc_rating': potential_rating.split(' ')[-6],
        "potential_epc_efficiency": int(potential_rating.split(' ')[-1]),
        "heating_text": heating_text,
        "hot_water_text": hot_water_text,
        "recommendations": recommendations,
        "epc_data": epc_data,
        **assessment_data,
        **low_carbon_energy_sources,
    }

    return resulting_data
def format_recommendations(self, recommendations, assessment_data, sap_2012_date=None):
    """
    Convert the EPC recommendations into the format used by the engine as a non-intrusive survey.

    Each recommendation is expanded into one entry per mapped engine measure. Recommendations that map to an
    empty list produce no entries.

    :param recommendations: The recommendations from the EPC; each item needs "measure" and "sap_points" keys
    :param assessment_data: The assessment data from the EPC; "Date of certificate" (e.g. "14 April 2025") is
        only read when sap_2012_date is given
    :param sap_2012_date: The date of the SAP 2012 update. Certificates issued before it are flagged with
        survey=False
    :return: A list of dicts with "type", "sap_points" and "survey" keys ("suitable" is added for solar_pv)
    :raises KeyError: If a recommendation's measure has no entry in the measure map
    """
    measure_map = {
        "Internal or external wall insulation": ["internal_wall_insulation", "external_wall_insulation"],
        "Hot water cylinder insulation": ["hot_water_tank_insulation"],
        "Hot water cylinder thermostat": ["cylinder_thermostat"],
        "High performance external doors": ["insulated_doors"],
        "Floor insulation (solid floor)": ["solid_floor_insulation"],
        "Floor insulation (suspended floor)": ["suspended_floor_insulation"],
        "Double glazed windows": ["double_glazing"],
        "Cavity wall insulation": ["cavity_wall_insulation"],
        "Replace boiler with new condensing boiler": ["boiler_upgrade"],
        "Floor insulation": ["floor_insulation"],  # Recommendation typically associated to older EPCs
        "Heating controls (programmer, room thermostat and TRVs)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Low energy lighting": ["low_energy_lighting"],
        "Increase loft insulation to 270 mm": ["loft_insulation"],
        "Heating controls (thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Solar water heating": ["solar_water_heating"],
        "Solar photovoltaic panels, 2.5 kWp": ["solar_pv"],
        "Heating controls (room thermostat and TRVs)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Change heating to gas condensing boiler": ["boiler_upgrade"],
        "Fan assisted storage heaters and dual immersion cylinder": ["high_heat_retention_storage_heater"],
        "Flat roof or sloping ceiling insulation": ["flat_roof_insulation"],
        "Heating controls (room thermostat)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Band A condensing boiler": ["boiler_upgrade"],
        "Double glazing": ["double_glazing"],
        "Flue gas heat recovery device in conjunction with boiler": ["flue_gas_heat_recovery"],
        "Wind turbine": ["wind_turbine"],
        "Loft insulation": ["loft_insulation"],
        "Solar photovoltaic (PV) panels": ["solar_pv"],
        "Party wall insulation": ["party_wall_insulation"],
        'Draught proofing': ["draught_proofing"],
        "Roof insulation recommendation": [],
        "Cavity wall insulation recommendation": [],
        "Windows draught proofing": [],
        "Low energy lighting for all fixed outlets": ["low_energy_lighting"],
        "Cylinder thermostat recommendation": [],
        "Heating controls recommendation": [],
        "Replace boiler with Band A condensing boiler": ["boiler_upgrade"],
        "Band A condensing gas boiler": ["boiler_upgrade"],
        "Solar panel recommendation": [],
        "Double glazing recommendation": [],
        "Solid wall insulation recommendation": [],
        "Fuel change recommendation": [],
        "PV Cells recommendation": [],
        "Replacement glazing units": ["double_glazing"],
        "Heating controls (time and temperature zone control)": ["time_temperature_zone_control"],
        "High heat retention storage heaters": ["high_heat_retention_storage_heater"],
        "Gas condensing boiler": ["boiler_upgrade"],
        "Change room heaters to condensing boiler": ["boiler_upgrade"],
        "Cylinder thermostat": ["cylinder_thermostat"],
        "Heat recovery system for mixer showers": ["heat_recovery_shower"],
        "Room-in-roof insulation": ["room_in_roof_insulation"],
        "Fan assisted storage heaters": [],
        "Fan-assisted storage heaters": [],
        "Step 1:": [],
        "Step 2:": [],
        'Step 3:': [],
        "Biomass stove with boiler": [],
        "Replace boiler with biomass boiler": [],
        "Heating controls (room thermostat and thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Heating controls (programmer, and thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Heating controls (programmer and TRVs)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Heating controls (programmer and room thermostat)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        "Replacement warm air unit": [],
        "Secondary glazing": ["secondary_glazing"],
        "Condensing heating unit": ["boiler_upgrade"],
        '???': [],
        'Solar photovoltaic panels, 2.5kWp': ["solar_pv"],
        'Heating controls (programmer, room thermostat and thermostatic radiator valves)': [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ],
        'Translation missing: en.improvement_code.41.title': [],
        "Condensing boiler (separate from the range cooker)": ["boiler_upgrade"],
        "Heating controls (programmer and thermostatic radiator valves)": [
            "roomstat_programmer_trvs", "time_temperature_zone_control"
        ]
    }

    # Certificates issued before the SAP 2012 update are flagged with survey=False
    survey = True
    if sap_2012_date is not None:
        certificate_date = datetime.strptime(assessment_data["Date of certificate"], "%d %B %Y")
        if certificate_date < pd.to_datetime(sap_2012_date):
            survey = False

    formatted_recommendations = []
    for rec in recommendations:
        # An unmapped measure raises KeyError on purpose so that new EPC wording is noticed and added above
        mapped = measure_map[rec["measure"]]
        for measure in mapped:
            # A cavity recommendation on a property whose first listed wall is solid brick is re-labelled as
            # an extension cavity measure. The wall list may be empty when no "Wall" rows were parsed, so
            # guard the lookup instead of raising IndexError.
            if (
                measure == "cavity_wall_insulation"
                and self.walls
                and "solid brick" in self.walls[0].lower()
            ):
                measure = "extension_cavity_wall_insulation"

            to_append = {
                "type": measure,
                "sap_points": rec["sap_points"],
                "survey": survey,
            }
            if measure == "solar_pv":
                to_append["suitable"] = True
            formatted_recommendations.append(to_append)

    return formatted_recommendations
@classmethod
def get_from_epc(cls, epc):
    """
    Build the non-invasive recommendations record for an EPC row.

    The lookup is first attempted with the "address" field; if that raises for any reason, the error is logged
    and the lookup is retried with the backup "address1" field. An error from the retry propagates to the caller.

    :param epc: Mapping with "uprn", "address", "address1" and "postcode" entries
    :return: Dict with the uprn, address, postcode and the recommendations found for the property
    """
    def _lookup(address_field):
        # A fresh searcher is built per attempt, exactly as the two attempts require
        searcher = cls(address=epc[address_field], postcode=epc["postcode"])
        return searcher.retrieve_newest_find_my_epc_data()

    # Attempt both methods:
    try:
        epc_page_data = _lookup("address")
    except Exception as err:
        logger.error(f"Error retrieving find my epc data: {err}")
        # We attempt with the backup address
        epc_page_data = _lookup("address1")

    return {
        "uprn": epc["uprn"],
        "address": epc["address"],
        "postcode": epc["postcode"],
        "recommendations": epc_page_data["recommendations"],
    }

View file

@ -0,0 +1,2 @@
pandas
beautifulsoup4

35
etl/funding/app.py Normal file
View file

@ -0,0 +1,35 @@
"""
This script prepares the data required for us to perform funding calculations. The starting data should be stored
on the machine this is being run on, and this will prepare the information and upload it to S3
"""
import pandas as pd
from utils.s3 import save_csv_to_s3
STAGE = "dev"
DATA_BUCKET = "retrofit-data-{stage}"
PROJECTS_SCORES_MATRIX_LOCATION = "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv"
WHLG_ELIGIBLE_POSTCODES = "/Users/khalimconn-kowlessar/Downloads/WHLG-eligible-postcodes.xlsx"
def app():
    """
    Upload the funding reference datasets to S3.

    Reads the ECO4 project scores matrix (CSV) and the Warm Homes Local Grant eligible postcodes (Excel) from the
    local paths configured at the top of this module, and stores both as CSV files under the "funding/" prefix
    of the data bucket for the configured stage.
    """
    bucket_name = DATA_BUCKET.format(stage=STAGE)

    # Read in the project scores matrix
    project_scores_matrix = pd.read_csv(PROJECTS_SCORES_MATRIX_LOCATION)

    # Store in AWS S3
    save_csv_to_s3(
        dataframe=project_scores_matrix,
        bucket_name=bucket_name,
        file_name="funding/ECO4 Full Project Scores Matrix.csv"
    )

    # Read in the Warm Homes Local Grant eligible postcodes data
    whlg_eligible_postcodes = pd.read_excel(WHLG_ELIGIBLE_POSTCODES, sheet_name="Eligible postcodes", header=1)

    # We tidy up the data before we store. The explicit copy makes the column assignment below operate on an
    # independent frame rather than on a selection of the sheet that was read in (avoids chained assignment).
    whlg_eligible_postcodes = whlg_eligible_postcodes[["Postcode"]].copy()
    whlg_eligible_postcodes["Postcode"] = whlg_eligible_postcodes["Postcode"].str.lower()

    save_csv_to_s3(
        dataframe=whlg_eligible_postcodes,
        bucket_name=bucket_name,
        file_name="funding/whlg eligible postcodes.csv"
    )

326
etl/lodgement/app.py Normal file
View file

@ -0,0 +1,326 @@
import os
import pandas as pd
import utils.file_data_extraction as file_extraction_tools
from utils.fullSapParser import FullSapParser
from utils.OsmosisCondtionReportParser import OsmosisConditionReportParser
# Blank output row: every column of the lodgement spreadsheet, in order, initialised to None.
# "Risk Path" is intentionally left out for now.
output_template = dict.fromkeys(
    (
        "Property Address",
        "Osm. ID",
        "Postcode",
        "City/County",
        "District/Town",
        "Funding Stream",
        "Local Authority",
        "Trustmark Lodgement ID",
        "Certificate Number",
        "EWI UMR",
        "Loft UMR",
        "Windows UMR",
        "Doors UMR",
        "Measure Lodgement Date",
        "Full Lodgement Date",
        "Owner - Name",
        "Owner - Phone",
        "Owner - Email",
        "Tenant - Name",
        "Tenant - Phone",
        "R. Assessor - Name",
        "R. Coordinator - Name",
        "Trustmark Licence Number",
        "Retrofit Assessment Date",
        "Company Name",
        "Retrofit Designer Name",
        "Property Type",
        "Property Detachment",
        "No. of Bedrooms",
        "Property age",
        "SAP Rating Pre (from IMA)",
        "Pre Heat Transfer",
        "Pre Total Floor Area",
        "Pre Heat Demand",
        "Pre Air Tightness",
        "SAP Rating Post (from EPC)",
        "Post Heat Transfer",
        "Post Total Floor Area",
        "Post Heat Demand",
        "Post Air Tightness",
        "Number of Eligible Measures Installed",
        "Total Cost of Works",
        "Annual Fuel Saving (MTP)",
    )
)
def update_dictionary_with_check(dictionary, updates):
    """
    Copy the values in ``updates`` onto ``dictionary``, refusing to create new keys.

    Keys are applied in iteration order, so any updates that precede an unknown key have already been written
    when the error is raised.

    Args:
        dictionary (dict): The dictionary to update in place.
        updates (dict): The key-value pairs to apply.

    Raises:
        KeyError: If a key in updates does not exist in the dictionary.
    """
    for name in updates:
        if name in dictionary:
            dictionary[name] = updates[name]
            continue
        raise KeyError(f"Key '{name}' does not exist in the dictionary.")
def handler():
    """
    Extract data from the lodgement documents of each property and write one summary row per property to a CSV.

    For every property folder under the hard-coded source path, the reports found in the "RA Coordinator Info",
    "Air Tightness Tests" and "TrustMark Lodgement" sub-folders are parsed with the matching extractor, and the
    extracted values are copied into a fresh copy of ``output_template``. The documents are expected to come from
    Sharepoint eventually; for now they are read from the local machine.

    :return: None. The collected rows are written to a CSV file.
    :raises ValueError: If a file in the coordinator or air tightness folder has an unrecognised report type
    """
    # The source data will eventually come from Sharepoint
    source_data_path = "/Users/khalimconn-kowlessar/Documents/hestia/Lodgment Pilot"
    # NOTE(review): output_template_file and the customer_* values below are not referenced again in this function
    output_template_file = "Trustmark Details - Template REV.25.11.24.xlsx"
    funding_stream = "HUG2"
    customer_name = "Shropshire Council"
    customer_phone = "0345 678 9000"
    customer_email = "affordablewarmth@shropshire.gov.uk"

    # TODO: In order for this to go live, we need to use Poppler, which needs to be installed
    #  w/ brew install poppler
    #  We also need to install Tesseract: brew install tesseract

    # List the folders in the source data path (one folder per property)
    folders = [x for x in os.listdir(source_data_path) if os.path.isdir(os.path.join(source_data_path, x))]

    # Maps a detected report type to the class that extracts it. A value of None means the report type is
    # recognised but deliberately not processed.
    extractors = {
        "elmhurst epr": file_extraction_tools.ElmhurstEprExtractor,
        "elmhurst summary report": file_extraction_tools.ElmhurstSummaryReportExtractor,
        "osmosis condition report": OsmosisConditionReportParser,
        "elmhurst evidence report": None,
        "full sap xml": FullSapParser,
        "pulse air permeability": file_extraction_tools.PulseAirPermeabilityExtractor,
        "elmhurst project handover": file_extraction_tools.ElmhurstProjectHandoverExtractor,
        "core logic pas assessment report": file_extraction_tools.CoreLogicPasAssessmentReportExtractor,
    }

    extracted = []
    for property_folder in folders:
        property_folder_path = os.path.join(source_data_path, property_folder)
        # List the sub-folders of this property's folder
        subfolders = [
            x for x in os.listdir(property_folder_path) if os.path.isdir(os.path.join(property_folder_path, x))
        ]

        # The first sub-folder whose name contains "RA Coordinator Info"; IndexError if there is none
        coord_folder = os.path.join(property_folder_path, [f for f in subfolders if "RA Coordinator Info" in f][0])
        # Get the contents of the folder
        coordinator_folder_contents = [
            file for file in os.listdir(coord_folder) if os.path.isfile(os.path.join(coord_folder, file))
        ]

        # We detect the various file types. Results are keyed by report type, so a later file of the same type
        # replaces an earlier one.
        extracted_contents = {}
        for filename in coordinator_folder_contents:
            filepath = os.path.join(coord_folder, filename)
            if file_extraction_tools.is_pdf(filepath):
                report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
                if report_type is None:
                    raise ValueError(f"Unknown report type for {filename}")
                file_extractor = extractors[report_type]
                if file_extractor is None:
                    continue
                extracted_contents[report_type] = file_extractor(filepath).extract()

            if file_extraction_tools.is_xml(filepath):
                xml_type = file_extraction_tools.detect_xml_report_type(xml_path=filepath)
                if xml_type is None:
                    raise ValueError(f"Unknown report type for {filename}")
                file_extractor = extractors.get(xml_type)
                if file_extractor is None:
                    continue
                extracted_contents[xml_type] = file_extractor(filepath).extract()

        # Air tightness test reports: only PDF files are processed here
        att_folder = os.path.join(property_folder_path, [f for f in subfolders if "Air Tightness Tests" in f][0])
        att_folder_contents = [
            file for file in os.listdir(att_folder) if os.path.isfile(os.path.join(att_folder, file))
        ]
        for filename in att_folder_contents:
            filepath = os.path.join(att_folder, filename)
            if file_extraction_tools.is_pdf(filepath):
                report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
                if report_type is None:
                    raise ValueError(f"Unknown report type for {filename}")
                file_extractor = extractors[report_type]
                if file_extractor is None:
                    continue
                extracted_contents[report_type] = file_extractor(filepath).extract()

        lodgement_folder = os.path.join(
            property_folder_path, [f for f in subfolders if "TrustMark Lodgement" in f][0]
        )
        # Within the lodgement folder, we want the required documents sub-folder
        lodgement_subfolders = [
            file for file in os.listdir(lodgement_folder) if os.path.isdir(os.path.join(lodgement_folder, file))
        ]
        required_documents_folder = os.path.join(
            lodgement_folder, [f for f in lodgement_subfolders if "required documents" in f.lower()][0]
        )
        # List the contents
        required_documents_contents = [
            file for file in os.listdir(required_documents_folder) if
            os.path.isfile(os.path.join(required_documents_folder, file))
        ]
        # There are only a few file types we actually want to process in here for the moment:
        # everything except the Elmhurst project handover PDF is ignored (no error for unknown types)
        for filename in required_documents_contents:
            filepath = os.path.join(required_documents_folder, filename)
            if file_extraction_tools.is_pdf(filepath):
                report_type = file_extraction_tools.detect_pdf_report_type(pdf_path=filepath)
                if report_type != "elmhurst project handover":
                    continue
                file_extractor = extractors[report_type]
                extracted_contents[report_type] = file_extractor(filepath).extract()

        # Shallow copy is sufficient because every template value is None
        output_row_data = output_template.copy()

        # NOTE(review): scratch list of template keys kept from development - it no longer matches the fields
        # populated below (e.g. 'City/County' is filled from the EPR); confirm whether it is still needed
        # dict_keys([ 'City/County', 'District/Town',
        # 'Local Authority', 'Trustmark Lodgement ID', 'Certificate Number', 'EWI UMR', 'Loft UMR', 'Windows UMR',
        # 'Doors UMR', 'Measure Lodgement Date', 'Full Lodgement Date', 'Owner - Name', 'Owner - Phone',
        # 'Owner - Email', 'Tenant - Name', 'Tenant - Phone',
        # 'Trustmark Licence Number',
        # Pre Air Tightness', 'SAP Rating Post (from EPC)', 'Post Heat
        # Transfer', 'Post Total Floor Area', 'Post Heat Demand', 'Post Air Tightness',
        # 'Total Cost of Works', 'Annual Fuel Saving (MTP)'])

        # The folder name is split on ")": the text before it (minus a leading "(") is the Osm. ID and the
        # text after it is the property address
        update_dictionary_with_check(
            output_row_data,
            {
                "Funding Stream": funding_stream,
                "Property Address": property_folder.split(")")[1].strip(),
                "Osm. ID": property_folder.split(")")[0].strip().lstrip("(").strip(),
            }
        )

        if extracted_contents.get("elmhurst epr"):
            total_floor_area = sum(
                [x["Floor Area (m2)"] for x in extracted_contents["elmhurst epr"]["Building Parts"]] +
                # Get the conservatory floor area
                [extracted_contents["elmhurst epr"]["Conservatory"]["Conservatory Floor Area"]]
            )
            # NOTE(review): "Pre Heat Transfer" is filled from the primary energy use intensity value -
            # confirm this is the intended source
            pre_heat_transfer = extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"]
            # Intensity (per m2) multiplied by the total floor area
            pre_heat_demand = (
                extracted_contents["elmhurst epr"]["Primary Energy Use Intensity (kWh/m2/yr)"] * total_floor_area
            )

            epr_to_insert = {
                "Postcode": extracted_contents["elmhurst epr"]["Postcode"],
                "City/County": extracted_contents["elmhurst epr"]["County"],
                "District/Town": extracted_contents["elmhurst epr"]["Town"],
                "Local Authority": None,
                'SAP Rating Pre (from IMA)': extracted_contents["elmhurst epr"]["Current SAP Rating"],
                'Pre Heat Transfer': pre_heat_transfer,
                'Pre Total Floor Area': total_floor_area,
                'Pre Heat Demand': pre_heat_demand,
                "R. Assessor - Name": extracted_contents["elmhurst epr"]["Assessor Name"],
                "Retrofit Assessment Date": extracted_contents["elmhurst epr"]["Assessment Date"],
            }
            update_dictionary_with_check(
                output_row_data,
                epr_to_insert
            )

        if extracted_contents.get("full sap xml"):
            xml_to_insert = {
                "Property Type": extracted_contents["full sap xml"]["Property Type"],
                "Property Detachment": extracted_contents["full sap xml"]["Built Form"],
                "Property age": extracted_contents["full sap xml"]["Age Band"],
            }
            update_dictionary_with_check(
                output_row_data,
                xml_to_insert
            )

        if extracted_contents.get("osmosis condition report"):
            cr_to_insert = {
                "No. of Bedrooms": extracted_contents["osmosis condition report"]["No. of Bedrooms"],
                # "Risk Path": extracted_contents["osmosis condition report"]["Risk Assessment Pathway"],
            }
            update_dictionary_with_check(
                output_row_data,
                cr_to_insert
            )

        # This runs after the EPR block, so when both reports exist the summary report values overwrite the
        # EPR ones - including resetting "Pre Heat Demand" to None
        if extracted_contents.get("elmhurst summary report"):
            total_floor_area = sum(
                [x["Floor Area (m2)"] for x in extracted_contents["elmhurst summary report"]["Building Parts"]] +
                # Get the conservatory floor area
                [extracted_contents["elmhurst summary report"]["Conservatory"]["Conservatory Floor Area"]]
            )
            pre_heat_transfer = (
                extracted_contents["elmhurst summary report"]["Primary Energy Use Intensity (kWh/m2/yr)"]
            )
            pre_heat_demand = None  # Don't have this

            summary_to_insert = {
                "Postcode": extracted_contents["elmhurst summary report"]["Postcode"],
                "City/County": extracted_contents["elmhurst summary report"]["County"],
                "District/Town": extracted_contents["elmhurst summary report"]["Town"],
                'SAP Rating Pre (from IMA)': extracted_contents["elmhurst summary report"]["Current SAP Rating"],
                'Pre Heat Transfer': pre_heat_transfer,
                'Pre Total Floor Area': total_floor_area,
                'Pre Heat Demand': pre_heat_demand,
                "R. Assessor - Name": extracted_contents["elmhurst summary report"]["Assessor Name"],
                "Retrofit Assessment Date": extracted_contents["elmhurst summary report"]["Assessment Date"],
            }
            update_dictionary_with_check(
                output_row_data,
                summary_to_insert
            )

        if extracted_contents.get("pulse air permeability"):
            # We extract the AP50 number: the "Extrapolated @ 50PA" value of the "Air Permeability" row
            results_table = extracted_contents["pulse air permeability"]["Results Table"]
            ap50 = [x["Extrapolated @ 50PA"] for x in results_table if x["Metric"] == "Air Permeability"][0]
            update_dictionary_with_check(
                output_row_data,
                {"Pre Air Tightness": ap50}
            )

        if extracted_contents.get("elmhurst project handover"):
            handover_to_insert = {
                "Number of Eligible Measures Installed": len(
                    extracted_contents["elmhurst project handover"]["Measures Fitted"]
                ),
                "Retrofit Designer Name": extracted_contents["elmhurst project handover"]["Designer Name"],
                "Company Name": extracted_contents["elmhurst project handover"]["Installer Name"],
                "R. Coordinator - Name": extracted_contents["elmhurst project handover"]["Retrofit Coordinator Name"],
            }
            update_dictionary_with_check(output_row_data, handover_to_insert)

        # Runs after the Osmosis condition report block, so its bedroom count wins when both reports exist
        if extracted_contents.get("core logic pas assessment report"):
            cr_to_insert = {
                "No. of Bedrooms": extracted_contents["core logic pas assessment report"]["Number of bedrooms"],
            }
            update_dictionary_with_check(
                output_row_data,
                cr_to_insert
            )

        extracted.append(output_row_data)

    # One row per property folder
    extracted_df = pd.DataFrame(extracted)
    extracted_df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Lodgment Pilot/poc-extrcted-data.csv",
                        index=False)

View file

@ -0,0 +1,14 @@
PyPDF2
pandas
tqdm
openpyxl
boto3
usaddress==0.5.11
fuzzywuzzy==0.18.0
python-dotenv
python-docx
pymupdf
pytesseract
pdf2image
pillow
pdfplumber

View file

@ -0,0 +1,240 @@
import os
import pandas as pd
import numpy as np
from tqdm import tqdm
from dotenv import load_dotenv
from urllib.parse import urlencode
from epc_api.client import EpcClient
from utils.logger import setup_logger
from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes
from recommendations.recommendation_utils import (
estimate_perimeter,
estimate_external_wall_area,
estimate_number_of_floors
)
logger = setup_logger()
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
# One entry per tab of the surveyors' workbook that should be processed. Every tab lives in the same
# file and keeps its postcodes in a "Postcode" column.
CONFIG = [
    {
        "filepath": (
            "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/"
            "Surveyors Sites for Week Commencing 11.11.2024.xlsx"
        ),
        "tab": tab_name,
        "postcode_column": "Postcode",
    }
    for tab_name in (
        "SETTLE GBIS x 242 ",  # the trailing space is part of the tab name
        "ACIS GBIS x 76",
        "SOUTHERN GBIS x 150",
        "COMMUNITY HOUSING GBIS x 199",
        "EASTLIGHT GBIS x 42",
    )
]
# EPC wall descriptions that mark a property as cavity-walled when filtering
CAVITY_WALL_DESCRIPTIONS = [
    f"Cavity wall, {detail}"
    for detail in (
        "as built, no insulation (assumed)",
        "as built, partial insulation (assumed)",
        "as built, insulated (assumed)",
        "with internal insulation",
        "with external insulation",
    )
]

# EPC roof descriptions accepted by the "Solar and Loft" filter
ROOF_DESCRIPTIONS = [
    "Pitched, no insulation",
    "Pitched, no insulation (assumed)",
    *(f"Pitched, {depth_mm} mm loft insulation" for depth_mm in (25, 50, 75, 100, 150)),
    "Pitched, limited insulation (assumed)",
    "Pitched, insulated (assumed)",
]

# Tenure values that are excluded from the output (both spellings are matched)
SOCIAL_TENURES = [
    "Rented (social)",
    "rental (social)",
]
def process_postcode_epcs(postcode, client):
    """
    Retrieve the EPCs lodged against a postcode and keep only the most recent one per property.

    :param postcode: The postcode to search for; surrounding whitespace is ignored
    :param client: EPC API client exposing ``domestic.host`` and ``domestic.call``
    :return: DataFrame with one row per property (newest "lodgement-date" first), or an empty DataFrame when
        the search returns no rows
    """
    params = {"postcode": postcode.strip()}
    # Build the URL by hand: os.path.join uses the platform path separator, which is wrong for URLs on Windows
    search_url = client.domestic.host.rstrip("/") + "/search"
    url = search_url + "?" + urlencode({"size": 1000})
    response = client.domestic.call(method="get", url=url, params=params)

    # An absent or empty "rows" list both mean nothing was found; an empty list would otherwise produce a
    # frame without a "uprn" column and fail below
    if not response or not response.get("rows"):
        logger.warning("No EPCs found for postcode %s", postcode)
        return pd.DataFrame()

    postcode_epcs = pd.DataFrame(response["rows"])

    # Records without a UPRN are identified by their address instead, so they still de-duplicate sensibly
    postcode_epcs["uprn"] = np.where(
        pd.isnull(postcode_epcs["uprn"]),
        postcode_epcs["address"],
        postcode_epcs["uprn"]
    )

    # Newest lodgement first, then keep the first (newest) record for each property
    postcode_epcs = postcode_epcs.sort_values("lodgement-date", ascending=False)
    postcode_epcs = postcode_epcs.drop_duplicates("uprn", keep="first")
    return postcode_epcs
def filter_and_prepare_epcs(epcs):
    """
    Flag the EPCs that match either targeting rule and drop the ones that match neither or are social housing.

    Two boolean columns are added to the frame that is passed in:
    - "Is Cavity Property": the wall description is one of CAVITY_WALL_DESCRIPTIONS and the SAP score is <= 72
    - "Solar and Loft": the roof description is one of ROOF_DESCRIPTIONS, "photo-supply" is "0", "" or "0.0",
      and the SAP score is <= 68

    :param epcs: DataFrame of EPC records
    :return: The rows that satisfy at least one rule and whose tenure is not in SOCIAL_TENURES
    """
    has_cavity_wall = epcs["walls-description"].isin(CAVITY_WALL_DESCRIPTIONS)
    within_cavity_score = epcs["current-energy-efficiency"].astype(int) <= 72
    epcs["Is Cavity Property"] = has_cavity_wall & within_cavity_score

    has_suitable_roof = epcs["roof-description"].isin(ROOF_DESCRIPTIONS)
    has_no_solar = epcs["photo-supply"].isin(["0", "", "0.0"])
    within_solar_score = epcs["current-energy-efficiency"].astype(int) <= 68
    epcs["Solar and Loft"] = has_suitable_roof & has_no_solar & within_solar_score

    # Keep anything that matches at least one of the two rules...
    targeted = epcs[epcs["Is Cavity Property"] | epcs["Solar and Loft"]]
    # ...and then remove the social tenures
    return targeted[~targeted["tenure"].isin(SOCIAL_TENURES)]
def rename_and_add_columns(epcs):
    """
    Reduce the EPC data to the reporting columns, give them readable names and add the derived estimates.

    Derived columns: "Estimated Number of Floors", "Estimated Perimeter (m)",
    "Estimated Heat Loss Perimeter (m2)" and "Roof Insulation Thickness".

    :param epcs: DataFrame of filtered EPC records, including the "Is Cavity Property" and "Solar and Loft" flags
    :return: A new DataFrame with the renamed and derived columns
    """
    # Retrieve just the data we need
    required_columns = [
        "uprn",
        "address",
        "postcode",
        "property-type",
        "built-form",
        "inspection-date",
        "current-energy-rating",
        "current-energy-efficiency",
        "roof-description",
        "walls-description",
        "transaction-type",
        # New fields needed
        "secondheat-description",
        "total-floor-area",
        "construction-age-band",
        "floor-height",
        "number-habitable-rooms",
        "mainheat-description",
        #
        "energy-consumption-current",  # kwh/m2
        "tenure",
        "Is Cavity Property",
        "Solar and Loft",
    ]
    display_names = {
        "address": "Address",
        "postcode": "Postcode",
        "inspection-date": "Date of last EPC",
        "current-energy-efficiency": "SAP score on register",
        "current-energy-rating": "EPC rating on register",
        "property-type": "Property Type",
        "built-form": "Archetype",
        "total-floor-area": "Property Floor Area",
        "construction-age-band": "Property Age Band",
        "floor-height": "Property Floor Height",
        "number-habitable-rooms": "Number of Habitable Rooms",
        "walls-description": "Wall Construction",
        "roof-description": "Roof Construction",
        "mainheat-description": "Heating Type",
        "secondheat-description": "Secondary Heating",
        "transaction-type": "Reason for last EPC",
        "energy-consumption-current": "Heat Demand (kWh/m2)",
        "tenure": "Tenure"
    }
    epcs = epcs[required_columns].rename(columns=display_names)

    epcs["Number of Habitable Rooms"] = epcs["Number of Habitable Rooms"].astype(int)
    epcs["Property Floor Area"] = epcs["Property Floor Area"].astype(float)

    def _number_of_floors(row):
        # No estimate is possible without a property type
        property_type = row["Property Type"]
        return estimate_number_of_floors(property_type) if pd.notnull(property_type) else None

    def _perimeter(row):
        # Floor area and room count are both spread evenly across the estimated floors
        floors = row["Estimated Number of Floors"]
        return estimate_perimeter(
            row["Property Floor Area"] / floors,
            row["Number of Habitable Rooms"] / floors
        )

    def _external_wall_area(row):
        # A missing/empty floor height falls back to 2.4
        recorded_height = row["Property Floor Height"]
        floor_height = float(recorded_height) if recorded_height else 2.4
        return estimate_external_wall_area(
            row["Estimated Number of Floors"],
            floor_height,
            row["Estimated Perimeter (m)"],
            row["Archetype"]
        )

    def _roof_insulation_thickness(row):
        roof_description = row["Roof Construction"]
        if pd.isnull(roof_description):
            return None
        return RoofAttributes(description=roof_description).process()["insulation_thickness"]

    # Each derived column depends on the ones before it, so the order of these assignments matters
    epcs["Estimated Number of Floors"] = epcs.apply(_number_of_floors, axis=1)
    epcs["Estimated Perimeter (m)"] = epcs.apply(_perimeter, axis=1)
    epcs["Estimated Heat Loss Perimeter (m2)"] = epcs.apply(_external_wall_area, axis=1)
    epcs["Roof Insulation Thickness"] = epcs.apply(_roof_insulation_thickness, axis=1)
    return epcs
def main():
    """
    Identify additional units that are private rentals or owner occupied and can be included in the
    route marches.

    Required inputs are the following:
    - An excel file that contains one or many tabs that include the addresses to be visited (see CONFIG)

    For every configured tab, the EPCs for each postcode are retrieved, filtered and written to a sheet of the
    output workbook. A tab for which no EPCs are found is skipped with a warning.
    """
    # This should be set:
    output_filepath = (
        "/Users/khalimconn-kowlessar/Documents/hestia/Route Marches/PRS and OO properties - WC 11.11.2024.xlsx"
    )
    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # The context manager closes the workbook even when processing a tab raises
    with pd.ExcelWriter(output_filepath, engine="xlsxwriter") as writer:
        for config in CONFIG:
            logger.info("Processing %s", config["tab"])

            # Read in the data
            route_march_addresses = pd.read_excel(
                config["filepath"],
                sheet_name=config["tab"],
                engine="openpyxl"
            )
            # Blank cells are read as NaN, which cannot be used as a postcode
            postcodes = route_march_addresses[config["postcode_column"]].dropna().unique()

            epcs = []
            for postcode in tqdm(postcodes):
                postcode_epcs = process_postcode_epcs(postcode, client)
                if postcode_epcs.empty:
                    continue
                epcs.append(postcode_epcs)

            if not epcs:
                # pd.concat raises on an empty list, so there is nothing to write for this tab
                logger.warning("No EPCs found for any postcode in %s", config["tab"])
                continue

            # Concatenate all postcodes' data and filter it
            epcs = pd.concat(epcs)
            epcs = filter_and_prepare_epcs(epcs)
            epcs = rename_and_add_columns(epcs)

            sheet_name = config["tab"][:31]  # Excel sheet names max length of 31 characters
            epcs.to_excel(writer, sheet_name=sheet_name, index=False)

    logger.info("Data successfully written to %s", output_filepath)

View file

@ -0,0 +1,10 @@
openpyxl
epc-api-python==1.0.2
numpy==2.1.2
pandas==2.2.3
usaddress==0.5.11
fuzzywuzzy==0.18.0
boto3==1.35.44
python-dotenv
tqdm
xlsxwriter

View file

@ -9,7 +9,8 @@ from etl.xml_survey_extraction.pcdb import heating_data
PROPERTY_TYPE_LOOKUP = {
"0": "House",
"House": "House",
"2": "Flat"
"2": "Flat",
"3": "Maisonette",
}
@ -107,11 +108,13 @@ class XmlParser:
BUILT_FORM_MAP = {
"1": "Detached",
"2": "Semi-Detached",
"3": "End-Terrace",
"4": "Mid-Terrace",
}
GLAZED_AREA_MAP = {
"2": "More than Typical",
"4": "Much More Than Typical"
}
@ -120,7 +123,9 @@ class XmlParser:
}
TRANSACTION_TYPE_MAP = {
"13": "ECO assessment"
"5": "Rented (social)",
"13": "ECO assessment",
"14": "Stock condition survey",
}
TENURE_MAP = {
@ -131,7 +136,8 @@ class XmlParser:
TARIFF_MAP = {
"1": "Dual",
"2": "Single"
"2": "Single",
"3": "Unknown"
}
def __init__(self, file, filekey, surveyor_company, uprn=None):
@ -400,8 +406,13 @@ class XmlParser:
]
wall_areas = sum([float(f["heat_loss_perimeter"]) * float(f["room_height"]) for f in main_dwelling_floors])
window_areas = sum([float(w["window_area"]) for w in main_dwelling_windows])
return wall_areas - window_areas
window_areas = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
if not window_areas:
# We discount 10% of the wall area
insulation_wall_area = wall_areas * 0.9
else:
insulation_wall_area = wall_areas - sum(window_areas)
return insulation_wall_area
def extract_additional_data(self):
@ -415,7 +426,8 @@ class XmlParser:
main_dwelling_windows = [w for w in self.windows if w["window_location"] == "0"]
number_of_windows = len(main_dwelling_windows)
windows_area = sum([float(w["window_area"]) for w in main_dwelling_windows])
windows_area = [float(w["window_area"]) for w in main_dwelling_windows if w["window_area"] is not None]
windows_area = sum(windows_area) if windows_area else None
boolean_lookup = {
"true": True,
@ -427,6 +439,7 @@ class XmlParser:
cylinder_insulation_type = {
None: "",
"1": "Foam",
"2": "Jacket"
}
cylinder_insulation_thickness = int(
@ -461,7 +474,7 @@ class XmlParser:
"cylinder_thermostat": cylinder_thermostat,
"main_dwelling_ground_floor_area": float(main_dwelling_ground_floor_area),
"number_of_windows": int(number_of_windows),
"windows_area": float(windows_area),
"windows_area": float(windows_area) if windows_area is not None else windows_area,
}
def get_node_value(self, tag_name):
@ -769,9 +782,10 @@ class XmlParser:
:return:
"""
sap_windows = self.xml.getElementsByTagName("SAP-Windows")[0].getElementsByTagName("SAP-Window")
glazing_type_lookup = {
"ND": "Single glazing",
"1": "double glazing installed before 2002",
"2": "double glazing installed during or after 2002",
"3": "double glazing, unknown install date",
"5": "Single glazing",
}
@ -787,6 +801,40 @@ class XmlParser:
"8": "North West"
}
sap_windows = self.xml.getElementsByTagName("SAP-Windows")
if not sap_windows:
# We look for Multi-Glazed-Proportion
multiple_glazing_type = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"Multiple-Glazing-Type"
)[0].firstChild.nodeValue
pvc_frame = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"PVC-Window-Frames"
)
pvc_frame = pvc_frame[0].firstChild.nodeValue if pvc_frame else None
multple_glazed_proportion = self.xml.getElementsByTagName("SAP-Property-Details")[0].getElementsByTagName(
"Multiple-Glazed-Proportion"
)[0].firstChild.nodeValue
self.windows = [
{
"window_location": "0",
"window_area": None,
"window_type": None,
"glazing_type": glazing_type_lookup[multiple_glazing_type],
"pvc_frame": pvc_frame,
"glazing_gap": None,
"orientation": None,
"multple_glazed_proportion": multple_glazed_proportion
}
]
return
sap_windows = sap_windows[0].getElementsByTagName("SAP-Window")
self.windows = [
self._parse_windows_content(
window=window,

View file

@ -1,12 +0,0 @@
address,postcode,Notes,,,,
28 Distillery Wharf,W6 9bf,,,,,
Flat 14 Godley V C House,E2 0LP,,,,,
49 Elderfield Road,E5 0LF,,,,,
26 Stanhope Road,N6 5NG,,,,,
Flat 3 Frederick Building,N1 4BD,,,,,
Flat 4 Frederick Building,N1 4BD,,,,,
"Flat 28, 22 Adelina Grove",E1 3BX,,,,,
"Flat 39, 239 Long Lane",SE1 4PT,,,,,
"1, Westview, Somerby",LE14 2QH,This property has an unfilled cavity,,,,
"59, Ashdale",CM23 4EB,This property has a partially filled cavity,,,,
88 Cleveland Avenue,DL3 7BE,This property has a filled cavity,,,,
1 address postcode Notes
2 28 Distillery Wharf W6 9bf
3 Flat 14 Godley V C House E2 0LP
4 49 Elderfield Road E5 0LF
5 26 Stanhope Road N6 5NG
6 Flat 3 Frederick Building N1 4BD
7 Flat 4 Frederick Building N1 4BD
8 Flat 28, 22 Adelina Grove E1 3BX
9 Flat 39, 239 Long Lane SE1 4PT
10 1, Westview, Somerby LE14 2QH This property has an unfilled cavity
11 59, Ashdale CM23 4EB This property has a partially filled cavity
12 88 Cleveland Avenue DL3 7BE This property has a filled cavity

View file

@ -1,3 +0,0 @@
address,postcode,Notes,,,,
2 South Terrace,NN1 5JY,,,,,
25 Albert Street,PO12 4TY,,,,,
1 address postcode Notes
2 2 South Terrace NN1 5JY
3 25 Albert Street PO12 4TY

View file

@ -37,22 +37,25 @@ MCS_SOLAR_PV_COST_DATA = {
"average_cost_per_kwh-Northern Ireland": 1347,
}
# Installers are now working with 435 watt panels
PANEL_SIZE = 0.435
INSTALLER_SOLAR_COSTS = [
{'n_panels': 4, 'array_kwp': 1.6, 'cost': 3040.00, 'installer': 'CEG'},
{'n_panels': 5, 'array_kwp': 2.1, 'cost': 3201.00, 'installer': 'CEG'},
{'n_panels': 6, 'array_kwp': 2.5, 'cost': 3363.00, 'installer': 'CEG'},
{'n_panels': 7, 'array_kwp': 2.9, 'cost': 3524.00, 'installer': 'CEG'},
{'n_panels': 8, 'array_kwp': 3.3, 'cost': 3686.00, 'installer': 'CEG'},
{'n_panels': 9, 'array_kwp': 3.7, 'cost': 3847.00, 'installer': 'CEG'},
{'n_panels': 10, 'array_kwp': 4.1, 'cost': 4009.00, 'installer': 'CEG'},
{'n_panels': 11, 'array_kwp': 4.5, 'cost': 4170.00, 'installer': 'CEG'},
{'n_panels': 12, 'array_kwp': 4.9, 'cost': 4332.00, 'installer': 'CEG'},
{'n_panels': 13, 'array_kwp': 5.3, 'cost': 4835.00, 'installer': 'CEG'},
{'n_panels': 14, 'array_kwp': 5.7, 'cost': 5015.00, 'installer': 'CEG'},
{'n_panels': 15, 'array_kwp': 6.2, 'cost': 5176.00, 'installer': 'CEG'},
{'n_panels': 16, 'array_kwp': 6.6, 'cost': 5338.00, 'installer': 'CEG'},
{'n_panels': 17, 'array_kwp': 7.0, 'cost': 5500.00, 'installer': 'CEG'},
{'n_panels': 18, 'array_kwp': 7.4, 'cost': 6021.00, 'installer': 'CEG'}
{'n_panels': 4, 'array_kwp': 4 * PANEL_SIZE, 'cost': 4089.25, 'installer': 'CEG'},
{'n_panels': 5, 'array_kwp': 5 * PANEL_SIZE, 'cost': 4242.48, 'installer': 'CEG'},
{'n_panels': 6, 'array_kwp': 6 * PANEL_SIZE, 'cost': 4395.71, 'installer': 'CEG'},
{'n_panels': 7, 'array_kwp': 7 * PANEL_SIZE, 'cost': 4548.94, 'installer': 'CEG'},
{'n_panels': 8, 'array_kwp': 8 * PANEL_SIZE, 'cost': 4702.17, 'installer': 'CEG'},
{'n_panels': 9, 'array_kwp': 9 * PANEL_SIZE, 'cost': 4855.41, 'installer': 'CEG'},
{'n_panels': 10, 'array_kwp': 10 * PANEL_SIZE, 'cost': 5010.95, 'installer': 'CEG'},
{'n_panels': 11, 'array_kwp': 11 * PANEL_SIZE, 'cost': 5166.49, 'installer': 'CEG'},
{'n_panels': 12, 'array_kwp': 12 * PANEL_SIZE, 'cost': 5322.04, 'installer': 'CEG'},
{'n_panels': 13, 'array_kwp': 13 * PANEL_SIZE, 'cost': 5657.6, 'installer': 'CEG'},
{'n_panels': 14, 'array_kwp': 14 * PANEL_SIZE, 'cost': 5993.16, 'installer': 'CEG'},
{'n_panels': 15, 'array_kwp': 15 * PANEL_SIZE, 'cost': 6328.71, 'installer': 'CEG'},
{'n_panels': 16, 'array_kwp': 16 * PANEL_SIZE, 'cost': 6483.33, 'installer': 'CEG'},
{'n_panels': 17, 'array_kwp': 17 * PANEL_SIZE, 'cost': 6637.95, 'installer': 'CEG'},
{'n_panels': 18, 'array_kwp': 18 * PANEL_SIZE, 'cost': 6792.57, 'installer': 'CEG'}
]
# This is the maximum number of panels that we have a cost from the installers for
INSTALLER_MAX_PANELS = 18
@ -62,11 +65,11 @@ INSTALLER_MAX_PANELS = 18
INSTALLER_SOLAR_PV_INVERTER_COST = 7500
INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST = 500 # Just a rough guess to labour costs
INSTALLER_SCAFFOLDING_COSTS = [
{'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'},
{'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'},
{'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'}
]
# INSTALLER_SCAFFOLDING_COSTS = [
# {'stories': 1, 'description': '1 Story Scaffold', 'cost': 531.00, 'installer': 'CEG'},
# {'stories': 2, 'description': '2 Story Scaffold', 'cost': 841.00, 'installer': 'CEG'},
# {'stories': 3, 'description': '3 Story Scaffold', 'cost': 1077.00, 'installer': 'CEG'}
# ]
# This data is based on the MCS database. We use the larger figure between the 2023 and 2024 average,
# to be conservative
@ -101,10 +104,10 @@ INSTALLER_ASHP_COSTS = [
BOILER_UPGRADE_SCHEME_ASHP_VALUE = 7500
INSTALLER_SOLAR_BATTERY_COSTS = [
{'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 2700.00, 'installer': 'CEG'},
{'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
{'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
{'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
{'capacity_kwh': 5, 'description': 'Battery Add on', 'cost': 3769.89, 'installer': 'JJC'},
# {'capacity_kwh': 10, 'description': 'Battery Add on', 'cost': 4300.00, 'installer': 'CEG'},
# {'capacity_kwh': 5, 'description': 'Battery Retrofit existing system', 'cost': 4250.00, 'installer': 'CEG'},
# {'capacity_kwh': 10, 'description': 'Battery Retrofit Existing system', 'cost': 5950.00, 'installer': 'CEG'}
]
# This is based on https://www.checkatrade.com/blog/cost-guides/cost-smart-thermostat/
@ -149,7 +152,7 @@ CONDENSING_BOILER_COSTS = {
ELECTRIC_BOILER_COSTS = 1800
# Assumes 1 hour to remove each heater (including re-decorating)
ROOM_HEATER_REMOVAL_COST = 50
ROOM_HEATER_REMOVAL_COST = 25
ROOM_HEATER_REMOVAL_LABOUR_HOURS = 3
# This is a cost quoted by Jim for a system flush - existing system will run more efficiently
@ -190,6 +193,8 @@ class Costs:
# fittings and trimming doors, as well as scope for damage to the existing wall during preparation.
IWI_CONTINGENCY = 0.2
# For air source heat pumps, we inflate the assumed cost by quite a bit to account for design and installation
ASHP_CONTINGENCY = 0.35
# Where there is more uncertainty, a higher contingency rate is used
HIGH_RISK_CONTINGENCY = 0.2
# When there is less uncertainty, a lower contingency rate is used
@ -234,6 +239,13 @@ class Costs:
if self.region is None:
# Try and grab using the local-authority-label
self.region = county_to_region_map.get(self.property.data["local-authority-label"], None)
if self.region is None:
# Try and get the region after converting the keys to lower
self.region = {
k.lower(): v for k, v in county_to_region_map.items()
}.get(self.property.data["local-authority-label"].lower(), None)
if self.region is None:
raise ValueError("Region not found in county map")
@ -719,8 +731,9 @@ class Costs:
"labour_days": labour_days
}
@classmethod
def solar_pv(
self,
cls,
n_panels: int | float,
has_battery: bool = False,
array_cost=None,
@ -758,33 +771,28 @@ class Costs:
else:
system_cost = [c for c in INSTALLER_SOLAR_COSTS if c["n_panels"] == n_panels][0]["cost"]
total_cost = array_cost if array_cost is not None else system_cost
subtotal = array_cost if array_cost is not None else system_cost
if has_battery:
battery_cost = [c for c in INSTALLER_SOLAR_BATTERY_COSTS if c["capacity_kwh"] == battery_kwh][0]["cost"]
total_cost += battery_cost
scaffolding_cost = [c for c in INSTALLER_SCAFFOLDING_COSTS if c["stories"] == n_floors][0]["cost"]
total_cost += scaffolding_cost
subtotal += battery_cost
if needs_inverter:
total_cost += INSTALLER_SOLAR_PV_INVERTER_COST
subtotal += INSTALLER_SOLAR_PV_INVERTER_COST
# We also add an additional labour cost
total_cost += INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST
subtotal += INSTALLER_SOLAR_PV_INVERTER_LABOUR_COST
# We add an additional cost for scaffolding
subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
vat = total_cost - subtotal_before_vat
# Solar doesn't have VAT but we add a high risk contingency
# to account for design variation that we see in practice
total_cost = subtotal * (1 + cls.HIGH_RISK_CONTINGENCY)
# Labour hours are based on estimates from online research but an average team seems to consist of 3 people
# and most jobs take around 2 days. Assuming an 8 hour day for 3 people across 2 days, gives us 48 hours of
# labour
return {
"total": total_cost,
"subtotal": subtotal_before_vat,
"vat": vat,
"subtotal": subtotal,
"vat": 0,
"labour_hours": 48,
"labour_days": 2,
}
@ -1154,7 +1162,6 @@ class Costs:
pump. This cost will include the boiler upgrade scheme grant
"""
# This is the average cost of a project, we'll add some additional contingency
if ashp_size is None:
@ -1163,9 +1170,10 @@ class Costs:
cost = [x for x in INSTALLER_ASHP_COSTS if x][0]["cost"]
# We add some contingency since there are additional costs such as resizing radiators, that could be required
total_cost = cost * (1 + self.CONTINGENCY)
subtotal_before_vat = total_cost / (1 + self.VAT_RATE)
vat = total_cost - subtotal_before_vat
subtotal = cost * (1 + self.ASHP_CONTINGENCY)
# The costs from installers exclude VAT
vat = subtotal * self.VAT_RATE
total_cost = subtotal + vat
# We assume 5 days installation
labour_days = 5
@ -1173,7 +1181,7 @@ class Costs:
return {
"total": total_cost,
"subtotal": subtotal_before_vat,
"subtotal": subtotal,
"vat": vat,
"labour_hours": labour_hours,
"labour_days": labour_days,

View file

@ -26,6 +26,9 @@ class DraughtProofingRecommendations:
if not draught_proofing_recommendation_config:
return
# Cost is based on a £50 cost per window, based on Checkatrade
cost = draught_proofing_recommendation_config.get("cost", self.property.number_of_windows * 50)
description = (
"Draught proof doors and windows to improve energy efficiency" if
not draught_proofing_recommendation_config.get("description")
@ -48,7 +51,7 @@ class DraughtProofingRecommendations:
"kwh_savings": 0,
"co2_equivalent_savings": 0,
"energy_cost_savings": 0,
"total": draught_proofing_recommendation_config["cost"],
"total": cost,
# We use a very simple and rough estimate of 4 hours per unit
"labour_hours": draught_proofing_recommendation_config.get("labour_hours", 8),
"labour_days": draught_proofing_recommendation_config.get("labour_days", 1), # Assume 8 hour day

View file

@ -145,7 +145,9 @@ class FloorRecommendations(Definitions):
)
return
raise NotImplementedError("Implement me!")
# In this case, we have no recommendation to make. E.g., if we have a solid floor property
# but solid floor insulation has been excluded as a measure, we get here
return
@staticmethod
def _make_floor_description(material):
@ -172,6 +174,11 @@ class FloorRecommendations(Definitions):
insulation_materials = pd.DataFrame(insulation_materials)
non_invasive_recs = next(
(r for r in self.property.non_invasive_recommendations if
r["type"] == insulation_materials["type"].values[0]), {}
)
lowest_selected_u_value = None
for _, insulation_material_group in insulation_materials.groupby("description"):
@ -217,6 +224,9 @@ class FloorRecommendations(Definitions):
else:
raise NotImplementedError("Implement me!")
sap_points = non_invasive_recs.get("sap_points", None)
survey = non_invasive_recs.get("survey", False)
floor_ending_config = FloorAttributes(new_description).process()
floor_simulation_config = check_simulation_difference(
new_config=floor_ending_config, old_config=self.property.floor, prefix="floor_"
@ -245,7 +255,8 @@ class FloorRecommendations(Definitions):
"description": self._make_floor_description(material),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"sap_points": sap_points,
"survey": survey,
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {

View file

@ -12,7 +12,7 @@ class HeatingControlRecommender:
self.recommendation = []
def recommend(self, heating_description, description_prefix="", description_suffix=""):
def recommend(self, heating_description, phase, description_prefix="", description_suffix=""):
# TODO: Many of these functions are quite similar. We can possibly create a single wrapper function that
# takes in the heating description and the description prefix/suffix, and then creates the appropriate
@ -23,32 +23,32 @@ class HeatingControlRecommender:
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
if heating_description in ["Room heaters, electric"]:
self.recommend_room_heaters_electric_controls()
self.recommend_room_heaters_electric_controls(phase=phase)
return
if heating_description in ["Electric storage heaters", "Electric storage heaters, radiators"]:
self.recommend_high_heat_retention_controls(description_prefix=description_prefix)
self.recommend_high_heat_retention_controls(description_prefix=description_prefix, phase=phase)
return
if heating_description in ["Boiler and radiators, mains gas"]:
# We can recommend roomstat programmer trvs
self.recommend_roomstat_programmer_trvs(description_suffix=description_suffix)
self.recommend_roomstat_programmer_trvs(description_suffix=description_suffix, phase=phase)
# We can also recommend time and temperature zone controls
self.recommend_time_temperature_zone_controls(description_suffix=description_suffix)
self.recommend_time_temperature_zone_controls(description_suffix=description_suffix, phase=phase)
return
if heating_description in ["Boiler and radiators, electric"]:
self.recommend_roomstat_programmer_trvs()
self.recommend_roomstat_programmer_trvs(phase=phase)
return
if heating_description in ["Air source heat pump, radiators, electric"]:
# For an ASHP, we can recommend time and temperature zone controls, as well as programmer, trvs and a bypass
# which are common configurations for ASHPs
self.recommend_time_temperature_zone_controls()
self.recommend_time_temperature_zone_controls(phase=phase)
# self.recommend_programmer_trvs_bypass()
def recommend_room_heaters_electric_controls(self):
def recommend_room_heaters_electric_controls(self, phase):
"""
If the home has Room heaters, electric, we start by identifying potential heating controls that could
be upgraded, that would provide a practical impact. This will be the least invasive improvement.
@ -88,6 +88,9 @@ class HeatingControlRecommender:
self.recommendation.append(
{
"phase": phase,
"type": "heating",
"measure_type": "programmer_appliance_thermostat",
"description": "upgrade heating controls to Programmer and Appliance or Smart Thermostats",
**self.costs.programmer_and_appliance_thermostat(has_programmer=has_programmer),
"simulation_config": simulation_config
@ -97,7 +100,7 @@ class HeatingControlRecommender:
# We don't implement any other recommendations right now
return
def recommend_high_heat_retention_controls(self, description_prefix=""):
def recommend_high_heat_retention_controls(self, phase, description_prefix=""):
"""
When applicable, we recommend upgrading the heating controls to high heat retention controls. This is a
specific type of control system that is designed to work with electric storage heaters. It is a more
@ -133,6 +136,9 @@ class HeatingControlRecommender:
self.recommendation.append(
{
"phase": phase,
"type": "heating",
"measure_type": "celect_type_controls",
"description": "Upgrade heating controls to High Heat Retention Storage Heater Controls",
**self.costs.celect_type_controls(),
"simulation_config": simulation_config,
@ -143,7 +149,7 @@ class HeatingControlRecommender:
# We don't implement any other recommendations right now
return
def recommend_roomstat_programmer_trvs(self, description_suffix=""):
def recommend_roomstat_programmer_trvs(self, phase, description_suffix=""):
"""
If the home has a boiler and radiators, mains gas, we start by identifying potential heating controls that could
be upgraded, that would provide a practical impact.
@ -208,15 +214,16 @@ class HeatingControlRecommender:
description = "Upgrade heating controls to Room thermostat, programmer and TRVs"
already_installed = "heating_control" in self.property.already_installed
already_installed = "roomstat_programmer_trvs" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
description = "Heating controls have already been upgraded, no further action needed."
self.recommendation.append(
{
"type": "heating_control",
"type": "heating",
"measure_type": "roomstat_programmer_trvs",
"phase": phase,
"parts": [],
"description": description,
**cost_result,
@ -231,7 +238,7 @@ class HeatingControlRecommender:
return
def recommend_time_temperature_zone_controls(self, description_suffix=""):
def recommend_time_temperature_zone_controls(self, phase, description_suffix=""):
"""
If the home has a boiler, we can recommend time and temperature zone controls. This is a more advanced
and more efficient control system than the standard controls that come with a boiler. However, it may come
@ -282,14 +289,15 @@ class HeatingControlRecommender:
"temperature zone control)"
)
already_installed = "heating_control" in self.property.already_installed
already_installed = "time_temperature_zone_control" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
description = "Heating controls have already been upgraded, no further action needed."
self.recommendation.append(
{
"type": "heating_control",
"type": "heating",
"phase": phase,
"measure_type": "time_temperature_zone_control",
"parts": [],
"description": description,
@ -335,14 +343,15 @@ class HeatingControlRecommender:
description = "Install a Bypass valve, TRVs and a Programmer"
already_installed = "heating_control" in self.property.already_installed
already_installed = "programmer_trvs_bypass" in self.property.already_installed
if already_installed:
cost_result = override_costs(cost_result)
description = "Heating controls have already been upgraded, no further action needed."
self.recommendation.append(
{
"type": "heating_control",
"type": "heating",
"measure_type": "programmer_trvs_bypass",
"parts": [],
"description": description,
**cost_result,

View file

@ -65,7 +65,6 @@ class HeatingRecommender:
self.costs = Costs(self.property)
self.heating_recommendations = []
self.heating_control_recommendations = []
self.has_electric_heating_description = (
self.property.main_heating["has_electric"] or self.property.main_heating["has_electricaire"]
@ -259,7 +258,6 @@ class HeatingRecommender:
"ashp_only_heating_recommendation", False
)
self.heating_recommendations = []
self.heating_control_recommendations = []
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
@ -302,7 +300,6 @@ class HeatingRecommender:
self.recommend_air_source_heat_pump(
phase=phase,
has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
)
return
@ -360,7 +357,7 @@ class HeatingRecommender:
}
controls_recommender = HeatingControlRecommender(self.property)
controls_recommender.recommend(heating_description="Boiler and radiators, electric")
controls_recommender.recommend(heating_description="Boiler and radiators, electric", phase=phase)
self.heating_recommendations.extend([boiler_recommendation] + controls_recommender.recommendation)
return
@ -453,7 +450,7 @@ class HeatingRecommender:
), {})
controls_recommender = HeatingControlRecommender(self.property)
controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric")
controls_recommender.recommend(heating_description="Air source heat pump, radiators, electric", phase=phase)
ashp_size = self.size_heat_pump()
ashp_costs = self.costs.air_source_heat_pump(ashp_size)
@ -631,7 +628,8 @@ class HeatingRecommender:
heating_controls_only,
system_change,
system_type,
measure_type
measure_type,
non_intrusive_recommendation=None
):
"""
Given a recommendation for heating controls, and a recommendation for the heating system, we combine the two
@ -649,8 +647,13 @@ class HeatingRecommender:
:param system_type: The type of heating system we are recommending
:param measure_type: The type of measure we are recommending - more granular than the "type" field, allowing us
to distinguish between different types of heating recommendations
:param non_intrusive_recommendation: A non-intrusive recommendation, which may specify the number of SAP points
or a cost for this recommendation
"""
if non_intrusive_recommendation is None:
non_intrusive_recommendation = {}
# We produce recommendations with & without heating controls
# We will also produce a recommendation for heating controls only
heating_controls_switch = [True, False] if controls_recommendations else [False]
@ -698,13 +701,14 @@ class HeatingRecommender:
"description": recommendation_description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"sap_points": non_intrusive_recommendation.get("sap_points"),
"already_installed": already_installed,
**total_costs,
"simulation_config": recommendation_simulation_config,
"description_simulation": recommendation_description_simulation,
# We insert the heating system type here
"system_type": system_type
"system_type": system_type,
"survey": non_intrusive_recommendation.get("survey", False)
}
output.append(recommendation)
@ -798,7 +802,9 @@ class HeatingRecommender:
description_prefix = ""
controls_recommender.recommend(
heating_description="Electric storage heaters", description_prefix=description_prefix
heating_description="Electric storage heaters",
description_prefix=description_prefix,
phase=phase
)
has_hhr = self.is_hhr_already_installed()
@ -807,6 +813,13 @@ class HeatingRecommender:
# No recommendation needed
return
# We check if there is a high heat retention non-intrusive recommendation
non_intrusive_recommendation = next(
(r for r in self.property.non_invasive_recommendations if
r["type"] == "high_heat_retention_storage_heater"),
{}
)
# We check if the property has dual heating in place with a boiler and storage heaters
if self.dual_heating:
new_heating_description = self.DUAL_HEATING_DESCRIPTIONS[
@ -838,6 +851,8 @@ class HeatingRecommender:
else:
heating_simulation_config["mainheat_energy_eff_ending"] = self.property.data["mainheat-energy-eff"]
# TODO: We possibly shouldn't touch the hot water energy efficiency if we aren't recommending dual immersion
# we'll keep this for the moment though
if self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor"]:
heating_simulation_config["hot_water_energy_eff_ending"] = "Average"
else:
@ -895,7 +910,8 @@ class HeatingRecommender:
heating_controls_only=heating_controls_only,
system_change=system_change,
system_type="high_heat_retention_storage_heater",
measure_type="high_heat_retention_storage_heater"
measure_type="high_heat_retention_storage_heater",
non_intrusive_recommendation=non_intrusive_recommendation
)
if _return:
return recommendations
@ -978,9 +994,13 @@ class HeatingRecommender:
# We check if there's a mains connection and the hot water is inefficient, as this will improve with a boiler
has_inefficient_water = (
self.property.data["mains-gas-flag"] and
self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor", "Average"]
self.property.data["hot-water-energy-eff"] in ["Very Poor", "Poor"]
)
non_invasive_recommendation = next((
r for r in self.property.non_invasive_recommendations if r["type"] == "boiler_upgrade"
), {})
if has_inefficient_space_heating or has_inefficient_water:
boiler_size = self.estimate_boiler_size(
property_type=self.property.data["property-type"],
@ -1079,12 +1099,13 @@ class HeatingRecommender:
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"sap_points": non_invasive_recommendation.get("sap_points", None),
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": description_simulation,
**boiler_costs,
"system_type": "boiler_upgrade",
"survey": non_invasive_recommendation.get("survey", None)
}
# We recommend the heating controls
@ -1098,10 +1119,10 @@ class HeatingRecommender:
description_suffix = ""
controls_recommender.recommend(
heating_description="Boiler and radiators, mains gas",
description_suffix=description_suffix
description_suffix=description_suffix,
phase=recommendation_phase
)
# We may have 2 recommendations from the heating controls
if not controls_recommender.recommendation and not boiler_recommendation:
return
@ -1111,6 +1132,8 @@ class HeatingRecommender:
if system_change:
# We combine the heating and controls recommendations, in the case of a system change
# If this is true, we set SAP points to None and survey to False for the boiler recommendation
combined_recommendations = []
for controls_recommendation in controls_recommender.recommendation:
combined_recommendation = self.combine_heating_and_controls(
@ -1137,10 +1160,6 @@ class HeatingRecommender:
# 3) Heating controls only
# But they are options that are not mutually exclusive
# So, we actually set heating controls as a heating recommendation
for recommendation in controls_recommender.recommendation:
recommendation["phase"] = recommendation_phase
# recommendation["type"] = "heating"
self.heating_control_recommendations.extend(controls_recommender.recommendation)
self.heating_recommendations.extend(controls_recommender.recommendation)
return

View file

@ -20,26 +20,66 @@ class HotwaterRecommendations:
:return:
"""
# Reset the recommendations
recommendations_phase = phase
self.recommendations = []
non_invasive_recommendations = self.property.non_invasive_recommendations
if non_invasive_recommendations:
measures = [
r["type"] for r in non_invasive_recommendations if
r["type"] in ["hot_water_tank_insulation", "cylinder_thermostat"]
]
for m in measures:
non_invasive_rec = [
r for r in non_invasive_recommendations if r["type"] == m
][0]
if m == "hot_water_tank_insulation":
# We need to be able to stack these recommendations
self.recommend_tank_insulation(
phase=recommendations_phase,
sap_points=non_invasive_rec["sap_points"],
survey=non_invasive_rec["survey"],
)
recommendations_phase += 1
elif m == "cylinder_thermostat":
self.recommend_cylinder_thermostat(
phase=recommendations_phase,
sap_points=non_invasive_rec["sap_points"],
survey=non_invasive_rec["survey"],
)
recommendations_phase += 1
# This first iteration of the recommender will provide very basic recommendation
# We recommend heating controls based on the main heating system
# If there is no system present, but access to the mains, we
if self.property.hotwater["clean_description"] == "Gas boiler/circulator, no cylinder thermostat":
# Handle this case specifically:
self.recommend_cylinder_thermostat_gas_boiler_circulator(phase=recommendations_phase)
return
# If there is no system present, but access to the mains, we
has_tank_recommendation = [r for r in self.recommendations if r["type"] == "hot_water_tank_insulation"]
if (
(self.property.hotwater["heater_type"] in ["electric immersion"]) &
(self.property.data["hot-water-energy-eff"] == "Very Poor") &
(self.property.hotwater["no_system_present"] is None)
(self.property.hotwater["no_system_present"] is None) &
(len(has_tank_recommendation) == 0)
):
self.recommend_tank_insulation(phase=phase)
self.recommend_tank_insulation(phase=recommendations_phase)
return
if self.property.hotwater["clean_description"] == "From main system, no cylinder thermostat":
self.recommend_cylinder_thermostat(phase=phase)
has_cylinder_recommendation = [r for r in self.recommendations if r["type"] == "cylinder_thermostat"]
if ((self.property.hotwater["clean_description"] == "From main system, no cylinder thermostat") &
(len(has_cylinder_recommendation) == 0)):
self.recommend_cylinder_thermostat(phase=recommendations_phase)
return
def recommend_tank_insulation(self, phase):
def recommend_tank_insulation(self, phase, sap_points=None, survey=False, _return=False):
"""
If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water
tank. This is a very simple and cost effective improvement that can be made to the home. It will likely
@ -55,27 +95,30 @@ class HotwaterRecommendations:
else:
description = "Insulate hot water tank"
self.recommendations.append(
{
"phase": phase,
"parts": [],
"type": "hot_water_tank_insulation",
"measure_type": "hot_water_tank_insulation",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**recommendation_cost,
"simulation_config": {"hot_water_energy_eff_ending": "Poor"},
"description_simulation": {
"hot-water-energy-eff": "Poor"
}
}
)
to_append = {
"phase": phase,
"parts": [],
"type": "hot_water_tank_insulation",
"measure_type": "hot_water_tank_insulation",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": sap_points,
"already_installed": already_installed,
**recommendation_cost,
"simulation_config": {"hot_water_energy_eff_ending": "Poor"},
"description_simulation": {
"hot-water-energy-eff": "Poor"
},
"survey": survey
}
if _return:
return to_append
self.recommendations.append(to_append)
return
def recommend_cylinder_thermostat(self, phase):
def recommend_cylinder_thermostat(self, phase, sap_points=None, survey=False, _return=False):
"""
If the home has a very poor hot water system, this is often indicative of a lack of insulation on the hot water
tank. This is a very simple and cost effective improvement that can be made to the home.
@ -101,23 +144,86 @@ class HotwaterRecommendations:
**hotwater_simulation_config
}
self.recommendations.append(
{
"phase": phase,
"parts": [],
"type": "cylinder_thermostat",
"measure_type": "cylinder_thermostat",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": already_installed,
**recommendation_cost,
"simulation_config": simulation_config,
"description_simulation": {
"hot-water-energy-eff": self.property.data["hot-water-energy-eff"],
"hotwater-description": new_epc_description,
}
}
)
to_append = {
"phase": phase,
"parts": [],
"type": "cylinder_thermostat",
"measure_type": "cylinder_thermostat",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": sap_points,
"already_installed": already_installed,
**recommendation_cost,
"simulation_config": simulation_config,
"description_simulation": {
"hot-water-energy-eff": self.property.data["hot-water-energy-eff"],
"hotwater-description": new_epc_description,
},
"survey": survey
}
if _return:
return to_append
self.recommendations.append(to_append)
return
def recommend_cylinder_thermostat_gas_boiler_circulator(self, phase):
    """
    Recommend a cylinder thermostat together with hot water tank insulation as a single combined measure.

    The cost breakdowns of the two measures are added together key by key, and the resulting recommendation
    is appended to self.recommendations under the "cylinder_thermostat" type.

    :param phase: The phase that this recommendation is assigned to
    :return:
    """
    thermostat_cost = self.costs.cylinder_thermostat()
    insulation_cost = self.costs.hot_water_tank_insulation()

    # Combine both cost breakdowns, driven by the keys of the thermostat costs
    combined_cost = {key: value + insulation_cost[key] for key, value in thermostat_cost.items()}

    already_installed = "cylinder_thermostat" in self.property.already_installed
    description = "Install a smart cylinder thermostat and insulate the hot water tank with 80mm insulation"
    if already_installed:
        # Nothing left to do, so the costs are passed through override_costs and the description says so
        combined_cost = override_costs(combined_cost)
        description = "Cylinder thermostat & insulation has already been installed, no further action required"

    new_epc_description = "From main system"
    ending_config = HotWaterAttributes(new_epc_description).process()
    config_difference = check_simulation_difference(
        new_config=ending_config, old_config=self.property.hotwater
    )

    # Anything rated Average or worse is lifted to Good, otherwise the current rating is kept
    current_efficiency = self.property.data["hot-water-energy-eff"]
    new_efficiency = "Good" if current_efficiency in ("Very Poor", "Poor", "Average") else current_efficiency

    # Keys from the config difference are applied last, so they win over the efficiency set here
    simulation_config = {
        "hot_water_energy_eff_ending": new_efficiency,
        **config_difference
    }

    recommendation = {
        "phase": phase,
        "parts": [],
        "type": "cylinder_thermostat",
        "measure_type": "cylinder_thermostat",
        "description": description,
        "starting_u_value": None,
        "new_u_value": None,
        "sap_points": None,
        "already_installed": already_installed,
        **combined_cost,
        "simulation_config": simulation_config,
        "description_simulation": {
            "hot-water-energy-eff": simulation_config["hot_water_energy_eff_ending"],
            "hotwater-description": new_epc_description,
        },
        "survey": False
    }
    self.recommendations.append(recommendation)

View file

@ -4,6 +4,7 @@ from backend.Property import Property
from typing import List
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
class LightingRecommendations:
@ -161,6 +162,7 @@ class LightingRecommendations:
# the proportion of lights that will be set to low energy
"sap_points": sap_points,
"kwh_savings": heat_demand_change,
"energy_cost_savings": heat_demand_change * AnnualBillSavings.ELECTRICITY_PRICE_CAP,
"co2_equivalent_savings": carbon_change,
"description_simulation": {
"lighting-energy-eff": "Very Good",

View file

@ -142,19 +142,17 @@ class Recommendations:
# Ventilation recommendations
# We only produce a ventilation recommendation if the property is recommended to have wall or roof
# insulation
# We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
# has no
# real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
# have any
# wall or roof recommendations, we will ensure that ventilation is included in the simulation
# insulation We will not attribute a SAP impact to the ventilation recommendation, since we've seen that this
# has no real impact on the SAP score. Therefore, we don't need to include phasing for ventilation. If we
# have any wall or roof recommendations, we will ensure that ventilation is included in the simulation
if (
(self.wall_recomender.recommendations or self.roof_recommender.recommendations) and
("ventilation" in measures)
):
self.ventilation_recomender.recommend()
self.ventilation_recomender.recommend(phase=phase)
if self.ventilation_recomender.recommendation:
property_recommendations.append(self.ventilation_recomender.recommendation)
phase += 1
if "trickle_vents" in measures:
# This is a recommendation that typically comes from an energy assessment
@ -211,27 +209,25 @@ class Recommendations:
measures=measures,
has_cavity_or_loft_recommendations=has_cavity_or_loft_recommendations,
)
if (
self.heating_recommender.heating_recommendations or
self.heating_recommender.heating_control_recommendations
):
if self.heating_recommender.heating_recommendations:
# We split into first and second phase recommendations
first_phase_recommendations = [
r for r in (
self.heating_recommender.heating_recommendations +
self.heating_recommender.heating_control_recommendations
self.heating_recommender.heating_recommendations
)
if r["phase"] == phase
]
second_phase_recommendations = [
r for r in (
self.heating_recommender.heating_recommendations +
self.heating_recommender.heating_control_recommendations
self.heating_recommender.heating_recommendations
)
if r["phase"] == phase + 1
]
if first_phase_recommendations and second_phase_recommendations:
raise Exception("Imeplement me")
if first_phase_recommendations:
property_recommendations.append(first_phase_recommendations)
@ -243,8 +239,7 @@ class Recommendations:
# otherwise we increment by 1
max_used_phase = max(
[rec["phase"] for rec in
self.heating_recommender.heating_recommendations +
self.heating_recommender.heating_control_recommendations]
self.heating_recommender.heating_recommendations]
)
amount_to_increment = max_used_phase - phase + 1
phase += amount_to_increment
@ -253,8 +248,13 @@ class Recommendations:
if "hot_water" in measures:
self.hotwater_recommender.recommend(phase=phase)
if self.hotwater_recommender.recommendations:
property_recommendations.append(self.hotwater_recommender.recommendations)
phase += 1
if len(self.hotwater_recommender.recommendations) > 1:
for r in self.hotwater_recommender.recommendations:
property_recommendations.append([r])
phase += 1
else:
property_recommendations.append(self.hotwater_recommender.recommendations)
phase += 1
if "secondary_heating" in measures:
self.secondary_heating_recommender.recommend(phase=phase)
@ -304,12 +304,12 @@ class Recommendations:
# want to include the cavity wall insulation recommendation in the defaults
if recommendations_by_type[0].get("type") in [
"mechanical_ventilation", "trickle_vents", "draught_proofing"
"trickle_vents", "draught_proofing"
]:
continue
has_u_value = recommendations_by_type[0].get("new_u_value") is not None
has_sap_points = recommendations_by_type[0].get("sap_points") is not None
has_sap_points = all([r.get("sap_points") is not None for r in recommendations_by_type])
has_rank = recommendations_by_type[0].get("rank") is not None
# When check if these recommendations have two different types, such as solid wall insulation
@ -447,6 +447,7 @@ class Recommendations:
property_instance,
all_predictions,
recommendations,
representative_recommendations,
):
"""
@ -460,6 +461,7 @@ class Recommendations:
:param property_instance: Instance of the Property class, for the home associated to property_id
:param all_predictions: dictionary of predictions from the model apis
:param recommendations: dictionary of recommendations for the property
:param representative_recommendations: dictionary of representative recommendations for the property
:return:
"""
@ -471,15 +473,20 @@ class Recommendations:
property_recommendations = recommendations[property_instance.id].copy()
representative_recs = representative_recommendations[property_instance.id].copy()
representative_ids = [r["recommendation_id"] for r in representative_recs]
increasing_variables = ["sap"]
decreasing_variables = ["carbon", "heat_demand"]
# If the recommendation is mechanical ventilation, we don't apply the rule that the new value should be higher
mv_increasing_variables = ["carbon", "heat_demand"]
mv_decreasing_variables = ["sap"]
impact_summary = []
for recommendations_by_type in property_recommendations:
for rec in recommendations_by_type:
if rec["type"] in [
"mechanical_ventilation", "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"
]:
if rec["type"] in ["trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"]:
# We don't have a perceived sap impact of mechanical ventilation or trickle vents, and we don't
# have the capacity to score draught proofing
if rec["type"] == "extension_cavity_wall_insulation":
@ -497,7 +504,9 @@ class Recommendations:
impact_summary.append(
{
"phase": rec["phase"],
"representative": rec["recommendation_id"] in representative_ids,
"recommendation_id": rec["recommendation_id"],
"measure_type": rec["measure_type"],
"sap": sap + rec["sap_points"],
"carbon": carbon - rec["co2_equivalent_savings"],
"heat_demand": heat_demand - rec["heat_demand"],
@ -519,15 +528,21 @@ class Recommendations:
# heating_cost_starting and heating_cost_ending are just the values in the EPC. However, with
# heating_cost_ending, we expect that the EPC will predict a heating cost based on what would happen
# if we implemented the recommendation today, so our starting value is the EPC
previous_phase_values = {
"sap": float(property_instance.data["current-energy-efficiency"]),
# For carbon, even though we generally use the updated figure which includes the carbon
# associated to appliances, for this scoring process we use the EPC carbon value. This means
# that we don't overestimate the impact since the model uses the EPC carbon value
"carbon": float(property_instance.data["co2-emissions-current"]),
"heat_demand": float(property_instance.data["energy-consumption-current"]),
}
else:
previous_phase_values_multiple = [x for x in impact_summary if x["phase"] == (rec["phase"] - 1)]
previous_phase_values_multiple = [
x for x in impact_summary if x["phase"] == (rec["phase"] - 1) and x["representative"]
]
if len(previous_phase_values_multiple) != 1:
# Take an average of each of the previous phases
keys_to_median = ["sap", "carbon", "heat_demand"]
@ -541,8 +556,13 @@ class Recommendations:
previous_phase_values = previous_phase_values_multiple[0]
# We extract the values for the current phase
if rec.get("survey", False):
current_phase_sap = rec["sap_points"] + previous_phase_values["sap"]
else:
current_phase_sap = phase_energy_efficiency_metrics["sap_change"]
current_phase_values = {
"sap": phase_energy_efficiency_metrics["sap_change"],
"sap": current_phase_sap,
"carbon": phase_energy_efficiency_metrics["carbon_change"],
"heat_demand": phase_energy_efficiency_metrics["heat_demand"],
}
@ -552,13 +572,23 @@ class Recommendations:
# For decreasing variables, the new value should be lower than the previous, otherwise we set it to
# the previous
# In either case, we adjudge the recommendation to have had no/negligible impact
for v in increasing_variables:
# However, if the recommendation is mechanical ventilation, this can have a negative SAP impact so
# we don't apply this rule
if rec["type"] == "mechanical_ventilation":
phase_increasing_variables = mv_increasing_variables
phase_decreasing_variables = mv_decreasing_variables
else:
phase_increasing_variables = increasing_variables
phase_decreasing_variables = decreasing_variables
for v in phase_increasing_variables:
current_phase_values[v] = (
current_phase_values[v] if current_phase_values[v] > previous_phase_values[v] else
previous_phase_values[v]
)
for v in previous_phase_values:
if v in decreasing_variables:
if v in phase_decreasing_variables:
current_phase_values[v] = (
current_phase_values[v] if current_phase_values[v] < previous_phase_values[v] else
previous_phase_values[v]
@ -573,13 +603,19 @@ class Recommendations:
"heat_demand": previous_phase_values["heat_demand"] - current_phase_values["heat_demand"],
}
# Prevent from being negative
# Prevent from being negative - apart from ventilation
for metric in ["sap", "carbon", "heat_demand"]:
property_phase_impact[metric] = (
0 if property_phase_impact[metric] < 0 else property_phase_impact[metric]
)
if metric == "sap":
property_phase_impact[metric] = round(property_phase_impact[metric], 2)
if rec["type"] != "mechanical_ventilation":
property_phase_impact[metric] = (
0 if property_phase_impact[metric] < 0 else property_phase_impact[metric]
)
if metric == "sap":
property_phase_impact[metric] = round(property_phase_impact[metric], 2)
else:
# We prevent these from being positive
property_phase_impact[metric] = (
0 if property_phase_impact[metric] > 0 else property_phase_impact[metric]
)
# For the moment, we cap the number of SAP points that can be achieved by LEDs at 2
if rec["type"] == "low_energy_lighting":
@ -599,11 +635,18 @@ class Recommendations:
# By limiting here, we don't change the value in current_phase_values. This means that the
# future recommendations won't have an impact that is too large
li_sap_limit = RoofRecommendations.get_loft_insulation_sap_limit(
property_instance.data["roof-energy-eff"], property_instance.data["extension-count"]
property_instance.data["roof-energy-eff"], property_instance.roof["insulation_thickness"]
)
if li_sap_limit is not None:
property_phase_impact["sap"] = min(property_phase_impact["sap"], li_sap_limit)
if rec["type"] == "solar_pv":
# We use the SAP points in the recommendation as a minimum
property_phase_impact["sap"] = (
rec["sap_points"] if property_phase_impact["sap"] < rec["sap_points"] else
property_phase_impact["sap"]
)
# Insert this information into the recommendation.
if not rec.get("survey", False):
rec["sap_points"] = property_phase_impact["sap"]
@ -620,7 +663,9 @@ class Recommendations:
impact_summary.append(
{
"phase": rec["phase"],
"representative": rec["recommendation_id"] in representative_ids,
"recommendation_id": rec["recommendation_id"],
"measure_type": rec["measure_type"],
**current_phase_values
}
)
@ -628,7 +673,9 @@ class Recommendations:
return property_recommendations, impact_summary
@staticmethod
def map_descriptions_to_fuel(heating_description, hotwater_description, main_fuel_description):
def map_descriptions_to_fuel(
heating_description, hotwater_description, main_fuel_description, descriptions_to_fuel_types
):
# Handle the case of community schemes
if (heating_description == "Community scheme") or (hotwater_description == "Community scheme"):
@ -641,7 +688,7 @@ class Recommendations:
}
raise NotImplementedError("Handle this case")
mapped = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[heating_description]
mapped = descriptions_to_fuel_types[heating_description]
heating_fuel = mapped["fuel"]
if hotwater_description in [
@ -661,7 +708,7 @@ class Recommendations:
"heating_cop": mapped["cop"], "hotwater_cop": 1
}
mapped_hotwater = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[hotwater_description]
mapped_hotwater = descriptions_to_fuel_types[hotwater_description]
return {
"heating_fuel_type": heating_fuel, "hotwater_fuel_type": mapped_hotwater["fuel"],
@ -670,17 +717,24 @@ class Recommendations:
@classmethod
def calculate_recommendation_tenant_savings(
cls, property_instance, kwh_simulation_predictions, property_recommendations
cls, property_instance, kwh_simulation_predictions, property_recommendations, ashp_cop=None
):
"""
This method inserts the kwh savings and the bill savings that the customer will make from the recommendations
based on the predictions from the ML model
It also ensures we base our solar savings and solar carbon savings from the calculations based on
the solar API and size of the array, instead of ML model
:param property_instance: Instance of the Property class, for the home associated to property_id
:param kwh_simulation_predictions: dictionary of predictions from the model apis
:param property_recommendations: dictionary of recommendations for the property
:param ashp_cop: The coefficient of performance for the air source heat pump.
:return:
"""
ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY
kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
].merge(
@ -739,22 +793,42 @@ class Recommendations:
]
).sort_values(["phase", "recommendation_id"], ascending=True).reset_index(drop=True)
# We need the recommendation type
rec_id_to_type = {
rec["recommendation_id"]: rec["type"] for recs in property_recommendations for rec in recs
}
rec_id_to_type[STARTING_DUMMY_ID_VALUE] = "starting_dummy"
for i in range(0, len(kwh_impact_table)):
current_phase = kwh_impact_table.loc[i, 'phase']
current = kwh_impact_table.loc[i]
current_phase = current['phase']
previous_phase_id = (current_phase - 1) if (current_phase > 0) else -9999
previous_phase = kwh_impact_table[kwh_impact_table['phase'] == previous_phase_id]
if not previous_phase.empty:
for col in ["predictions_heating", "predictions_hotwater"]:
# Check if the recommendation type is ventilation
if rec_id_to_type[current["recommendation_id"]] == "mechanical_ventilation":
# We expect the kwh to increase
if kwh_impact_table.loc[i, col] > previous_phase[col].max():
continue
if kwh_impact_table.loc[i, col] > previous_phase[col].max():
kwh_impact_table.loc[i, col] = previous_phase[col].max()
# Work on a per-call copy of the fuel-type mapping. The heat pump efficiency override below writes into the
# mapping, and without the copy it would permanently modify the shared assumptions.DESCRIPTIONS_TO_FUEL_TYPES
import copy
descriptions_to_fuel_types = copy.deepcopy(assumptions.DESCRIPTIONS_TO_FUEL_TYPES)
# We override the air source heat pump efficiencies with the requested coefficient of performance
ashp_keys = [k for k in descriptions_to_fuel_types if "air source heat pump" in k.lower()]
for k in ashp_keys:
    descriptions_to_fuel_types[k]["cop"] = ashp_cop
# For heating system recommendations, this could result in a fuel type change so we reflect that
fuel_mapping = pd.DataFrame([
{
"id": epc["id"],
**cls.map_descriptions_to_fuel(
epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"]
epc["mainheat-description"], epc["hotwater-description"], epc["main-fuel"],
descriptions_to_fuel_types
)
} for epc in property_instance.updated_simulation_epcs
])
@ -768,7 +842,8 @@ class Recommendations:
**cls.map_descriptions_to_fuel(
property_instance.data["mainheat-description"],
property_instance.data["hotwater-description"],
property_instance.data["main-fuel"]
property_instance.data["main-fuel"],
descriptions_to_fuel_types
)
}
]
@ -797,7 +872,7 @@ class Recommendations:
for recs in property_recommendations:
for rec in recs:
if rec["type"] in [
"mechanical_ventilation", "trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"
"trickle_vents", "draught_proofing", "extension_cavity_wall_insulation"
]:
# We cannot score the impact on draught proofing
continue
@ -808,6 +883,12 @@ class Recommendations:
if rec["type"] == "solar_pv":
rec["kwh_savings"] = rec_impact["solar_kwh_savings"].values[0]
# Calculate carbon savings from this - emissions in kg and convert to tonnes
emissions_kg = rec["kwh_savings"] * assumptions.ELECTRICITY_CARBON_INTENSITY
emissions_tonnes = emissions_kg / 1000
rec["co2_equivalent_savings"] = emissions_tonnes
rec["energy_cost_savings"] = (
rec_impact["solar_kwh_savings"].values[0] * AnnualBillSavings.ELECTRICITY_PRICE_CAP
)
@ -816,13 +897,18 @@ class Recommendations:
heating_kwh_savings = (
previous_phase_impact["predictions_heating"].mean() - rec_impact["predictions_heating"].values[0]
)
heating_cost_savings = (
previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
)
hotwater_kwh_savings = (
previous_phase_impact["predictions_hotwater"].mean() - rec_impact["predictions_hotwater"].values[0]
)
# Shouldn't be positive
if rec["type"] == "mechanical_ventilation":
heating_kwh_savings = 0 if heating_kwh_savings > 0 else heating_kwh_savings
hotwater_kwh_savings = 0 if hotwater_kwh_savings > 0 else hotwater_kwh_savings
heating_cost_savings = (
previous_phase_impact["heating_cost"].mean() - rec_impact["heating_cost"].values[0]
)
hotwater_host = (
previous_phase_impact["hotwater_cost"].mean() - rec_impact["hotwater_cost"].values[0]
)
@ -830,9 +916,8 @@ class Recommendations:
total_kwh_savings = heating_kwh_savings + hotwater_kwh_savings
energy_cost_savings = heating_cost_savings + hotwater_host
if rec["type"] == "lighting":
# In this case, we should probably just SKIP but check when we have one!
raise Exception("Implement me 3")
if rec["type"] == "low_energy_lighting":
continue
rec["kwh_savings"] = total_kwh_savings
rec["energy_cost_savings"] = energy_cost_savings

View file

@ -52,6 +52,10 @@ class RoofRecommendations:
part for part in materials if part["type"] == "flat_roof_insulation"
]
self.room_roof_insulation_materials = [
part for part in materials if part["type"] == "room_roof_insulation"
]
# Extract the insulation thickness from the roof, which is used throughout this method
self.insulation_thickness = convert_thickness_to_numeric(
self.property.roof["insulation_thickness"],
@ -60,16 +64,16 @@ class RoofRecommendations:
)
@classmethod
def get_loft_insulation_sap_limit(cls, roof_energy_eff, extension_count):
def get_loft_insulation_sap_limit(cls, roof_energy_eff, existing_thickness):
"""
Get the SAP limit for loft insulation
:param roof_energy_eff:
:return:
"""
if extension_count == 0:
# No limit
return None
if str(existing_thickness).isdigit():
if float(existing_thickness) >= 250:
return 0
if roof_energy_eff in ["Good", "Very Good"]:
return 1
@ -123,7 +127,11 @@ class RoofRecommendations:
self.property.roof["insulation_thickness"] in ["average", "above_average"]
)
return full_insulated_room_roof or room_roof_insulated_at_rafters
has_non_invasive_recommendation = any(
x["type"] == "room_roof_insulation" for x in self.property.non_invasive_recommendations
)
return (full_insulated_room_roof or room_roof_insulated_at_rafters) and not has_non_invasive_recommendation
def recommend(self, phase, measures=None, default_u_values=False):
@ -134,6 +142,10 @@ class RoofRecommendations:
u_value = self.property.roof["thermal_transmittance"]
# If we have a flat roof but we don't have flat roof as a measure, we exit
if self.property.roof["is_flat"] and "flat_roof_insulation" not in measures:
return
# We check if the roof is already insulated and if so, we exit
# Building regulations part L recommend installing at least 270mm of insulation, however generally we
@ -148,6 +160,9 @@ class RoofRecommendations:
if self.is_room_roof_insulated_or_unsuitable(measures):
return
if self.property.roof["is_thatched"]:
return
# If we have a u-value already, need to implement this
if u_value:
if u_value <= self.BUILDING_REGULATIONS_PART_L_MAX_U_VALUE:
@ -181,7 +196,8 @@ class RoofRecommendations:
# We firstly handle non-intrusive recommendations, which may override the normal roof insulation recommendations
if ("loft_insulation" in [x["type"] for x in non_invasive_recommendations]) or (
self.property.roof["is_pitched"] and "loft_insulation" in measures
self.property.roof["is_pitched"] and "loft_insulation" in measures and
not self.property.roof["is_at_rafters"]
):
self.recommend_roof_insulation(
u_value=u_value,
@ -282,6 +298,11 @@ class RoofRecommendations:
insulation_materials = pd.DataFrame(insulation_materials)
non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if
r["type"] == insulation_materials["type"].values[0]), {}
)
lowest_selected_u_value = None
recommendations = []
for _, insulation_material_group in insulation_materials.groupby("description"):
@ -421,14 +442,15 @@ class RoofRecommendations:
"description": self.make_roof_insulation_description(material),
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"sap_points": non_invasive_recommendations.get("sap_points", 0),
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {
"roof-description": new_description,
"roof-energy-eff": new_efficiency
},
**cost_result
**cost_result,
"survey": non_invasive_recommendations.get("survey", False)
}
)
@ -478,28 +500,22 @@ class RoofRecommendations:
:return:
"""
# TODO: We temporarilty use costs from SCIS for RIR insulation. The costing was £180/m2 floor
roof_roof_insulation_materials = [
{
"type": "room_roof_insulation",
"description": "Insulating the ceiling of the roof roof and re-decorate",
"depths": [100],
"depth_unit": "mm",
"r_value_per_mm": 0.038,
"thermal_conductivity": 0.022,
"cost": [180],
}
]
# We have a list of materials that can be used for room roof insulation
# We will iterate over these materials and recommend them based on the current u-value of the roof
# and the cost of the materials
rir_non_invasive_recommendation = next(
(x for x in self.property.non_invasive_recommendations if x["type"] == "room_roof_insulation"), {}
)
insulation_materials = pd.DataFrame(self.room_roof_insulation_materials)
# lowest_selected_u_value = None
recommendations = []
for material in roof_roof_insulation_materials:
for depth, cost_per_unit in zip(material["depths"], material["cost"]):
part_u_value = r_value_per_mm_to_u_value(depth, material["r_value_per_mm"])
for _, material_group in insulation_materials.groupby("description"):
for material in material_group.itertuples():
part_u_value = r_value_per_mm_to_u_value(material.depth, material.r_value_per_mm)
_, new_u_value = calculate_u_value_uplift(u_value, part_u_value)
new_u_value = math.ceil(new_u_value * 100.0) / 100.0
@ -507,13 +523,11 @@ class RoofRecommendations:
# We allow a small tolerance for error so we don't discount the recommendation entirely
estimated_cost = (
cost_per_unit * self.property.insulation_floor_area if
material.total_cost * self.property.insulation_floor_area if
rir_non_invasive_recommendation.get("cost") is None else
rir_non_invasive_recommendation.get("cost")
)
sap_points = rir_non_invasive_recommendation.get("sap_points", None)
# Could also be Roof room(s), ceiling insulated
new_descriptin = "Roof room(s), insulated"
roof_ending_config = RoofAttributes(new_descriptin).process()
@ -562,7 +576,7 @@ class RoofRecommendations:
"description": "Insulate room in roof at rafters and re-decorate",
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": sap_points,
"sap_points": rir_non_invasive_recommendation.get("sap_points", None),
"simulation_config": simulation_config,
"description_simulation": {
"roof-description": new_descriptin,

View file

@ -9,12 +9,6 @@ class SecondaryHeating:
system.
"""
# The list of existing heating systems that are accepted
ACCEPTED_MAINHEAT_DESCRIPTIONS = ["Boiler and radiators, mains gas"]
ACCEPTED_SECONDHEAT_DESCRIPTIONS = ["Room heaters, electric"]
# These are the heaters where works are required to remove them
FIXED_HEATER_DESCRIPTIONS = ["Room heaters, electric"]
def __init__(self, property_instance: Property):
self.property = property_instance
self.costs = Costs(self.property)
@ -25,18 +19,10 @@ class SecondaryHeating:
# Reset
self.recommendation = []
if self.property.main_heating["clean_description"] not in self.ACCEPTED_MAINHEAT_DESCRIPTIONS:
return
# TODO: We need to clean secondary data
if self.property.data['secondheat-description'] not in self.ACCEPTED_SECONDHEAT_DESCRIPTIONS:
return
if self.property.data['secondheat-description'] in self.FIXED_HEATER_DESCRIPTIONS:
# We have an associated cost otherwise, there is no cost
n_rooms = self.property.data['number-heated-rooms']
if self.property.data['number-habitable-rooms'] > self.property.data['number-heated-rooms']:
n_rooms = self.property.data['number-habitable-rooms'] - self.property.data['number-heated-rooms']
else:
n_rooms = 0
n_rooms = self.property.data["number-heated-rooms"]
costs = self.costs.heater_removal(n_rooms=n_rooms)

View file

@ -1,24 +1,39 @@
import numpy as np
import pandas as pd
import backend.app.assumptions as assumptions
from recommendations.Costs import Costs
from recommendations.recommendation_utils import override_costs, estimate_pitched_roof_area
class SolarPvRecommendations:
# Solar panel specs based on Eurener 400s solar panels
# https://midsummerwholesale.co.uk/buy/eurener/eurener-400w-mepv-zebra-ab-half-cut-mono
# Approximate area of the solar panels
SOLAR_PANEL_AREA = 1.79
# Wattage per panel - this is based on the average wattage of a solar panel being between 250w and 420w
# This was previously set to 250w, but has been upped to 400 based on the systems used by Cotswold Energy Group
SOLAR_PANEL_WATTAGE = 400
# For domestic properties, we don't recommend a solar PV system with wattage outside of these
# bounds
MAX_SYSTEM_WATTAGE = 6000
MIN_SYSTEM_WATTAGE = 1000
# the maximum area of roof we allow to be covered in solar panels for our recommendations.
MAX_ROOF_AREA_PERCENTAGE = 0.7
SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE = 1
# Two fixed array configurations (4 panels / 1600W and 8 panels / 3200W). The yearly AC output is derived from
# assumptions.MEDIAN_WATTAGE_TO_AC and the panelled area from assumptions.RDSAP_AREA_PER_PANEL.
# NOTE(review): presumably used as a fallback when no panel performance data is available - confirm against callers.
# Every row must use the same keys: a mismatched key creates a separate column and leaves NaN in the others.
BACKUP_PANEL_PERFORMANCE = pd.DataFrame(
    [
        {
            "n_panels": 4,
            "array_wattage": 1600,
            "initial_ac_kwh_per_year": assumptions.MEDIAN_WATTAGE_TO_AC * 1600,
            "panneled_roof_area": 4 * assumptions.RDSAP_AREA_PER_PANEL
        },
        {
            "n_panels": 8,
            "array_wattage": 3200,
            "initial_ac_kwh_per_year": assumptions.MEDIAN_WATTAGE_TO_AC * 3200,
            "panneled_roof_area": 8 * assumptions.RDSAP_AREA_PER_PANEL
        },
    ]
)
def __init__(self, property_instance):
"""
:param property_instance: Instance of the Property class, for the home associated to property_id
@ -42,46 +57,6 @@ class SolarPvRecommendations:
return trimmed_list
def mds_recommend(self, phase=None, solar_pv_percentage=0.5):
# For specific usage within the mds report
solar_pv_roof_area = self.property.get_solar_pv_roof_area(solar_pv_percentage)
number_solar_panels = np.floor(solar_pv_roof_area / self.SOLAR_PANEL_AREA)
solar_panel_wattage = number_solar_panels * self.SOLAR_PANEL_WATTAGE
solar_panel_wattage = np.clip(
a=solar_panel_wattage, a_min=self.MIN_SYSTEM_WATTAGE, a_max=self.MAX_SYSTEM_WATTAGE
)
# We now have a property which is potentially suitable for solar PV
roof_coverage_percent = round(solar_pv_percentage * 100)
# Given the wattage, we estimate the cost of the solar PV system. This is based on the MCS database
# of solar PV installations
cost_result = self.costs.solar_pv(wattage=solar_panel_wattage, has_battery=False)
kw = np.floor(solar_panel_wattage / 100) / 10
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
f"anel system on {round(roof_coverage_percent)}% the roof.")
return [
{
"phase": phase,
"parts": [],
"type": "solar_pv",
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"already_installed": False,
**cost_result,
# This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we scale
# back up here
"photo_supply": roof_coverage_percent,
"has_battery": False
}
]
def recommend_building_analysis(self, phase):
"""
This recommendation approach handles the case of producing solar PV recommendations at the building level,
@ -103,13 +78,22 @@ class SolarPvRecommendations:
for rank, recommendation_config in best_configurations.iterrows():
# If we dont have the panneled_roof_area in the recommendation_config we calculate it
if recommendation_config.get("panneled_roof_area", None):
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / total_roof_area * 100)
# We spread the coverage across the individual units
roof_coverage_percent = round(
((recommendation_config["panneled_roof_area"] / total_roof_area) * 100) / n_units
)
else:
raise Exception("IMPLEMENT ME")
n_floors = (
self.property.number_of_storeys["number_of_storeys"] if
self.property.number_of_storeys["number_of_storeys"] is not None else 3
)
total_cost = self.costs.solar_pv(
array_cost=recommendation_config.get("cost", None),
n_panels=recommendation_config["n_panels"],
n_floors=self.property.number_of_storeys["number_of_storeys"],
n_floors=n_floors,
needs_inverter=True,
)["total"] / n_units
@ -203,6 +187,20 @@ class SolarPvRecommendations:
roof_coverage_percent = round(recommendation_config["panneled_roof_area"] / roof_area * 100)
# We round up to the nearest 5
roof_coverage_percent = np.ceil(roof_coverage_percent / 5) * 5
# Typically, we've observed that every 5% of additional roof coverage will result in at least
# an additional 1 SAP point (though often 2 points). Given this, we can add a reasonable minimum
# for the number of SAP points we might expect. We've observed that for some cases where properties
# are hitting the higher SAP scores (e.g. EPC A and above), the model can sometimes under-predict
# the number of SAP points. This appears to be due to a relatively small number of properties
# actually achieving the upper echelons of EPC rating. This can be the case if we're simulating a
# whole house retrofit where the home is getting complete insulation, a heat pump and solar panels.
# Because panels are the final recommendation, they are often the measure that takes the home
# into the medium to high EPC A ranges and so because of a lack of training data, this means that
# we might sometimes under-predict. This minimum is intended to try and reduce the negative impact
# of this. This minimum is used in Recommendations.calculate_recommendation_impact
minimum_sap_points = (roof_coverage_percent / 5) * self.SAP_POINTS_PER_5_PERCENT_ROOF_COVERAGE
for has_battery in [False, True]:
cost_result = self.costs.solar_pv(
has_battery=has_battery,
@ -212,11 +210,14 @@ class SolarPvRecommendations:
)
kw = np.floor(recommendation_config["array_wattage"] / 100) / 10
if has_battery:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) panel system on "
f"{round(roof_coverage_percent)}% the roof, with a battery storage system.")
description = (
f"Install a {kw} kilowatt-peak (kWp) solar panel system, with a battery."
)
else:
description = (f"Install a {kw} kilowatt-peak (kWp) solar photovoltaic (PV) p"
f"anel system on {round(roof_coverage_percent)}% the roof.")
description = f"Install a {kw} kilowatt-peak (kWp) solar panel system."
if self.property.in_conservation_area:
    # Append a note prompting the reader to check with the local planning authority
    description += " Property is in a conservation area - please check with local planning authority."
already_installed = "solar_pv" in self.property.already_installed
if already_installed:
@ -231,7 +232,7 @@ class SolarPvRecommendations:
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"sap_points": minimum_sap_points,
"already_installed": already_installed,
**cost_result,
# This is required for simulating the SAP impact. solar_pv_percentage is between 0 & 1 so we

View file

@ -29,7 +29,7 @@ class VentilationRecommendations(Definitions):
def identify_ventilation(self):
self.has_ventilaion = self.property.data["mechanical-ventilation"] in self.VENTILATION_DESCRIPTIONS
def recommend(self):
def recommend(self, phase):
"""
If there is no ventilation, we recommend installing ventilation
@ -63,7 +63,7 @@ class VentilationRecommendations(Definitions):
# We recommend installing two mechanical ventilation systems
self.recommendation = [
{
"phase": None,
"phase": phase,
"parts": part,
"type": part[0]["type"],
"measure_type": "mechanical_ventilation",
@ -79,7 +79,13 @@ class VentilationRecommendations(Definitions):
"total": estimated_cost,
# We use a very simple and rough estimate of 4 hours per unit
"labour_hours": labour_hours,
"labour_days": labour_days # Assume 8 hour day
"labour_days": labour_days, # Assume 8 hour day
"simulation_config": {
"mechanical_ventilation_ending": "mechanical, extract only",
},
"description_simulation": {
"mechanical-ventilation": "mechanical, extract only"
}
}
]

View file

@ -385,6 +385,11 @@ class WallRecommendations(Definitions):
if insulation_thickness == "below average":
cavity_width = cavity_width * (1 - PARTIALLY_FILLED_PERCENTAGE_ASSUMPTION)
non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if
r["type"] == insulation_materials["type"].values[0]), {}
)
# Test the different fill options
lowest_selected_u_value = None
recommendations = []
@ -475,14 +480,15 @@ class WallRecommendations(Definitions):
"description": description,
"starting_u_value": u_value,
"new_u_value": new_u_value,
"sap_points": None,
"sap_points": non_invasive_recommendations.get("sap_points", None),
"already_installed": already_installed,
"simulation_config": simulation_config,
"description_simulation": {
"walls-description": "Cavity wall, filled cavity",
"walls-energy-eff": "Good"
},
**cost_result
**cost_result,
"survey": non_invasive_recommendations.get("survey", False)
}
)
@ -540,15 +546,10 @@ class WallRecommendations(Definitions):
lowest_selected_u_value = None
recommendations = []
iwi_non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "internal_wall_insulation"), {}
non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if
r["type"] == insulation_materials["type"].values[0]), {}
)
ewi_non_invasive_recommendations = next(
(r for r in self.property.non_invasive_recommendations if r["type"] == "external_wall_insulation"), {}
)
if ewi_non_invasive_recommendations:
raise NotImplementedError("Implement ewi non-invasive recommendations")
for _, insulation_material_group in insulation_materials.groupby("description"):
@ -590,31 +591,25 @@ class WallRecommendations(Definitions):
if already_installed:
cost_result = override_costs(cost_result)
if non_invasive_recommendations.get("cost") is not None:
raise NotImplementedError(
"Not handled passing costs from non-invasive recommendations for iwi"
)
if material["type"] == "internal_wall_insulation":
if iwi_non_invasive_recommendations.get("cost") is not None:
raise NotImplementedError(
"Not handled passing costs from non-invasive recommendations for iwi"
)
sap_points = iwi_non_invasive_recommendations.get("sap_points", None)
survey = iwi_non_invasive_recommendations.get("survey", False)
new_description = self.get_internal_external_wall_description(
self.INTERNALLY_INSULATED_WALL_DESCRIPTIONS, new_u_value
)
elif material["type"] == "external_wall_insulation":
sap_points = ewi_non_invasive_recommendations.get("sap_points", None)
survey = ewi_non_invasive_recommendations.get("survey", False)
new_description = self.get_internal_external_wall_description(
self.EXTERNALLY_INSULATED_WALL_DESCRIPTIONS, new_u_value
)
else:
raise ValueError("Invalid material type")
sap_points = non_invasive_recommendations.get("sap_points", None)
survey = non_invasive_recommendations.get("survey", False)
wall_ending_config = WallAttributes(new_description).process()
walls_simulation_config = check_simulation_difference(

View file

@ -215,21 +215,29 @@ class WindowsRecommendations:
"glazed-type": glazed_type_ending,
}
measure_type = "double_glazing" if not is_secondary_glazing else "secondary_glazing"
non_invasive_recommendation = next(
(r for r in self.property.non_invasive_recommendations if r["type"] in ["windows_glazing", measure_type]),
{}
)
self.recommendation = [
{
"phase": phase,
"parts": [],
"type": "windows_glazing",
"measure_type": "double_glazing" if not is_secondary_glazing else "secondary_glazing",
"measure_type": measure_type,
"description": description,
"starting_u_value": None,
"new_u_value": None,
"sap_points": None,
"sap_points": non_invasive_recommendation.get("sap_points", None),
"already_installed": already_installed,
**cost_result,
"is_secondary_glazing": is_secondary_glazing,
"description_simulation": description_simulation,
"simulation_config": simulation_config,
"survey": non_invasive_recommendation.get("survey", None),
}
]

View file

@ -111,8 +111,11 @@ county_to_region_map = {
'Windsor and Maidenhead': 'South East England', 'Woking': 'South East England', 'Wokingham': 'South East England',
'Worthing': 'South East England', 'Wycombe': 'South East England',
'Bath and North East Somerset': 'South West England', 'Bournemouth': 'South West England',
'Bristol': 'South West England', 'Cheltenham': 'South West England', 'Christchurch': 'South West England',
'City of Bristol': 'South West England', 'Cornwall': 'South West England', 'Cotswold': 'South West England',
'Bristol': 'South West England',
'Cheltenham': 'South West England', 'Christchurch': 'South West England',
'City of Bristol': 'South West England',
'Bristol, City of': 'South West England',
'Cornwall': 'South West England', 'Cotswold': 'South West England',
'Devon': 'South West England', 'Dorset': 'South West England', 'East Devon': 'South West England',
'East Dorset': 'South West England', 'Exeter': 'South West England', 'Forest of Dean': 'South West England',
'Gloucester': 'South West England', 'Gloucestershire': 'South West England',
@ -132,7 +135,10 @@ county_to_region_map = {
'Merthyr Tydfil': 'Wales', 'Monmouthshire': 'Wales', 'Mountain Ash': 'Wales', 'Neath Port Talbot': 'Wales',
'Newport': 'Wales', 'Pembrokeshire': 'Wales', 'Penarth': 'Wales', 'Pentre': 'Wales', 'Pontyclun': 'Wales',
'Pontypridd': 'Wales', 'Porth': 'Wales', 'Porthcawl': 'Wales', 'Powys': 'Wales', 'Rhondda Cynon Taff': 'Wales',
'Rhoose': 'Wales', 'Sully': 'Wales', 'Swansea': 'Wales', 'The Vale of Glamorgan': 'Wales', 'Tonypandy': 'Wales',
'Rhoose': 'Wales', 'Sully': 'Wales', 'Swansea': 'Wales',
'The Vale of Glamorgan': 'Wales',
'Vale of Glamorgan': 'Wales',
'Tonypandy': 'Wales',
'Torfaen': 'Wales', 'Treharris': 'Wales', 'Treorchy': 'Wales', 'Wrexham': 'Wales', 'Birmingham': 'West Midlands',
'Bromsgrove': 'West Midlands', 'Cannock Chase': 'West Midlands', 'Coventry': 'West Midlands',
'Dudley': 'West Midlands', 'East Staffordshire': 'West Midlands', 'Herefordshire': 'West Midlands',

View file

@ -1,10 +1,14 @@
def prepare_input_measures(property_recommendations, goal):
import backend.app.assumptions as assumptions
def prepare_input_measures(property_recommendations, goal, needs_ventilation):
"""
Basic function to convert recommendations_to_upload to a format that is
suitable for the optimiser - large
:param property_recommendations: object containing the recommendations, created in the plan trigger api
:param goal: goal to be optimised for, should be one of the keys in gain_map. E.g. if the gain is SAP points,
the goal should reflect that desired gain
:param needs_ventilation: boolean to indicate if the property needs ventilation
:return: Nested list of input measures
"""
@ -16,23 +20,58 @@ def prepare_input_measures(property_recommendations, goal):
if not goal_key:
raise NotImplementedError("Not implemented this gain type - investigate me")
# We only ever have one ventilation measure for now
ventilation_recommendation = next(
(measure[0] for measure in property_recommendations if measure[0]["type"] == "mechanical_ventilation"),
{}
)
input_measures = []
for recs in property_recommendations:
if needs_ventilation and recs[0]["type"] == "mechanical_ventilation":
# If the house needs ventilation, ventilation will be packaged with the fabric measure so
# we don't need to optimise it independently
continue
if recs[0]["type"] == "solar_pv":
# if the recommendation is a solar recommendation with a battery, we exclude it from the optimisation.
recs = [r for r in recs if ~r["has_battery"]]
input_measures.append(
[
recs_to_append = [rec for rec in recs if rec["energy_cost_savings"] >= 0]
if not recs_to_append:
continue
to_append = []
for rec in recs:
# We bundle the impact of ventilation with the measure
total = (
rec["total"] + ventilation_recommendation["total"]
if rec["type"] in assumptions.measures_needing_ventilation
else rec["total"]
)
gain = (
rec[goal_key] + ventilation_recommendation[goal_key]
if rec["type"] in assumptions.measures_needing_ventilation
else rec[goal_key]
)
rec_type = (
"+".join(
[rec["type"], ventilation_recommendation["type"]]
) if rec["type"] in assumptions.measures_needing_ventilation
else rec["type"]
)
to_append.append(
{
"id": rec["recommendation_id"],
"cost": rec["total"],
"gain": rec[goal_key],
"type": rec["type"]
"cost": total,
"gain": gain,
"type": rec_type
}
for rec in recs
]
)
)
input_measures.append(to_append)
return input_measures

View file

@ -257,7 +257,7 @@ epc_wall_description_map = {
"Timber frame, as built, partial insulation": "Timber frame as built",
"Timber frame, as built, no insulation": "Timber frame as built",
"Timber frame, with external insulation": "Timber frame with internal insulation",
"Timber frame, with internal insulation": "Timber frame with internal insulation",
############################
# Sandstone/limestones wall mappings
############################

View file

@ -205,7 +205,7 @@ def get_wall_u_value(
mapped_value = wall_uvalues_df[
wall_uvalues_df["Wall_type"] == mapped_description
][age_band].values[0]
][age_band].values[0]
if pd.isnull(mapped_value) and "Park home" in mapped_description:
# We don't know enough in this case so we default to 0
@ -428,6 +428,9 @@ def estimate_number_of_floors(property_type):
Using the property type, we estimate the number of floors in the property
"""
if property_type is None:
return None
if property_type == "House":
number_of_floors = 2
elif property_type in ["Flat", "Bungalow"]:
@ -560,7 +563,7 @@ def get_floor_u_value(
insulation_lookup = s11[
s11["Age_band"].str.contains(age_band) & s11["Floor_construction"]
== floor_type
]
]
if insulation_lookup.empty:
insulation_thickness = 0
else:

270
survey_report/app.py Normal file
View file

@ -0,0 +1,270 @@
import os
import requests
import PyPDF2
from string import Template
import pandas as pd
from survey_report.extraction.detect_report_type import detect_report_type
from survey_report.extraction.quidos import SiteNotesExtractor, EPRExtractor
def generate_html_report(template_path, output_path, data):
    """
    Render an HTML report by filling the ``${name}`` placeholders of a template file.

    Args:
    - template_path (str): Path to the HTML template file.
    - output_path (str): Path the rendered HTML is written to.
    - data (dict): Placeholder names mapped to the values to inject.
    """
    with open(template_path, "r", encoding="utf-8") as template_file:
        raw_template = template_file.read()

    # safe_substitute leaves placeholders that have no value in `data` untouched
    # instead of raising a KeyError
    rendered_html = Template(raw_template).safe_substitute(data)

    with open(output_path, "w", encoding="utf-8") as report_file:
        report_file.write(rendered_html)

    print(f"HTML report generated successfully: {output_path}")
def stringify_number(num: int, rounding: bool = True) -> str:
    """
    Format a number for display in the report.

    Values shown with five figures or fewer are rounded up to the next 100 and rendered
    with thousands separators (e.g. ``1234`` -> ``"1,300"``). Larger values are rounded up
    to the next 1000 and abbreviated (e.g. ``123456`` -> ``"124k"``).

    :param num: The number to format
    :param rounding: When False the value is not rounded up before formatting (the "k"
        form still drops everything below a thousand)
    :return: The formatted number
    """
    if num < 100000:  # 5 figures or fewer
        rounded_num = ((num + 99) // 100) * 100 if rounding else num
        if rounded_num < 100000:
            return f"{rounded_num:,}"
        # Rounding up pushed the value to six figures (99,901 - 99,999 -> 100,000), so it
        # is formatted like every other six figure number rather than as "100,000"
        return f"{rounded_num // 1000}k"

    # More than 5 figures
    rounded_num = ((num + 999) // 1000) * 1000 if rounding else num
    return f"{rounded_num // 1000}k"
class PlacidApi:
    """
    Small client for the Placid REST API, used to render the survey report as a PDF.

    PDFs are generated asynchronously: `create_pdf` queues the job and `get_pdf` polls until
    the finished document can be downloaded.
    """

    # Errors as defined by docs: https://placid.app/docs/2.0/rest/errors
    ERROR_CODES = {
        400: "Bad request",
        401: "Unauthorized",
        404: "Template Not found",
        422: "Validation error",
        429: "Rate limit exceeded",
        500: "Internal server error",
    }

    # Seconds to wait on any single HTTP request before giving up
    REQUEST_TIMEOUT = 30

    def __init__(self, api_key):
        """
        :param api_key: Placid API token, sent as a bearer token on every request
        """
        self.api_key = api_key
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json",
        }

    @classmethod
    def _describe_error(cls, status_code):
        """Return a human readable description for an HTTP error status code."""
        return cls.ERROR_CODES.get(status_code, f"Unexpected status code {status_code}")

    def _parse_response(self, response):
        """
        Return the JSON body of a Placid response.

        :raises RuntimeError: If the API answered with an error status code
        """
        if response.status_code >= 400:
            raise RuntimeError(
                f"Placid API request failed ({response.status_code}): "
                f"{self._describe_error(response.status_code)}"
            )
        return response.json()

    def create_pdf(
            self,
            template_uuid: str,
            current_epc_rating: str,
            current_epc_rating_colour: str,
            post_retrofit_epc_rating: str,
            post_retrofit_epc_rating_colour: str,
    ):
        """
        Queue the generation of a single page PDF from a Placid template.

        :param template_uuid: UUID of the Placid template to render
        :param current_epc_rating: Text for the "current_epc_rating" layer
        :param current_epc_rating_colour: Text colour for the "current_epc_rating" layer
        :param post_retrofit_epc_rating: Text for the "post_retrofit_epc_rating" layer
        :param post_retrofit_epc_rating_colour: Text colour for the "post_retrofit_epc_rating" layer
        :return: The decoded JSON response; its "id" is what `get_pdf` expects
        :raises RuntimeError: If the API answered with an error status code
        """
        url = "https://api.placid.app/api/rest/pdfs"

        body = {
            "webhook_success": None,
            "passthrough": None,
            "pages": [
                {
                    "template_uuid": template_uuid,
                    "layers": {
                        "current_epc_rating": {
                            "text": current_epc_rating,
                            "text_color": current_epc_rating_colour,
                        },
                        "post_retrofit_epc_rating": {
                            "text": post_retrofit_epc_rating,
                            "text_color": post_retrofit_epc_rating_colour,
                        }
                    },
                },
            ]
        }

        response = requests.post(
            url,
            headers=self.headers,
            json=body,
            timeout=self.REQUEST_TIMEOUT,
        )
        return self._parse_response(response)

    def get_pdf(
            self,
            pdf_id: str,
            output_path: str = "survey_report/example_data/output.pdf",
            poll_interval: float = 5,
            max_attempts: int = 60,
    ):
        """
        Poll the API every `poll_interval` seconds until the PDF is ready, then download it.

        :param pdf_id: The id returned by `create_pdf`
        :param output_path: Where the downloaded PDF is written
        :param poll_interval: Seconds to wait between polls
        :param max_attempts: Maximum number of polls before giving up
        :return: The path the PDF was written to
        :raises RuntimeError: If the API reports an error or the download fails
        :raises TimeoutError: If the PDF is not ready after `max_attempts` polls
        """
        # Local import so that this block does not depend on the module import list
        import time

        url = f"https://api.placid.app/api/rest/pdfs/{pdf_id}"

        pdf_url = None
        for attempt in range(max_attempts):
            response = requests.get(
                url,
                headers=self.headers,
                timeout=self.REQUEST_TIMEOUT,
            )
            response_body = self._parse_response(response)

            # A freshly queued PDF has no URL yet; it is only populated once rendering is done
            pdf_url = response_body.get("pdf_url")
            if pdf_url:
                break

            if response_body.get("status") == "error":
                raise RuntimeError(f"Placid failed to generate PDF {pdf_id}: {response_body}")

            if attempt < max_attempts - 1:
                time.sleep(poll_interval)

        if not pdf_url:
            raise TimeoutError(f"PDF {pdf_id} was not ready after {max_attempts} attempts")

        # Download the PDF from this url
        pdf_download = requests.get(pdf_url, timeout=self.REQUEST_TIMEOUT)
        if pdf_download.status_code >= 400:
            raise RuntimeError(
                f"Failed to download PDF {pdf_id} ({pdf_download.status_code})"
            )

        with open(output_path, "wb") as f:
            f.write(pdf_download.content)

        return output_path
def handler():
    """
    Performs the data extraction process for the survey report.

    For each configured property the PDFs are read, the site notes, EPR and post-retrofit
    scenario site notes are parsed, the headline savings are computed, and a PDF is requested
    from Placid. An HTML report is generated afterwards.

    The Placid API key is read from the PLACID_API_KEY environment variable.

    NOTE(review): this is still work in progress - the payback period is not implemented, so
    the function currently raises NotImplementedError for the first property it processes.

    :return: None
    :raises EnvironmentError: If PLACID_API_KEY is not set
    :raises ValueError: If one of the input PDFs is not a recognised report type
    :raises NotImplementedError: Until the payback period calculation is implemented
    """
    # Credentials must never live in source control. The key that was previously hard-coded
    # here should be treated as leaked and rotated.
    PLACID_API_KEY = os.environ.get("PLACID_API_KEY")
    if not PLACID_API_KEY:
        raise EnvironmentError("PLACID_API_KEY environment variable must be set")
    TEMPLATE_UUID = "5bst9mh1q9lk9"

    placid_api = PlacidApi(PLACID_API_KEY)

    current_property_value = 250000  # Needs to be an input

    EPC_COLOURS = {
        "A": "#117d58",
        "B": "#2da55c",
        "C": "#8dbd40",
        "D": "#f7cd14",
        "E": "#f3a96a",
        "F": "#ef8026",
        "G": "#e41e3b",
    }

    # Placeholder locations - these point at local example data and need to become inputs
    base_path = "/Users/khalimconn-kowlessar/Documents/hestia/Model/survey_report"
    folders = [
        {
            "site_notes": f"{base_path}/example_data/Flat {flat}/3 WILLIS ROAD FLAT {flat} "
                          f"PRE EPR SITE NOTES.pdf",
            "epr": f"{base_path}/example_data/Flat {flat}/3 WILLIS ROAD FLAT {flat} PRE EPR PDF.pdf",
            "scenario_site_notes": f"{base_path}/example_data/Flat {flat}/3 WILLIS ROAD FLAT {flat} "
                                   f"POST EPR SITE NOTES.pdf",
        }
        for flat in (1, 2, 3)
    ]

    # NOTE(review): nothing is appended to this list yet
    data = []
    data_sheet = {}
    for data_config in folders:

        file_mapping = {}
        for filename, filepath in data_config.items():
            with open(filepath, "rb") as f:
                pdf = PyPDF2.PdfReader(f)
                page_texts = [page.extract_text() for page in pdf.pages]

            first_page = page_texts[0]
            text = "".join(page_texts)

            # Check the report type
            report_type = detect_report_type(first_page)

            if report_type is not None:
                file_mapping[filename] = text

        # Every configured document is required below, so fail with a clear message rather
        # than a bare KeyError when a report type was not recognised
        missing_reports = [name for name in data_config if name not in file_mapping]
        if missing_reports:
            raise ValueError(
                f"Unrecognised or unsupported report type for: {', '.join(missing_reports)}"
            )

        # This is only set up to work with quido site notes so we must have it
        site_notes_extractor = SiteNotesExtractor(file_mapping["site_notes"])
        site_notes = site_notes_extractor.extract_all()

        # We also must have an EPR
        epr_extractor = EPRExtractor(file_mapping["epr"])
        epr = epr_extractor.extract_all()

        # Valuation simulation
        scenario_site_notes_extractor = SiteNotesExtractor(file_mapping["scenario_site_notes"])
        scenario_site_notes = scenario_site_notes_extractor.extract_all()

        from backend.ml_models.Valuation import PropertyValuation
        valuation_uplift = PropertyValuation.estimate_valuation_improvement(
            current_value=current_property_value,
            current_epc=site_notes["Current EPC Band"],
            target_epc=scenario_site_notes["Current EPC Band"],
        )
        # TODO - should convert this, when it's more than 5 figures and we should certainly stringify this
        valuation_difference = round(valuation_uplift["average_increased_value"] - current_property_value)

        # Prepare the data for output
        bill_savings = round(
            site_notes['Estimated Annual Energy Cost (£)'] - scenario_site_notes['Estimated Annual Energy Cost (£)']
        )
        carbon_savings = round(
            site_notes["Current Carbon Emissions (TCO2)"] - scenario_site_notes["Current Carbon Emissions (TCO2)"],
            2
        )

        payback_period = None
        if payback_period is None:
            raise NotImplementedError("Implement me")

        # We extract the measures from the site notes
        report_data = {
            "current_epc_rating": site_notes["Current EPC Band"],
            "current_epc_rating_colour": EPC_COLOURS[site_notes["Current EPC Band"]],
            "post_retrofit_epc_rating": scenario_site_notes["Current EPC Band"],
            "post_retrofit_epc_rating_colour": EPC_COLOURS[scenario_site_notes["Current EPC Band"]],
            "bill_savings": stringify_number(bill_savings),
            "valuation_improvement": stringify_number(valuation_difference),
            "carbon_savings": carbon_savings,
        }

        # We now produce the combined data sheet which is the starting figure. The HTML report
        # below reads the address (from the EPR) and the current ratings (from the site notes)
        # TODO: unnest "Total Building Dimensions" and drop "Building Dimensions" when a flat
        #  data sheet is needed
        data_sheet = {**epr, **site_notes}

        create_pdf_response = placid_api.create_pdf(
            template_uuid=TEMPLATE_UUID, **report_data
        )
        # {'id': 769832, 'type': 'pdf', 'status': 'queued', 'pdf_url': None, 'transfer_url': None, 'passthrough': None}

        # Download locally
        placid_api.get_pdf(create_pdf_response["id"])

    data = pd.DataFrame(data)

    # Generate the HTML report
    # Placeholder locations
    template_path = f"{base_path}/template.html"
    output_path = f"{base_path}/output/report.html"
    logo_path = f"{base_path}/assets/logo.png"

    # NOTE(review): only the last processed property ends up in the HTML report
    generate_html_report(
        template_path, output_path,
        data={
            "address": data_sheet["Address"],
            "logo_path": logo_path,
            "current_epc": data_sheet["Current EPC Band"],
            "current_sap": data_sheet["Current SAP Rating"],
            "potential_epc": "A",  # TODO PLACEHOLDER
            "potential_sap": 91,  # TODO PLACEHOLDER
        }
    )

View file

@ -0,0 +1,22 @@
import re
def detect_report_type(first_page):
    """
    Classify a report from the text of its first page.

    :param first_page: Text extracted from the first page of the PDF
    :return: "quidos_site_notes", "quidos_epr", or None when the page matches neither
    """
    # Only Quidos files are handled for now. The Elmhurst logic exists and can be
    # introduced here when it is needed
    site_notes_header = r"^Created \d{2}/\d{2}/\d{4} for Quidos Ltd using Argyle software BRE approved calculator"
    epr_marker = r"\nIQ-Energy\nEnergy Performance Report\nPage 1 of 1"

    # Site notes are recognised by their header, which must open the page
    if re.match(site_notes_header, first_page) is not None:
        return "quidos_site_notes"

    # The EPR marker may appear anywhere on the page
    return "quidos_epr" if re.search(epr_marker, first_page) is not None else None

View file

@ -0,0 +1,256 @@
import re
class SiteNotesExtractor:
    """
    Parses the text of an EPC summary (site notes) report and collects the SAP rating,
    carbon emissions, energy bill estimate and building dimensions.
    """

    def __init__(self, pdf_text):
        """
        :param pdf_text: The text extracted from the report PDF
        """
        self.text = pdf_text
        self.data = {}

    def extract_sap_rating(self):
        """
        Stores the current and potential EPC band and SAP rating.

        :raises ValueError: If the ratings cannot be found in the text
        """
        found = re.search(
            r"Current SAP rating\s*([A-G])\s*(\d+)\s*Potential SAP rating\s*([A-G])\s*(\d+)", self.text
        )
        if found is None:
            raise ValueError("No SAP rating found in the report")

        current_band, current_sap, potential_band, potential_sap = found.groups()
        self.data["Current EPC Band"] = current_band
        self.data["Current SAP Rating"] = int(current_sap)
        self.data["Potential EPC Band"] = potential_band
        self.data["Potential SAP Rating"] = int(potential_sap)

    def extract_carbon_emissions(self):
        """
        Stores the current annual carbon emissions (TCO2).

        :raises ValueError: If the emissions cannot be found in the text
        """
        found = re.search(r"Current annual emissions\s*([\d.]+)\s*\(TCO2\)", self.text)
        if found is None:
            raise ValueError("No carbon emissions found in the report")

        self.data["Current Carbon Emissions (TCO2)"] = float(found.group(1))

    def extract_building_dimensions(self):
        """
        Stores the dimensions of every building part (main property and any extensions),
        along with the total floor area and total heat loss area.

        :raises ValueError: If the dimensions section, or any building part in it, is missing
        """
        # The dimensions table sits between its header row and the conservatory section
        section = re.search(
            r"Dimension Type (?:internal|external)\nPart Floor Area \(m2\) Room Height \(m\) Loss Perimeter \(m\) "
            r"Party Wall "
            r"Length \(m\)\n"
            r"(.*?)\n5\.0 Conservatory", self.text, re.DOTALL
        )
        if section is None:
            raise ValueError("Failed to locate the dimensions section in the text.")

        # One row per building part (Main Property, Extension 1, Extension 2, etc.)
        row_pattern = re.compile(
            r"(Main Property|Extension \d+)\s*(?:Property)?\s*([\d.]+)\s+([\d.]+)\s+([\d.]+)\s+([\d.]+)"
        )

        parts = []
        for row in row_pattern.finditer(section.group(1)):
            name, floor_area, room_height, loss_perimeter, party_wall = row.groups()
            part = {
                "Building Part": name.strip(),
                "Part Floor Area (m2)": float(floor_area),
                "Room Height (m)": float(room_height),
                "Loss Perimeter (m)": float(loss_perimeter),
                "Party Wall Length (m)": float(party_wall),
            }
            # Heat loss area is the exposed perimeter multiplied by the room height
            part["Heat Loss Area (m2)"] = part["Loss Perimeter (m)"] * part["Room Height (m)"]
            parts.append(part)

        if len(parts) == 0:
            raise ValueError("No building dimensions found in the report")

        self.data["Building Dimensions"] = parts

        # Totals across all building parts
        total_floor_area = sum(part["Part Floor Area (m2)"] for part in parts)
        total_heat_loss_area = sum(part["Heat Loss Area (m2)"] for part in parts)
        self.data["Total Building Dimensions"] = {
            "floor_area": total_floor_area,
            "heat_loss_area": total_heat_loss_area,
        }

    def extract_bills_estimate(self):
        """
        Stores the estimated annual energy cost (£).

        :raises ValueError: If the estimate cannot be found in the text
        """
        found = re.search(r"Current annual energy costs £\s*([\d,.]+)", self.text)
        if found is None:
            raise ValueError("No bills estimate found in the report")

        # Strip the thousands separators before converting
        amount = found.group(1).replace(",", "")
        self.data["Estimated Annual Energy Cost (£)"] = float(amount)

    def extract_all(self):
        """
        Runs the SAP rating, carbon emissions, bills and dimensions extraction.

        :return: Dictionary with all of the extracted data
        """
        for step in (
                self.extract_sap_rating,
                self.extract_carbon_emissions,
                self.extract_bills_estimate,
                self.extract_building_dimensions,
        ):
            step()

        # Specific measures still to extract: primary wall, secondary wall, roof, floor,
        # heating system, hot water system, windows, doors, lighting, ventilation, solar
        return self.data

    def extract_walls(self):
        """
        Reads the construction, insulation and thickness details of the walls of each
        building part from the 7.0 Walls section.

        :return: List with one dictionary per building part found
        :raises ValueError: If the walls section cannot be found in the text
        """
        # The walls section runs up to the start of the roofs section
        section = re.search(r"7\.0 Walls\n(.*?)\n8\.0 Roofs", self.text, re.DOTALL)
        if section is None:
            raise ValueError("Failed to locate the walls section in the text.")

        # Every detail line is optional, so a building part is matched even if lines are missing
        wall_pattern = re.compile(
            r"(?P<section>Main Property(?: Alternative)?|Extension \d+)\s*\n"
            r"(?:Construction\s*(?P<construction>[^\n]*)\n)?"
            r"(?:Insulation\s*(?P<insulation>[^\n]*)\n)?"
            r"(?:Insulation Thickness\(mm\)\s*(?P<insulation_thickness>[^\n]*)\n)?"
            r"(?:Wall Thickness Measured\?\s*(?P<thickness_measured>[^\n]*)\n)?"
            r"(?:Wall Thickness\(mm\)\s*(?P<thickness>\d+))?",
            re.MULTILINE
        )

        walls = []
        for found in wall_pattern.finditer(section.group(1)):
            part_name = found.group("section")
            if "Main Property" in part_name:
                # "Main Property Alternative" is reported against the main property
                part_name = "Main Property"

            thickness = found.group("thickness")
            if thickness and thickness.isdigit():
                wall_thickness = int(thickness)
            else:
                wall_thickness = None

            # TODO: We aren't effectively picking up alternative walls, so the alternative
            #  wall fields are always left empty
            walls.append(
                {
                    "Building Part": part_name,
                    "Wall Type": found.group("construction") or "Unknown",
                    "Wall Insulation": found.group("insulation") or "Unknown",
                    "Insulation Thickness (mm)": found.group("insulation_thickness") or "Unknown",
                    "Wall Thickness Measured": found.group("thickness_measured") or "Unknown",
                    "Wall Thickness (mm)": wall_thickness,
                    "Alternative Wall Type": None,
                    "Alternative Wall Insulation": None,
                    "Alternative Insulation Thickness (mm)": None,
                    "Alternative Wall Thickness Measured": None,
                    "Alternative Wall Thickness (mm)": None,
                }
            )

        return walls
class EPRExtractor:
    """
    Parses the text of an Energy Performance Report (EPR) and collects the address and the
    space and water heating consumption.
    """

    def __init__(self, pdf_text):
        """
        :param pdf_text: The text extracted from the report PDF
        """
        self.text = pdf_text
        self.data = {}

    def extract_heating_consumption(self):
        """
        Stores the space heating and water heating figures (KWH) as integers.

        :raises ValueError: If the heating figures cannot be found in the text
        """
        found = re.search(
            r"Space Heating\(KWH\)\s*([\d,]+).*?\nWater Heating\(KWH\)\s*([\d,]+)",
            self.text,
            re.DOTALL
        )
        if found is None:
            raise ValueError("No heating data found in the report")

        # Figures are written with thousands separators, which int() does not accept
        space_heating, water_heating = (int(value.replace(",", "")) for value in found.groups())
        self.data["Space Heating (KWH)"] = space_heating
        self.data["Water Heating (KWH)"] = water_heating

    def extract_address(self):
        """
        Stores the address, which is the text between the "Address" and "Town" labels.

        :raises ValueError: If the address cannot be found in the text
        """
        found = re.search(
            r"Address\s*(.*?)\nTown\s*(.*?)\n",
            self.text,
            re.DOTALL
        )
        if found is None:
            raise ValueError("No address found in the report")

        self.data["Address"] = found.group(1).strip()

    def extract_all(self):
        """
        Runs the address and heating consumption extraction.

        :return: Dictionary with all of the extracted data
        """
        for step in (self.extract_address, self.extract_heating_consumption):
            step()
        return self.data

View file

123
survey_report/template.html Normal file
View file

@ -0,0 +1,123 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Domna Energy Report</title>
<style>
body {
font-family: Arial, sans-serif;
background-color: #ffffff;
color: #333;
margin: 0;
padding: 0;
display: flex;
justify-content: center;
}
.container {
width: 100%;
max-width: 1300px;
margin: 20px auto;
}
.header {
background-color: #1B1F3B;
color: white;
padding: 30px;
display: flex;
justify-content: space-between;
align-items: center;
border-radius: 12px;
}
.header h1 {
    /* A non-zero length needs a unit; a bare `5` is invalid and the declaration is ignored */
    margin: 5px;
    font-size: 24px;
}
.header p {
margin: 5px 0 0;
font-size: 16px;
color: #d1d5db;
}
.logo img {
height: 60px;
}
/* EPC Rating Cards */
.epc-container {
display: flex;
justify-content: space-between;
gap: 20px;
margin-top: 30px;
}
.epc-card {
background-color: white;
border: 2px solid #ccc;
border-radius: 10px;
padding: 20px;
flex: 1;
display: flex;
flex-direction: column;
justify-content: space-between; /* Pushes SAP to bottom */
align-items: center;
text-align: center;
box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1);
position: relative;
height: 160px;
}
.epc-title {
font-size: 18px;
font-weight: bold;
color: #666;
}
.epc-rating {
font-size: 50px;
font-weight: bold;
}
.sap-rating {
font-size: 18px;
color: #555;
position: absolute;
bottom: 10px;
right: 20px;
}
.before .epc-rating {
color: #1B1F3B; /* Medium Blue */
}
.after .epc-rating {
color: #D4AF37; /* Gold */
}
</style>
</head>
<body>
<div class="container">
<!-- Header Section -->
<div class="header">
<div>
<h1>Domna Energy Report</h1>
<p>${address}</p> <!-- Address Placeholder -->
</div>
<div class="logo">
<img src="${logo_path}" alt="Domna Logo">
</div>
</div>
<!-- EPC Rating Cards -->
<div class="epc-container">
<div class="epc-card before">
<div class="epc-title">Current EPC Rating</div>
<div class="epc-rating">${current_epc}</div>
<div class="sap-rating">SAP ${current_sap}</div>
</div>
<div class="epc-card after">
<div class="epc-title">Potential EPC Rating</div>
<div class="epc-rating">${potential_epc}</div>
<div class="sap-rating">SAP ${potential_sap}</div>
</div>
</div>
</div>
</body>
</html>

View file

@ -0,0 +1,49 @@
import re
import boto3
import PyPDF2
import fitz
class OsmosisConditionReportParser:
    """
    Reads the text of an Osmosis condition report PDF and extracts selected fields from it.
    """

    def __init__(self, filekey, bucket_name=None):
        """
        :param filekey: Path of the PDF to read
        :param bucket_name: S3 bucket holding the file. Reading from S3 is not implemented
            yet, so leave this unset to read a local file
        :raises ValueError: If the file cannot be found or read
        """
        self.s3_client = boto3.client('s3')
        self.bucket_name = bucket_name
        self.filekey = filekey

        self.pdf_text = None
        self._read_file()

    def _read_file(self):
        """
        Reads the PDF and stores the concatenated text of all of its pages in `pdf_text`.

        Raises:
            ValueError: If the file cannot be found or read. This currently includes the
                case where a bucket name is given, as reading from S3 is not implemented.
        """
        try:
            if self.bucket_name:
                # Read from S3
                raise NotImplementedError("Implement me")
            else:
                with fitz.open(self.filekey) as pdf:
                    self.pdf_text = "".join(page.get_text() for page in pdf)

        except FileNotFoundError as e:
            raise ValueError(f"Local file not found: {self.filekey}") from e
        except Exception as e:
            raise ValueError(f"An error occurred while reading or parsing the PDF: {e}") from e

    def extract(self):
        """
        Extracts the number of bedrooms and the risk assessment pathway from the report text.

        :return: Dictionary with the "No. of Bedrooms" (int) and "Risk Assessment Pathway" (str)
        :raises ValueError: If either field cannot be found in the report
        """
        bedrooms = re.search(r"No\. of Bedrooms \(Total\)\s*(\d+)", self.pdf_text)
        if not bedrooms:
            raise ValueError("No number of bedrooms found in the report")

        pathway = re.search(r"Risk\s*Assessment\s*Pathway\s*([A-Z])", self.pdf_text)
        if not pathway:
            raise ValueError("No risk assessment pathway found in the report")

        return {
            "No. of Bedrooms": int(bedrooms.group(1)),
            "Risk Assessment Pathway": pathway.group(1)
        }

File diff suppressed because it is too large Load diff

Some files were not shown because too many files have changed in this diff Show more