Merge branch 'main' into anchor-sal

This commit is contained in:
Daniel Roth 2026-02-10 14:48:47 +00:00
commit 8fb58ebe56
98 changed files with 3600 additions and 351 deletions

View file

@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
# # 4) Python deps - if you want to run assest list # # 4) Python deps - if you want to run assest list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD asset_list/requirements.txt requirements.txt ADD .devcontainer/asset_list/requirements.txt requirements2.txt
RUN pip install -r requirements.txt ADD asset_list/requirements.txt requirements1.txt
RUN cat requirements1.txt requirements2.txt >> requirements.txt
RUN pip install -r requirements.txt RUN pip install -r requirements.txt
# 5) Workdir # 5) Workdir

View file

@ -15,10 +15,9 @@ uvicorn[standard]
pytest==9.0.2 pytest==9.0.2
pytest-cov==7.0.0 pytest-cov==7.0.0
ipykernel>=6.25,<7 ipykernel>=6.25,<7
pydantic-settings<2
pyyaml>=6.0.1 pyyaml>=6.0.1
pydantic>=1.10.7,<2
sqlmodel sqlmodel
# Formatting # Formatting
black==26.1.0 black==26.1.0
dotenv dotenv
pydantic-settings

View file

@ -13,6 +13,9 @@ on:
required: false required: false
default: "." default: "."
type: string type: string
build_args:
required: false
type: string
outputs: outputs:
image_digest: image_digest:
@ -29,11 +32,22 @@ on:
required: true required: true
AWS_REGION: AWS_REGION:
required: true required: true
DEV_DB_HOST:
required: false
DEV_DB_PORT:
required: false
DEV_DB_NAME:
required: false
jobs: jobs:
build: build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
env:
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
outputs: outputs:
image_digest: ${{ steps.digest.outputs.image_digest }} image_digest: ${{ steps.digest.outputs.image_digest }}
ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }} ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
@ -64,7 +78,22 @@ jobs:
- name: Build & push image - name: Build & push image
run: | run: |
IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}" IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}"
docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }}
# Writes build args and removes line breaks
BUILD_ARGS=""
while IFS= read -r line; do
# skip empty lines
[ -n "$line" ] || continue
temp=$(eval echo "$line")
BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
done <<< "${{ inputs.build_args }}"
docker build \
-f ${{ inputs.dockerfile_path }} \
$BUILD_ARGS \
-t $IMAGE_URI \
${{ inputs.build_context }}
docker push $IMAGE_URI docker push $IMAGE_URI
- name: Resolve image digest - name: Resolve image digest

View file

@ -16,6 +16,7 @@ jobs:
id: set-stage id: set-stage
shell: bash shell: bash
run: | run: |
env
BRANCH="${GITHUB_REF_NAME}" BRANCH="${GITHUB_REF_NAME}"
if [[ "$BRANCH" == "prod" ]]; then if [[ "$BRANCH" == "prod" ]]; then
@ -73,8 +74,8 @@ jobs:
uses: ./.github/workflows/_build_image.yml uses: ./.github/workflows/_build_image.yml
with: with:
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/Dockerfile dockerfile_path: backend/address2UPRN/handler/Dockerfile
build_context: backend/address2UPRN build_context: .
secrets: secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@ -96,3 +97,76 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 2⃣ Build Postcode Splitter image and Push
# ============================================================
# Builds the Postcode Splitter container image and pushes it to ECR by calling
# the reusable _build_image.yml workflow.
postcodeSplitter_image:
# Needs the resolved stage name (used in the ECR repo name below) and waits for
# the shared_terraform job to finish first.
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
# Build context is the repository root, not the handler directory.
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 3⃣ Deploy Postcode Splitter Lambda
# ============================================================
# Deploys the Postcode Splitter Lambda through the reusable _deploy_lambda.yml
# workflow, pinned to the image digest produced by postcodeSplitter_image.
postcodeSplitter_lambda:
needs: [postcodeSplitter_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
stage: ${{ needs.determine_stage.outputs.stage }}
# Same repo name expression as the ecr_repo input of postcodeSplitter_image.
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Condition ETL image and Push
# ============================================================
# Builds the Condition ETL container image and pushes it to ECR by calling the
# reusable _build_image.yml workflow, forwarding the dev database coordinates
# as Docker build args.
condition_etl_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/condition/handler/Dockerfile
build_context: .
# One NAME=VALUE pair per line. The $DEV_DB_* references are passed through
# literally here; the build step in _build_image.yml expands each line with
# `eval echo` against its job-level env, which is populated from the DEV_DB_*
# secrets forwarded below, and turns each line into a --build-arg.
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
# NOTE(review): DEV_* secrets are used regardless of the resolved stage - confirm
# this is intended for non-dev stages.
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
# ============================================================
# Deploy Condition ETL Lambda
# ============================================================
# Deploys the Condition ETL Lambda through the reusable _deploy_lambda.yml
# workflow, pinned to the image digest produced by condition_etl_image.
condition_etl_lambda:
needs: [condition_etl_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: condition-etl
lambda_path: infrastructure/terraform/lambda/condition-etl
stage: ${{ needs.determine_stage.outputs.stage }}
# Same repo name expression as the ecr_repo input of condition_etl_image.
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -69,14 +69,51 @@ def app():
Property UPRN Property UPRN
""" """
data_folder = "/workspaces/home/Downloads" data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire"
data_filename = "Anchor 1.xlsx" data_filename = "ASPIRE ASSET LIST.xlsx"
sheet_name = "Asset List"
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Peabody data for cleaning
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
sheet_name = "Sheet1" sheet_name = "Sheet1"
postcode_column = "Postcode" postcode_column = "Postcode"
address1_column = "House Number" address1_column = None
address1_method = None address1_method = "house_number_extraction"
fulladdress_column = None fulladdress_column = "Address"
address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2"] address_cols_to_concat = None
missing_postcodes_method = None missing_postcodes_method = None
landlord_year_built = None landlord_year_built = None
landlord_os_uprn = None landlord_os_uprn = None

View file

@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = {
'2.EXT.WALL FLAT': 'mid-terrace', '2.EXT.WALL FLAT': 'mid-terrace',
'2 EXT. WALL FLAT': 'mid-terrace', '2 EXT. WALL FLAT': 'mid-terrace',
'Maisonette: Detached: Ground Floor': 'detached',
'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace',
'Flat: End Terrace: Basement': 'end-terrace',
'Flat: Mid Terrace: Basement': 'mid-terrace',
'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace',
'House: Semi Detached: Top Floor': 'semi-detached',
'House: End Terrace: Ground Floor': 'end-terrace',
'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace',
'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace'
} }

View file

@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = {
'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 10% roof area, PV: 2kWp array': 'already has PV',
'PV: 50% roof area': 'already has PV', 'PV: 50% roof area': 'already has PV',
'Solar PV': 'already has PV', 'Solar PV': 'already has PV',
'SOLAR PV': 'already has PV' 'SOLAR PV': 'already has PV',
'PV: 40% roof area, PV: 2kWp array': 'already has PV',
'PV: 33% roof area, PV: 2kWp array': 'already has PV',
'PV: 30% roof area': 'already has PV'
} }

View file

@ -494,6 +494,10 @@ HEATING_MAPPINGS = {
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
'and sealed to, fireplace opening': 'room heaters', 'and sealed to, fireplace opening': 'room heaters',
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
'Boiler: G rated Combi': 'gas condensing combi' 'Boiler: G rated Combi': 'gas condensing combi',
'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler',
'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators',
'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler'
} }

View file

@ -427,6 +427,23 @@ PROPERTY_MAPPING = {
'End Terrace': 'unknown', 'End Terrace': 'unknown',
'Detached': 'unknown', 'Detached': 'unknown',
'Mid-terrace': 'unknown', 'Mid-terrace': 'unknown',
'MID - TERRACE': 'unknown' 'MID - TERRACE': 'unknown',
'COMOFF': 'unknown',
'LOTS': 'unknown',
'Maisonette: Detached: Ground Floor': 'maisonette',
'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette',
'Flat: End Terrace: Basement': 'flat',
'Bungalow: EnclosedEndTerrace': 'bungalow',
'Flat: Mid Terrace: Basement': 'flat',
'House: Semi Detached: Top Floor': 'house',
'House: End Terrace: Ground Floor': 'house',
'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette',
'Flat: Enclosed Mid Terrace: Basement': 'flat',
'Warden Bungalow': 'bungalow',
'Warden Flat': 'flat',
'Upper Floor Flat': 'flat',
'Extracare Scheme': 'other'
} }

View file

@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'PitchedWithSlopingCeiling: As Built': 'pitched insulated', 'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
'Flat: 150mm, Flat: Unknown': 'flat insulated',
'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above',
'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above',
'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft',
'Flat: No Insulation': 'flat uninsulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above',
'PitchedNormalLoftAccess: 175mm': 'pitched insulated',
'AnotherDwellingAbove: 300mm': 'another dwelling above'
} }

View file

@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = {
'System built Internal': 'insulated system built', 'System built Internal': 'insulated system built',
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation', 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
'Cavity: FilledCavityPlusExternal': 'filled cavity' 'Cavity: FilledCavityPlusExternal': 'filled cavity',
'Cavity, Filled Cavity': 'filled cavity',
'Solid Brick, As Built': 'solid brick unknown insulation',
'Cavity, As Built': 'cavity unknown insulation',
'Sandstone, As Built': 'sandstone or limestone unknown insulation',
'Timber Frame, As Built': 'timber frame unknown insulation',
'Solid Brick, Internal Insulation': 'insulated solid brick',
'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation',
'Solid Brick, External': 'insulated solid brick'
} }

View file

@ -1,22 +0,0 @@
DB_HOST=db
DB_PORT=5432
DB_NAME=tech_team_local_db
DB_USERNAME=postgres
DB_PASSWORD=makingwarmerhomes
#not used
GOOGLE_SOLAR_API_KEY="test"
SAP_PREDICTIONS_BUCKET="test"
CARBON_PREDICTIONS_BUCKET="test"
HEAT_PREDICTIONS_BUCKET="test"
HEATING_KWH_PREDICTIONS_BUCKET="test"
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
API_KEY="test"
ENVIRONMENT="test"
SECRET_KEY="test"
PLAN_TRIGGER_BUCKET="test"
DATA_BUCKET="test"
EPC_AUTH_TOKEN="test"
ENGINE_SQS_URL="test"
ENERGY_ASSESSMENTS_BUCKET="test"

22
backend/.env.test Normal file
View file

@ -0,0 +1,22 @@
DB_HOST=db
DB_PORT=5432
DB_NAME=tech_team_local_db
DB_USERNAME=postgres
DB_PASSWORD=makingwarmerhomes
#not used
GOOGLE_SOLAR_API_KEY=test
SAP_PREDICTIONS_BUCKET=test
CARBON_PREDICTIONS_BUCKET=test
HEAT_PREDICTIONS_BUCKET=test
HEATING_KWH_PREDICTIONS_BUCKET=test
HOTWATER_KWH_PREDICTIONS_BUCKET=test
API_KEY=test
ENVIRONMENT=test
SECRET_KEY=test
PLAN_TRIGGER_BUCKET=test
DATA_BUCKET=test
EPC_AUTH_TOKEN=test
ENGINE_SQS_URL=test
ENERGY_ASSESSMENTS_BUCKET=test

View file

@ -1256,7 +1256,8 @@ class Property:
"biodiesel": "Smokeless Fuel", "biodiesel": "Smokeless Fuel",
"b30d": "B30K Biofuel", "b30d": "B30K Biofuel",
"coal": "Coal", "coal": "Coal",
"oil": "Oil" "oil": "Oil",
"unknown": None # Handle - anything post 2020 is electricity else gas
} }
self.heating_energy_source = list({ self.heating_energy_source = list({
@ -1326,7 +1327,16 @@ class Property:
if self.heating_energy_source == "Varied (Community Scheme)": if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] mapped_to = fuel_map[self.main_fuel["fuel_type"]]
if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
# Handle logic based on age band
if self.year_built >= 2020:
self.heating_energy_source = "Electricity"
else:
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
self.heating_energy_source = mapped_to
else: else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")

View file

@ -1,7 +0,0 @@
FROM public.ecr.aws/lambda/python:3.10
# Copy function code
COPY main.py .
# Set the handler
CMD ["main.handler"]

View file

@ -0,0 +1,23 @@
# Lambda container image for the address2UPRN function.
#
# The COPY sources below (backend/..., utils/) are relative to the repository
# root, so the image must be built with the repository root as the build
# context (the CI job passes build_context: .).
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root)
WORKDIR /var/task
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/address2UPRN/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
# Shared helpers are copied as a utils/ directory; main.py lands directly in
# the task root.
COPY utils/ utils/
COPY backend/address2UPRN/main.py .
# -----------------------------
# Lambda handler
# -----------------------------
# <module>.<function>: the `handler` callable in the main.py copied above.
CMD ["main.handler"]

View file

@ -0,0 +1,3 @@
epc-api-python==1.0.2
tqdm
pandas

View file

@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
method="get", method="get",
params={"postcode": postcode}, params={"postcode": postcode},
) )
if not search_resp or "rows" not in search_resp:
return pd.DataFrame()
results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"]) results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
@ -298,7 +300,7 @@ def get_uprn_candidates(
) )
def get_uprn(user_inputed_address: str, postcode: str): def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
""" """
Return uprn (str) Return uprn (str)
Return False if failed to find a sensible matching epc Return False if failed to find a sensible matching epc
@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str):
if found_uprn == "": if found_uprn == "":
return None return None
if return_address:
return found_uprn, address
return found_uprn return found_uprn

View file

@ -1,17 +1,24 @@
import pandas as pd import pandas as pd
from tqdm import tqdm
from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
# use Address 1 def extract_uprn(row):
junte_df = pd.read_excel("hackney_uprn_failures.xlsx") print(row["User Input"], row["Postcode"])
result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
if result is None:
return pd.Series([None, None])
uprn, found_address = result
return pd.Series([uprn, found_address])
# use domna_address_1 df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
khalim_df = pd.read_excel("khalim_standard.xlsx")
combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
# Find the row in khalim_df that does not app
result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
df.to_excel("outputs2.xlsx", index=False)

View file

@ -1,8 +1,22 @@
import os
from functools import lru_cache from functools import lru_cache
from pydantic_settings import BaseSettings from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional from typing import Optional
def resolve_env_file() -> Optional[str]:
    """Choose the dotenv file that matches the current ``ENVIRONMENT``.

    Returns:
        ``"backend/.env"`` when ``ENVIRONMENT`` is unset or ``"local"``,
        ``"backend/.env.test"`` when it is ``"test"``, and ``None`` for any
        other value (e.g. prod), meaning no dotenv file is loaded.
    """
    env_file_by_environment = {
        "local": "backend/.env",
        "test": "backend/.env.test",
    }
    # Unlisted environments fall through to None: no env file is used.
    return env_file_by_environment.get(os.getenv("ENVIRONMENT", "local"))
class Settings(BaseSettings): class Settings(BaseSettings):
API_KEY: str API_KEY: str
API_KEY_NAME: str = "X-API-KEY" API_KEY_NAME: str = "X-API-KEY"
@ -41,8 +55,10 @@ class Settings(BaseSettings):
AWS_SECRET_KEY_ID: Optional[str] = None AWS_SECRET_KEY_ID: Optional[str] = None
AWS_DEFAULT_REGION: Optional[str] = None AWS_DEFAULT_REGION: Optional[str] = None
class Config: model_config = SettingsConfigDict(
env_file = "backend/.env.local" env_file=resolve_env_file(),
env_file_encoding="utf-8",
)
@lru_cache() @lru_cache()

View file

@ -24,7 +24,7 @@ def get_cleaned():
cleaned = read_from_s3( cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson", s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT) bucket_name=get_settings().DATA_BUCKET
) )
cleaned = msgpack.unpackb(cleaned, raw=False) cleaned = msgpack.unpackb(cleaned, raw=False)

View file

@ -0,0 +1,33 @@
from enum import Enum
from typing import Optional
from pydantic import BaseModel
# Supported condition-file formats. Each member's value is the string expected
# in the `file_type` field of a ConditionTriggerRequest.
class ConditionFileType(Enum):
LBWF = "LBWF"
Peabody = "Peabody"
# TODO: make these asset management systems rather than client names
# Request payload that triggers processing of one condition file.
class ConditionTriggerRequest(BaseModel):
# Format of the file to process.
file_type: ConditionFileType
# S3 bucket and key of the condition file to process (both required).
trigger_file_bucket: str # TODO: get this from settings
trigger_file_key: str
# Optional S3 bucket and key of a UPRN lookup file; both default to None.
uprn_lookup_file_bucket: Optional[str] = None # TODO: get this from settings
uprn_lookup_file_key: Optional[str] = None
# {
# "file_type": "Peabody",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx",
# "uprn_lookup_file_bucket": "condition-data-dev",
# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv"
# }
# {
# "file_type": "LBWF",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
# }

View file

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Tuple from typing import Any, Dict, Optional, Tuple
from datetime import date from datetime import date
from backend.condition.domain.aspect_condition import AspectCondition from backend.condition.domain.aspect_condition import AspectCondition

View file

@ -1,16 +0,0 @@
from enum import Enum
class FileType(Enum):
LBWF = "lbwf"
Peabody = "peabody"
def detect_file_type(filepath: str) -> FileType:
path = filepath.lower()
if "lbwf" in path:
return FileType.LBWF
if "peabody" in path:
return FileType.Peabody
raise ValueError("Unrecognised file path")

View file

@ -1,16 +0,0 @@
from typing import Mapping, Any
from io import BytesIO
from utils.logger import setup_logger
from backend.condition.processor import process_file
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
# Temporary stub for PoC wiring
dummy_stream = BytesIO(b"")
source_key = event.get("source_key", "unknown-source")
process_file(dummy_stream, source_key)

View file

@ -0,0 +1,48 @@
# Lambda container image for the condition ETL handler.
#
# The COPY sources below (backend/..., utils/) are relative to the repository
# root, so the image must be built with the repository root as the build
# context (the CI job passes build_context: .).
FROM public.ecr.aws/lambda/python:3.11
# For local running:
# FROM python:3.11.10-bullseye
# Build-time inputs; the CI build step supplies them via --build-arg.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
# Set working directory (Lambda task root)
WORKDIR /var/task
# Environment
# Persist the build args as DB_* environment variables inside the image.
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
# Ship the test dotenv under the name backend/.env, which is the path
# backend/app/config.py resolves when ENVIRONMENT is unset or "local".
# NOTE(review): backend/.env.test contains DB_USERNAME/DB_PASSWORD values -
# confirm baking them into the image is intended.
COPY backend/.env.test backend/.env
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/condition/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
# Only selected files from backend/app are copied (condition model, db
# connection, config) together with the __init__.py files for backend/,
# backend/app/ and backend/app/db/.
COPY utils/ utils/
COPY backend/condition/ backend/condition/
COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py
COPY backend/__init__.py backend/__init__.py
COPY backend/app/__init__.py backend/app/__init__.py
COPY backend/app/db/__init__.py backend/app/db/__init__.py
# -----------------------------
# Lambda handler
# -----------------------------
# Path to backend/condition/handler/handler.py followed by the function name.
CMD ["backend/condition/handler/handler.handler"]
# For local running
# CMD ["python", "-m", "backend.condition.handler.handler"]

View file

@ -0,0 +1,51 @@
import json
from typing import Mapping, Any
from io import BytesIO
from backend.condition.condition_trigger_request import ConditionTriggerRequest
from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3
from backend.condition.processor import process_file
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Process every record in ``event["Records"]`` as a condition-file trigger.

    Each record's ``body`` is a JSON string that is validated as a
    ``ConditionTriggerRequest``. The referenced condition file is read from S3
    and passed to ``process_file``; when both UPRN lookup fields are present a
    ``UprnLookupS3`` is built and passed along, otherwise ``uprn_lookup`` is
    ``None``.

    Records are handled independently and best-effort: a failure in one record
    is logged (with traceback) and the loop continues with the next record.

    Args:
        event: Invocation payload; a missing ``Records`` key is treated as an
            empty batch.
        context: Lambda context object (unused).
    """
    for record in event.get("Records", []):
        try:
            body_dict = json.loads(record["body"])

            logger.debug("Validating request body")
            payload = ConditionTriggerRequest.model_validate(body_dict)
            logger.debug("Successfully validated request body")

            # The lookup is only usable when both its bucket and key are given.
            if payload.uprn_lookup_file_bucket and payload.uprn_lookup_file_key:
                logger.debug("Getting UPRN lookup file from s3")
                uprn_lookup = UprnLookupS3(
                    bucket=payload.uprn_lookup_file_bucket,
                    key=payload.uprn_lookup_file_key,
                )  # TODO: replace with postgres implementation
                logger.debug("Successfully got UPRN lookup file from s3")
            else:
                uprn_lookup = None

            logger.debug("Getting conditions data from s3")
            file_bytes: BytesIO = read_io_from_s3(
                bucket_name=payload.trigger_file_bucket,
                file_key=payload.trigger_file_key,
            )
            logger.debug(
                "Successfully got conditions data from s3. Moving on to process file..."
            )

            process_file(
                file_stream=file_bytes,
                file_type=payload.file_type,
                uprn_lookup=uprn_lookup,
            )
        except Exception as e:
            # Deliberately keep going with the remaining records, but record the
            # full traceback: the message alone does not show whether parsing,
            # validation, the S3 read or processing failed.
            logger.exception(f"Failed to process record: {e}")

View file

@ -0,0 +1,9 @@
openpyxl
sqlmodel
pydantic-settings
psycopg2-binary==2.9.10
# pandas isn't used, but needed for importing from utils.s3
pandas==2.2.2
numpy==1.26.4

View file

@ -1,5 +1,7 @@
from pathlib import Path from pathlib import Path
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.processor import process_file from backend.condition.processor import process_file
@ -20,15 +22,27 @@ def main() -> None:
/ "peabody" / "peabody"
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx" / "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
) )
filepaths = [lbwf_path, peabody_path] peabody_uprn_lookup_path: Path = (
# filepaths = [lbwf_path] path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
)
# filepaths = [lbwf_path, peabody_path]
filepaths = [lbwf_path]
# filepaths = [peabody_path] # filepaths = [peabody_path]
uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix())
def get_file_type(file_path: str) -> ConditionFileType:
if "peabody" in file_path:
return ConditionFileType.Peabody
if "lbwf" in file_path:
return ConditionFileType.LBWF
for fp in filepaths: for fp in filepaths:
with fp.open("rb") as f: with fp.open("rb") as f:
process_file( process_file(
file_stream=f, file_stream=f,
source_key=fp.as_posix(), file_type=get_file_type(fp.as_posix()),
uprn_lookup=uprn_lookup,
) )

View file

@ -0,0 +1,8 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Dict
class UprnLookup(ABC):
    """Abstract source of a property-reference to UPRN mapping.

    Concrete subclasses decide where the mapping is loaded from.
    """

    @abstractmethod
    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Return a dict keyed by property reference with integer UPRN values."""
        ...

View file

@ -0,0 +1,23 @@
import csv
from io import TextIOWrapper
from typing import BinaryIO, Dict, TextIO
from backend.condition.lookups.uprn_lookup import UprnLookup
class UprnLookupLocal(UprnLookup):
    """UPRN lookup backed by a CSV file on the local filesystem.

    The CSV must have ``reference`` and ``out_uprn`` columns.
    """

    def __init__(self, csv_path: str):
        # Path of the CSV file; it is opened on each lookup call.
        self.csv_path = csv_path

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Read the CSV at ``csv_path`` and return ``{reference: uprn}``."""
        with open(self.csv_path, "rb") as f:
            return self.parse_csv(f)

    def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a ``{reference: uprn}`` dict.

        Rows whose ``reference`` or ``out_uprn`` cell is missing, empty or
        whitespace-only are skipped. Both values are stripped before use.

        Raises:
            KeyError: if the ``reference`` or ``out_uprn`` column is absent.
            ValueError: if a non-blank ``out_uprn`` is not an integer.
        """
        # utf-8-sig also reads plain UTF-8 but drops a leading BOM, which would
        # otherwise become part of the first header name. newline="" is what
        # the csv module expects so it can handle line endings itself.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )

        mapping: Dict[str, int] = {}
        reader = csv.DictReader(text_stream)
        for row in reader:
            # Strip BEFORE testing for emptiness so a whitespace-only cell is
            # skipped instead of reaching int("") and raising ValueError.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()
            if not reference or not uprn:
                continue
            mapping[reference] = int(uprn)
        return mapping

View file

@ -0,0 +1,29 @@
import csv
from io import BytesIO, TextIOWrapper
from typing import BinaryIO, Dict, TextIO
from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.s3 import read_io_from_s3
class UprnLookupS3(UprnLookup):
    """UPRN lookup backed by a CSV object stored in S3.

    The CSV must have ``reference`` and ``out_uprn`` columns.
    """

    def __init__(self, bucket: str = "", key: str = ""):
        # Location of the CSV object; it is fetched on each lookup call.
        self.bucket = bucket
        self.key = key

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Fetch the CSV via ``read_io_from_s3`` and return ``{reference: uprn}``."""
        file_bytes: BytesIO = read_io_from_s3(
            bucket_name=self.bucket, file_key=self.key
        )
        return self._parse_csv_bytes(file_bytes)

    def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a ``{reference: uprn}`` dict.

        Rows whose ``reference`` or ``out_uprn`` cell is missing, empty or
        whitespace-only are skipped. Both values are stripped before use.

        Raises:
            KeyError: if the ``reference`` or ``out_uprn`` column is absent.
            ValueError: if a non-blank ``out_uprn`` is not an integer.
        """
        # utf-8-sig also reads plain UTF-8 but drops a leading BOM, which would
        # otherwise become part of the first header name. newline="" is what
        # the csv module expects so it can handle line endings itself.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )

        mapping: Dict[str, int] = {}
        reader = csv.DictReader(text_stream)
        for row in reader:
            # Strip BEFORE testing for emptiness so a whitespace-only cell is
            # skipped instead of reaching int("") and raising ValueError.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()
            if not reference or not uprn:
                continue
            mapping[reference] = int(uprn)
        return mapping

View file

@ -1,27 +1,35 @@
from typing import Optional
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper
from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper
from backend.condition.file_type import FileType from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser from backend.condition.parsing.parser import Parser
from backend.condition.parsing.lbwf_parser import LbwfParser from backend.condition.parsing.lbwf_parser import LbwfParser
from backend.condition.parsing.peabody_parser import PeabodyParser from backend.condition.parsing.peabody_parser import PeabodyParser
def select_parser(file_type: FileType) -> Parser: def select_parser(
if file_type is FileType.LBWF: file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None
) -> Parser:
if file_type is ConditionFileType.LBWF:
return LbwfParser() return LbwfParser()
if file_type is FileType.Peabody: if file_type is ConditionFileType.Peabody:
return PeabodyParser() if not uprn_lookup:
raise ValueError(
"Cannot instantiate Peabody Parser without UPRN lookup being provided"
)
return PeabodyParser(uprn_lookup=uprn_lookup)
raise ValueError("Unrecognised file type, unable to instantiate Parser") raise ValueError("Unrecognised file type, unable to instantiate Parser")
def select_mapper(file_type: FileType) -> Mapper: def select_mapper(file_type: ConditionFileType) -> Mapper:
if file_type is FileType.LBWF: if file_type is ConditionFileType.LBWF:
return LbwfMapper() return LbwfMapper()
if file_type is FileType.Peabody: if file_type is ConditionFileType.Peabody:
return PeabodyMapper() return PeabodyMapper()
raise ValueError("Unrecognised file type, unable to instantiate Mapper") raise ValueError("Unrecognised file type, unable to instantiate Mapper")

View file

@ -18,7 +18,6 @@ class LbwfParser(Parser):
def parse( def parse(
self, self,
file_stream: BinaryIO, file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any: ) -> Any:
wb: Workbook = load_workbook(file_stream) wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict( address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(

View file

@ -8,6 +8,5 @@ class Parser(ABC):
def parse( def parse(
self, self,
file_stream: BinaryIO, file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any: ) -> Any:
pass pass

View file

@ -4,6 +4,7 @@ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook from openpyxl import Workbook, load_workbook
from collections import defaultdict from collections import defaultdict
from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.peabody.peabody_asset_condition import ( from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition, PeabodyAssetCondition,
@ -15,42 +16,29 @@ logger = setup_logger()
class PeabodyParser(Parser): class PeabodyParser(Parser):
def __init__(self, uprn_lookup: UprnLookup):
self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC?
def parse( def parse(
self, self,
file_stream: BinaryIO, file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any: ) -> Any:
wb: Workbook = load_workbook(file_stream) file_stream.seek(0)
logger.debug("[PeabodyParser] Loading workbook...")
if location_ref_to_uprn_map is None: wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True)
location_ref_to_uprn_map: Dict[str, int] = ( logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...")
PeabodyParser._build_location_ref_to_uprn_map()
)
assets = PeabodyParser._parse_assets(wb) assets = PeabodyParser._parse_assets(wb)
logger.debug(
"[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..."
)
location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup()
logger.debug("[PeabodyParser] Successfully parsed UPRN lookup")
return PeabodyParser._group_assets_into_properties( return PeabodyParser._group_assets_into_properties(
assets=assets, assets=assets,
location_ref_to_uprn_map=location_ref_to_uprn_map, location_ref_to_uprn_map=location_ref_to_uprn_map,
) )
@staticmethod
def _build_location_ref_to_uprn_map() -> Dict[str, int]:
location_ref_to_uprn_filepath: Path = (
Path(__file__).resolve().parents[1]
/ "sample_data"
/ "peabody"
/ "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
)
location_ref_to_uprn_map: Dict[str, int] = {}
with location_ref_to_uprn_filepath.open(newline="") as f:
reader: Any = csv.DictReader(f)
for row in reader:
location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"])
return location_ref_to_uprn_map
@staticmethod @staticmethod
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]: def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
assets_sheet = wb["Survey Records - D & Lower"] assets_sheet = wb["Survey Records - D & Lower"]
@ -67,7 +55,7 @@ class PeabodyParser(Parser):
) )
if not asset.is_block_level: if not asset.is_block_level:
# Block-level condition surveys are out of scope for now # Block-level condition surveys are out of scope for now
# until we have a wider think on how to handle block # until we have a wider think on how to handle blocks
assets.append(asset) # TODO: handle block-level assets assets.append(asset) # TODO: handle block-level assets
except Exception as e: except Exception as e:
@ -92,13 +80,14 @@ class PeabodyParser(Parser):
assets_by_location_reference[asset.lo_reference].append(asset) assets_by_location_reference[asset.lo_reference].append(asset)
properties: List[PeabodyProperty] = [] properties: List[PeabodyProperty] = []
failed_mappings_count = 0
for location_ref, grouped_assets in assets_by_location_reference.items(): for location_ref, grouped_assets in assets_by_location_reference.items():
uprn = location_ref_to_uprn_map.get(location_ref) uprn = location_ref_to_uprn_map.get(location_ref)
if uprn is None: if uprn is None:
logger.warning(f"No UPRN found for Location Reference: {location_ref}") failed_mappings_count += 1
continue continue
properties.append( properties.append(
@ -108,6 +97,7 @@ class PeabodyParser(Parser):
) )
) )
logger.warning(f"No UPRN found for {failed_mappings_count} Location References")
return properties return properties
@staticmethod @staticmethod

View file

@ -19,18 +19,19 @@ class ConditionPostgres:
def bulk_insert_surveys( def bulk_insert_surveys(
self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100 self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
) -> None: ) -> None:
logger.info( logger.debug(
f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..." f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
) )
survey_models: List[PropertyConditionSurveyModel] = [ survey_models: List[PropertyConditionSurveyModel] = [
ConditionPostgres.map_survey_to_model(s) for s in surveys ConditionPostgres.map_survey_to_model(s) for s in surveys
] ]
total: int = len(survey_models) total: int = len(survey_models)
logger.info( logger.debug(
f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..." f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
) )
with db_session() as session: with db_session() as session:
logger.info("[ConditionPostgres] Successfully made connection to database")
for start in range(0, total, batch_size): for start in range(0, total, batch_size):
end = min(start + batch_size, total) end = min(start + batch_size, total)
batch = survey_models[start:end] batch = survey_models[start:end]

View file

@ -1,26 +1,31 @@
from typing import Any, BinaryIO, List from typing import Any, BinaryIO, List, Optional
from datetime import datetime from datetime import datetime
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.logger import setup_logger from utils.logger import setup_logger
from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.parsing.parser import Parser from backend.condition.parsing.parser import Parser
from backend.condition.persistence.condition_postgres import ConditionPostgres from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.file_type import FileType, detect_file_type
from backend.condition.parsing.factory import select_parser, select_mapper from backend.condition.parsing.factory import select_parser, select_mapper
logger = setup_logger() logger = setup_logger()
def process_file(file_stream: BinaryIO, source_key: str) -> None: def process_file(
logger.info(f"[processor] Received file: {source_key}") file_stream: BinaryIO,
file_type: ConditionFileType,
uprn_lookup: Optional[UprnLookup],
) -> None:
# Instantiation # Instantiation
file_type: FileType = detect_file_type(source_key) logger.debug(f"[processor] Instantiating classes...")
parser: Parser = select_parser(file_type) parser: Parser = select_parser(file_type, uprn_lookup)
mapper: Mapper = select_mapper(file_type) mapper: Mapper = select_mapper(file_type)
persistence = ConditionPostgres() persistence = ConditionPostgres()
logger.debug(f"[processor] Finished instantiating classes. Calling Parser...")
# Orchestration # Orchestration
raw_properties: List[Any] = parser.parse(file_stream) raw_properties: List[Any] = parser.parse(file_stream)

View file

@ -0,0 +1,34 @@
import pytest
from typing import Dict
from tempfile import NamedTemporaryFile
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
@pytest.fixture
def prop_ref_uprn_csv_file(tmp_path) -> str:
    """
    Write a small property-reference -> UPRN CSV and return its path.

    The file is created under pytest's per-test ``tmp_path`` directory so it is
    cleaned up by pytest, instead of being left behind on disk as a
    ``NamedTemporaryFile(delete=False)`` would be.

    :param tmp_path: pytest built-in fixture providing a unique temporary directory
    :return: Path of the CSV file, as a string
    """
    csv_content = (
        "reference,out_uprn\n"
        "ABC123,10000000001\n"
        "DEF456,10000000002\n"
        "GHI789,10000000003\n"
    )
    csv_file = tmp_path / "prop_ref_uprn.csv"
    csv_file.write_text(csv_content)
    return str(csv_file)
def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None:
    """The lookup built from the CSV fixture maps each reference to its integer UPRN."""
    # arrange
    expected_map: Dict[str, int] = dict(
        ABC123=10000000001,
        DEF456=10000000002,
        GHI789=10000000003,
    )
    lookup = UprnLookupLocal(prop_ref_uprn_csv_file)

    # act
    actual_map: Dict[str, int] = lookup.get_property_ref_to_uprn_lookup()

    # assert
    assert actual_map == expected_map

View file

@ -1,11 +1,13 @@
import pytest import pytest
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.parsing.factory import select_parser from backend.condition.parsing.factory import select_parser
from backend.condition.file_type import FileType
def test_selects_lbwf_parser(): def test_selects_lbwf_parser():
# arrange # arrange
file_type = FileType.LBWF file_type = ConditionFileType.LBWF
expected_class_name = "LbwfParser" expected_class_name = "LbwfParser"
# act # act
@ -14,13 +16,15 @@ def test_selects_lbwf_parser():
# assert # assert
assert expected_class_name == actual_class_name assert expected_class_name == actual_class_name
def test_selects_peabody_parser(): def test_selects_peabody_parser():
# arrange # arrange
file_type = FileType.Peabody file_type = ConditionFileType.Peabody
expected_class_name = "PeabodyParser" expected_class_name = "PeabodyParser"
uprn_lookup = UprnLookupLocal(csv_path="test")
# act # act
actual_class_name = select_parser(file_type).__class__.__name__ actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__
# assert # assert
assert expected_class_name == actual_class_name assert expected_class_name == actual_class_name

View file

@ -1,9 +1,11 @@
from tempfile import NamedTemporaryFile
import pytest import pytest
from typing import Any, Dict from typing import Any, Dict
from io import BytesIO from io import BytesIO
from openpyxl import Workbook from openpyxl import Workbook
from datetime import datetime from datetime import datetime
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.parsing.peabody_parser import PeabodyParser from backend.condition.parsing.peabody_parser import PeabodyParser
from backend.condition.parsing.records.peabody.peabody_asset_condition import ( from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition, PeabodyAssetCondition,
@ -145,23 +147,28 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
@pytest.fixture @pytest.fixture
def location_ref_to_uprn_map() -> Dict[str, int]: def prop_ref_uprn_csv_file() -> str:
return { csv_content = """reference,out_uprn
"B000RAND": 1, B000RAND,1
"B000BLOCK": 2, B000BLOCK,2
"B000FAKE": 3, B000FAKE,3
"B000MIS": 4, B000MIS,4
} """
with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp:
tmp.write(csv_content)
tmp.flush()
return tmp.name
def test_peabody_parser_parses_conditions( def test_peabody_parser_parses_conditions(
peabody_assets_xlsx_bytes, location_ref_to_uprn_map peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file
): ):
# arrange # arrange
parser = PeabodyParser() uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file)
parser = PeabodyParser(uprn_lookup=uprn_lookup)
# act # act
result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map) result: Any = parser.parse(peabody_assets_xlsx_bytes)
# assert # assert
assert len(result) == 3 assert len(result) == 3

View file

@ -1,22 +0,0 @@
import pytest
from backend.condition.file_type import FileType, detect_file_type
def test_detects_lbwf_file_type():
# arrange
file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx"
expected_file_type = FileType.LBWF
# act
actual_file_type: FileType = detect_file_type(file_path_str)
# assert
assert expected_file_type == actual_file_type
def test_unknown_filepath_raises_value_error():
# arrange
file_path_str = "unknown/Example Asset Data.xlsx"
# act + assert
with pytest.raises(ValueError):
detect_file_type(file_path_str)

View file

@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data) recommendations_scoring_data.extend(p.recommendations_scoring_data)
logger.info("Preparing data for scoring in sap change api") logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop( recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
columns=[ if not recommendations_scoring_data.empty:
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", recommendations_scoring_data = recommendations_scoring_data.drop(
"carbon_ending" columns=[
] "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
) "carbon_ending"
# Temp putting this here ]
)
# TODO: Temp putting this here
recommendations_scoring_data["is_post_sap10_ending"] = True recommendations_scoring_data["is_post_sap10_ending"] = True
all_predictions = await model_api.async_paginated_predictions( all_predictions = await model_api.async_paginated_predictions(

View file

@ -313,4 +313,15 @@ class ModelApi:
logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}") logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}")
await asyncio.sleep(2 ** attempts) # exponential backoff await asyncio.sleep(2 ** attempts) # exponential backoff
await self.close_aiohttp_session() await self.close_aiohttp_session()
# Ensure stable output structure for the datagrame to be utilised by other functions downstream
for k in all_predictions.keys():
if all_predictions[k].empty:
col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if (
extract_ids) else ['id', 'predictions']
all_predictions[k] = pd.DataFrame(
columns=col_template
)
return all_predictions return all_predictions

View file

@ -0,0 +1,102 @@
# Retrofit Property Data Onboarding
This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems
(currently Parity) into a standardised internal format that is compatible with both address2uprn and engine.
The pipeline is designed to:
- Run as an AWS Lambda triggered by SQS
- Read raw CSV/XLSX files from S3
- Perform rule-based mappings
- Infer as-built property attributes, assumed based on age
- Output a processed CSV back to S3, to be consumed by address2uprn
### Structure
SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3
Each source system implements its own **Onboarder**, while sharing a common base and mapping process.
---
### Repository Structure
onboarders/
├── `handler.py` # Lambda entrypoint \
├── `factory.py` # Onboarder factory \
├── `base.py` # Shared onboarding base class \
├── `parity.py` # Parity-specific transformation logic \
├── `mappings/` \
│ └── `parity/` # Parity domain mappings & classifiers \
│ ├── `age_band.py` \
│ ├── `property_type.py` \
│ ├── `built_form.py` \
│ ├── `walls.py` \
│ ├── `roof.py` \
│ ├── `floor.py` \
│ ├── `glazing.py` \
│ ├── `heating.py` \
│ ├── `as_built_wall_classifiers.py` \
│ ├── `as_built_roof_classifiers.py` \
│ └── `as_built_floor_classifiers.py` \
├── `tests/` \
├── `requirements.txt` \
└── `README.md`
---
### Lambda Entry Point (`handler.py`)
The Lambda handler:
1. Consumes messages from the SQS queue
2. Validates the payload
3. Instantiates the correct onboarder via `OnboarderFactory`
4. Runs the transformation
5. Writes the transformed CSV back to S3
### Expected Event Payload
```json
{
"s3_uri": "s3://bucket/path/to/input.xlsx",
"system": "parity",
"format": "xlsx",
"sheet_name": "Sustainability"
}
```
### Onboarder Base `(base.py)`
OnboarderBase provides shared functionality across all systems.
*Responsibilities*
- Reading CSV/XLSX files from S3
- Writing transformed CSVs to S3
- Defining canonical output column names
- Providing validation helpers
- Common output - for the moment, onboarders will be expected to return a CSV
### Parity Onboarder `(parity.py)`
`ParityOnboarder` contains all Parity-specific transformation logic.
*Responsibilities*
- Map raw Parity fields to internal EPC-aligned enums
- Infer “as-built” constructions using age bands when insulation data is missing
- Resolve energy efficiency ratings deterministically
- Normalise output into a fixed schema
The `transform()` method orchestrates the transformation process.
### TODOs
- In `backend/onboarders/mappings/parity/glazing.py` we currently map the Parity descriptions
to tuples of descriptions and efficiency ratings. This is okay for the moment, but we may consider
using a data class, given how error-prone this is.
- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py`
- Implement an AI-enabled version, to replace the standardised asset list

View file

View file

@ -0,0 +1,84 @@
import pandas as pd
from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3
class OnboarderBase:
# Input dataset to be transformed
data: pd.DataFrame | None = None
bucket_name = None
input_file_name = None
output_file_name = None
# Description columns
landlord_wall_construction: str = "landlord_wall_construction"
landlord_roof_construction: str = "landlord_roof_construction"
landlord_floor_construction: str = "landlord_floor_construction"
landlord_windows_type: str = "landlord_windows_type"
landlord_heating_construction: str = "landlord_heating_construction"
landlord_fuel_type: str = "landlord_fuel_type"
landlord_heating_controls: str = "landlord_heating_controls"
landlord_hot_water_system: str = "landlord_hot_water_system"
# Efficiency columns
landlord_roof_efficiency: str = "landlord_roof_efficiency"
landlord_windows_efficiency: str = "landlord_windows_efficiency"
landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency"
landlord_heating_efficiency: str = "landlord_heating_efficiency"
landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency"
landlord_wall_efficiency: str = "landlord_wall_efficiency"
# Additional windows features
landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion"
landlord_glazed_type: str = "landlord_glazed_type"
landlord_glazed_area: str = "landlord_glazed_area"
# Additional roof features
landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling"
# Shape, dimensions, age
landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2"
landlord_construction_age_band: str = "landlord_construction_age_band"
landlord_property_type: str = "landlord_property_type"
landlord_built_form: str = "landlord_built_form"
def read_s3(self, file_format, **kwargs):
if self.input_file_name is None or self.bucket_name is None:
raise ValueError("Bucket name and input file name must be set before reading from S3.")
if file_format == "xlsx":
self.data = read_excel_from_s3(
bucket_name=self.bucket_name,
file_key=self.input_file_name,
sheet_name=kwargs.get("sheet_name"),
header_row=kwargs.get("header_row", 0)
)
else:
self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name)
def write(self):
if self.data is None:
raise ValueError("No data to write. Please run transform() before writing.")
if self.bucket_name is None or self.output_file_name is None:
raise ValueError("Bucket name and output file name must be set before writing to S3.")
# Store file as csv - will store in the same route location as the input file
save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name)
@staticmethod
def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool:
# We only allow nulls if the original value was null
null_vals = data[pd.isnull(data[mapped_column])]
if null_vals.empty:
return True
# We make sure all original values were null
assert pd.isnull(null_vals[original_column]).all(), (
f"Some values in {mapped_column} were not mapped, but original values were not null"
)
@staticmethod
def assert_no_nulls(data: pd.DataFrame, column: str):
assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not"
def map_construction_age_band(self):
raise NotImplementedError(
"This method should be implemented by subclasses to map construction age bands to descriptions"
)

View file

@ -0,0 +1,10 @@
from onboarders.parity import ParityOnboarder
class OnboarderFactory:
    """Creates the onboarder that matches a source-system identifier."""

    @staticmethod
    def create_onboarder(onboarder_type, **kwargs):
        """
        Build the onboarder for ``onboarder_type``, forwarding ``kwargs`` to its constructor.

        :raises ValueError: if ``onboarder_type`` is not a known system
        """
        # Guard clause: "parity" is the only system handled here
        if onboarder_type != "parity":
            raise ValueError(f"Unknown onboarder type: {onboarder_type}")

        return ParityOnboarder(**kwargs)

View file

@ -0,0 +1,50 @@
import json
from pydantic import BaseModel, Field
from typing import Optional, Literal
from onboarders.factory import OnboarderFactory
from utils.logger import setup_logger
logger = setup_logger()
class OnboardingEvent(BaseModel):
    """Validated payload of one onboarding message (the decoded JSON body of an SQS record)."""

    # Location of the raw input file to onboard
    s3_uri: str = Field(..., description="S3 URI of the raw ARA input file")
    # Source system; the handler passes this to OnboarderFactory to pick the onboarder.
    # NOTE(review): "generic" validates here, but the OnboarderFactory in this change
    # only handles "parity" and raises ValueError for anything else - confirm intent.
    system: Literal["parity", "generic"] = Field(..., description="Onboarding system identifier")
    # File format of the input file
    format: Literal["csv", "xlsx"]
    # Worksheet to read; presumably only relevant for xlsx input - TODO confirm
    sheet_name: Optional[str] = None
def handler(event, context):
    """
    Lambda handler that onboards the raw input file described by each SQS message.

    For every record in ``event["Records"]`` the JSON body is validated as an
    ``OnboardingEvent``, the matching onboarder is created, and its ``transform()``
    and ``write()`` methods are run. A failure in one record is logged (with
    traceback) and does not stop the remaining records from being processed.

    :param event: Lambda event; records are read from its "Records" list
    :param context: Lambda context object (unused)
    """
    for record in event.get("Records", []):
        try:
            event_body = json.loads(record["body"])
            # Sample input data
            # event_body = {
            #     "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for "
            #               "Domna.xlsx",
            #     "system": "parity",
            #     "format": "xlsx",
            #     "sheet_name": "Sustainability"
            # }

            logger.info("Processing record with body: %s", event_body)
            validated_event = OnboardingEvent(**event_body)

            # NOTE(review): `format` and `file_format` carry the same value - confirm
            # the onboarder constructor really needs both keyword arguments.
            onboarder = OnboarderFactory.create_onboarder(
                validated_event.system,
                fileuri=validated_event.s3_uri,
                format=validated_event.format,
                sheet_name=validated_event.sheet_name,
                file_format=validated_event.format
            )

            logger.info("Transforming data")
            onboarder.transform()
            logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}")
            onboarder.write()

        except Exception as e:
            # Deliberate best-effort: keep processing other records, but log the
            # traceback so the failure can actually be diagnosed.
            logger.exception(f"Failed to process record: {e}")

View file

@ -1,14 +0,0 @@
party_map = {
"Before 1900": 'England and Wales: before 1900',
"1900-1929": 'England and Wales: 1900-1929',
"1930-1949": 'England and Wales: 1930-1949',
"1950-1966": 'England and Wales: 1950-1966',
"1967-1975": 'England and Wales: 1967-1975',
"1976-1982": 'England and Wales: 1976-1982',
"1983-1990": 'England and Wales: 1983-1990',
"1991-1995": 'England and Wales: 1991-1995',
"1996-2002": 'England and Wales: 1996-2002',
"2003-2006": 'England and Wales: 2003-2006',
"2007-2011": 'England and Wales: 2007-2011',
"2012 onwards": 'England and Wales: 2012-2021',
}

View file

@ -1,15 +0,0 @@
parity_map = {
"MidTerrace": "Mid-Terrace",
"EndTerrace": "End-Terrace",
"Detached": "Detached",
"SemiDetached": "Semi-Detached",
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
"EnclosedEndTerrace": "Enclosed End-Terrace",
}
# MidTerrace 41462
# EndTerrace 20910
# Detached 16875
# SemiDetached 14725
# EnclosedMidTerrace 3176
# EnclosedEndTerrace 2393

View file

@ -0,0 +1,19 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
# Maps Parity construction age band labels to EpcConstructionAgeBand members.
parity_map = {
    "Before 1900": EpcConstructionAgeBand.before_1900,
    "1900-1929": EpcConstructionAgeBand.from_1900_to_1929,
    "1930-1949": EpcConstructionAgeBand.from_1930_to_1949,
    "1950-1966": EpcConstructionAgeBand.from_1950_to_1966,
    "1967-1975": EpcConstructionAgeBand.from_1967_to_1975,
    "1976-1982": EpcConstructionAgeBand.from_1976_to_1982,
    "1983-1990": EpcConstructionAgeBand.from_1983_to_1990,
    "1991-1995": EpcConstructionAgeBand.from_1991_to_1995,
    "1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
    "2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
    "2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
    "2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
    # Newer age bands, under SAP10
    "2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
    "2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
}

View file

@ -0,0 +1,60 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.floor import EpcFloorDescriptions
def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick the assumed as-built floor description when the floor type is unknown.

    Based on the age band's start year: 2003+ solid insulated, 1996+ solid with
    limited insulation, 1930+ solid uninsulated, earlier suspended uninsulated.
    """
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    elif start >= 1930:
        description = EpcFloorDescriptions.solid_no_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed

    return description
def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick the floor description for a retrofitted floor of unknown type.

    Age bands starting in 1930 or later give an insulated solid floor; earlier
    ones give an insulated suspended floor.
    """
    return (
        EpcFloorDescriptions.solid_insulated
        if age_band.start_year() >= 1930
        else EpcFloorDescriptions.suspended_insulated
    )
def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick the assumed as-built description for a solid floor.

    Based on the age band's start year: 2003+ insulated, 1996+ limited
    insulation, otherwise no insulation.
    """
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.solid_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.solid_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.solid_no_insulation_assumed

    return description
def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick the assumed as-built description for a suspended floor.

    Based on the age band's start year: 2003+ insulated, 1996+ limited
    insulation, otherwise no insulation.
    """
    start = age_band.start_year()

    if start >= 2003:
        description = EpcFloorDescriptions.suspended_insulated_assumed
    elif start >= 1996:
        description = EpcFloorDescriptions.suspended_limited_insulation_assumed
    else:
        description = EpcFloorDescriptions.suspended_no_insulation_assumed

    return description
# Floor type -> function that picks the assumed as-built description from an age band.
# Both suspended variants share the same classifier.
as_built_floor_classifiers = {
    "Solid": map_solid_floor_as_built,
    "SuspendedTimber": map_suspended_floor_as_built,
    "SuspendedNotTimber": map_suspended_floor_as_built,
}

# Used when the floor type itself is unknown: insulation status -> age-band classifier.
# "AsBuilt" and "Unknown" are treated the same way.
unknown_as_built_floor_classifiers = {
    "RetroFitted": unknown_floor_retrofitted,
    "AsBuilt": unknown_floor_as_built,
    "Unknown": unknown_floor_as_built,
}

View file

@ -0,0 +1,56 @@
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Pick the as-built description for a flat roof from its age band.

    Breakdown by age band:
    2023 onwards            Flat, insulated
    2003-2022               Flat, insulated
    1983-2002               Flat, insulated
    1976-1982               Flat, limited insulation
    1967-1975               Flat, limited insulation
    1950-1966 and earlier   Flat, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()

    if start >= 1983:
        description = EpcRoofDescriptions.flat_insulated
    elif start >= 1967:
        description = EpcRoofDescriptions.flat_limited_insulation
    else:
        description = EpcRoofDescriptions.flat_no_insulation

    return description
def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Pick the as-built description for a pitched roof with sloping ceiling from its age band.

    Breakdown by age band:
    2023 onwards            Sloping pitched, insulated
    2003-2022               Sloping pitched, insulated
    1983-2002               Sloping pitched, insulated
    1976-1982               Sloping pitched, limited insulation
    1967-1975 and earlier   Sloping pitched, no insulation

    :param age_band: Input age band
    :return: EpcRoofDescriptions
    """
    start = age_band.start_year()

    if start >= 1983:
        description = EpcRoofDescriptions.sloping_pitched_insulated
    elif start >= 1976:
        description = EpcRoofDescriptions.sloping_pitched_limited_insulation
    else:
        description = EpcRoofDescriptions.sloping_pitched_no_insulation

    return description
# Roof type -> function that picks the as-built description from an age band.
as_built_roof_classifiers = {
    # Only need to apply this to flat and sloping ceiling roofs
    "Flat": map_flat_roof,
    "PitchedWithSlopingCeiling": map_sloping_ceiling_roof,
}

View file

@ -0,0 +1,113 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the assumed as-built cavity wall description for an age band.

    Start year before 1976: no insulation; 1976-1982: partial insulation;
    bands from 1983 onwards: insulated.

    :raises NotImplementedError: if the age band matches none of the cases
    """
    start_year = age_band.start_year()

    if start_year < 1976:
        description = EpcWallDescriptions.cavity_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.cavity_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.cavity_insulated_assumed
    else:
        raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping")

    return description
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the assumed as-built solid brick wall description for an age band.

    Start year before 1976: no insulation; 1976-1982: partial insulation;
    bands from 1983 onwards: insulated.

    :raises NotImplementedError: if the age band matches none of the cases
    """
    start_year = age_band.start_year()

    if start_year < 1976:
        description = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )

    return description
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the assumed as-built timber frame wall description for an age band.

    Start year before 1950: no insulation; before 1976: partial insulation;
    bands from 1976 onwards: insulated.

    :raises NotImplementedError: if the age band matches none of the cases
    """
    start_year = age_band.start_year()

    if start_year < 1950:
        description = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif start_year < 1976:
        description = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        description = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )

    return description
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the assumed as-built system build wall description for an age band.

    Start year before 1976: no insulation; 1976-1982: partial insulation;
    bands from 1983 onwards: insulated.

    :raises NotImplementedError: if the age band matches none of the cases
    """
    start_year = age_band.start_year()

    if start_year < 1976:
        description = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )

    return description
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the assumed as-built granite/whinstone wall description for an age band.

    Start year before 1976: no insulation; 1976-1982: partial insulation;
    bands from 1983 onwards: insulated.

    :raises NotImplementedError: if the age band matches none of the cases
    """
    start_year = age_band.start_year()

    if start_year < 1976:
        description = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): spelled "whinestone" here but "whinstone" in the two members
        # above - confirm this matches the member name declared on EpcWallDescriptions.
        description = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )

    return description
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the assumed as-built sandstone/limestone wall description for an age band.

    Start year before 1976: no insulation; 1976-1982: partial insulation;
    bands from 1983 onwards: insulated.

    :raises NotImplementedError: if the age band matches none of the cases
    """
    start_year = age_band.start_year()

    if start_year < 1976:
        description = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        description = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )

    return description
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the as-built cob wall description for an age band.

    Start year before 1983: "average"; bands from 1983 onwards: "good".

    :raises NotImplementedError: if the age band matches neither case
    """
    start_year = age_band.start_year()

    if start_year < 1983:
        description = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        description = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )

    return description
# Wall construction type -> function that picks the assumed as-built wall
# description from an age band.
as_built_wall_classifiers = {
    "Cavity": map_cavity_wall_insulation,
    "Solid Brick": map_solid_wall_insulation,
    "Timber Frame": map_timber_frame_wall_insulation,
    "System": map_system_build_wall_insulation,
    "Granite": map_granite_wall_insulation,
    "Sandstone": map_sandstone_wall_insulation,
    "Cob": map_cob_wall_insulation,
}

View file

@ -0,0 +1,10 @@
from datatypes.epc.property_type_built_form import BuiltForm
# Maps Parity built form labels to BuiltForm members.
parity_map = {
    "MidTerrace": BuiltForm.mid_terrace,
    "EndTerrace": BuiltForm.end_terrace,
    "Detached": BuiltForm.detached,
    "SemiDetached": BuiltForm.semi_detached,
    "EnclosedMidTerrace": BuiltForm.enclosed_mid_terrace,
    "EnclosedEndTerrace": BuiltForm.enclosed_end_terrace,
}

View file

@ -0,0 +1,26 @@
from numpy import nan
from datatypes.epc.floor import EpcFloorDescriptions
# Maps (floor type, insulation status) pairs to a floor description. A value of
# None means there is no direct mapping; the trailing comment names how the pair
# is expected to be resolved instead.
# NOTE(review): several keys contain numpy's `nan`. Because nan != nan, a dict
# lookup only matches these keys when the probing tuple holds the very same nan
# object; a NaN produced elsewhere (e.g. float("nan")) will miss. Confirm the
# consumer normalises missing values before looking up.
floor_map = {
    # Solid floor
    ('Solid', 'AsBuilt'): None,  # Mapped
    ('Solid', 'Unknown'): None,  # Mapped
    ('Solid', nan): None,  # Mapped
    ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated,
    # Suspended floor
    ('SuspendedTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedNotTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    # Unknown type - mapped on age
    ('Unknown', 'Unknown'): None,  # Mapped unknown_floor_as_built
    ('Unknown', 'RetroFitted'): None,  # Mapped unknown_floor_retrofitted
    (nan, nan): None,  # No actual information!
    ('Unknown', 'AsBuilt'): None,  # Mapped unknown_floor_as_built
}

View file

@ -0,0 +1,20 @@
from datatypes.epc.efficiency import EpcEfficiency
# Glazing label -> tuple of
#   (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
#
# For SAP 10 assessments the glazed type and glazed area are no longer
# populated in the EPC API data, so the last two positions are always None.
glazing_map = {
    'Double 2002 or later': ('Fully double glazed', EpcEfficiency.AVERAGE, 1, None, None),
    'Double before 2002': ('Fully double glazed', EpcEfficiency.POOR, 1, None, None),
    'Double but age unknown': ('Fully double glazed', EpcEfficiency.POOR, 1, None, None),
    'Single': ('Single glazed', EpcEfficiency.VERY_POOR, 0, None, None),
    # Triple glazing of unknown age only rates Average. A post-2022
    # installation is instead classed as high performance glazing with Good
    # efficiency, so care is needed when updating windows data.
    'Triple': ('Fully triple glazed', EpcEfficiency.AVERAGE, 1, None, None),
    # Double glazing with known data is classed as high performance glazing.
    'DoubleKnownData': ('High performance glazing', EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10 secondary glazing is Poor (under SAP 2012 it was generally Good).
    'Secondary': ('Full secondary glazing', EpcEfficiency.POOR, 1, None, None),
    # Triple glazing with known data is high performance glazing, Good (at least).
    'TripleKnownData': ('High performance glazing', EpcEfficiency.GOOD, 1, None, None),
}

View file

@ -0,0 +1,330 @@
from datatypes.epc.main_heating import EpcHeatingSystems
from datatypes.epc.efficiency import EpcEfficiency
from datatypes.epc.fuel import EpcFuel
from datatypes.epc.heating_controls import EpcHeatingControls
from datatypes.epc.hotwater import EpcHotWaterSystems
# Lookup table from a 4-tuple of source labels to a 7-tuple of EPC values.
#
# Key layout (all strings):
#   (system category, single letter 'A'-'G', fuel label, controls tier)
#   NOTE(review): the meaning of the letter is not shown in this module -
#   presumably a rating/band; confirm against the caller.
#   Controls tier is one of 'Optimal', 'Sub Optimal', 'Top Spec'.
#
# Value layout:
#   (EpcHeatingSystems member, EpcEfficiency, EpcFuel member,
#    EpcHeatingControls member, EpcEfficiency,
#    EpcHotWaterSystems member, EpcEfficiency)
#   Each EpcEfficiency presumably rates the item immediately before it
#   (heating system, controls, hot water) - TODO confirm.
#
# The numeric comments are running entry indexes only.
heating_map = {
# 0
('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 1
('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 2
('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 3
('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 4
('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 5
('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 6
('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 7
('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 8
('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 9
('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 10
('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 11
('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 12
('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 13
('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 14
('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 15
('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 16
('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 17
('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 18
('Boilers', 'C', 'OilNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 19
('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 20
('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 21
('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 22
('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 23
('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 24
('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 25
('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 26
('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 27
('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 28
('Boilers', 'E', 'OilNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 29
('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 30
('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 31
('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 32
('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 33
('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 34
('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 35
# NOTE(review): the other two ('Boilers', 'G', 'MainsGasNotCommunity', ...) rows
# give the heating system EpcEfficiency.GOOD; this row gives AVERAGE. Every other
# group keeps that value constant across controls tiers - confirm this is intended.
('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 36
('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 37
('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 38
('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 39
('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 40
('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 41
('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 42
('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 43
('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 44
('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 45
('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 46
('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 47 - water done from here
('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 48
('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 49
('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 50
('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 51
('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 52
('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 53
('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 54
('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
)
}

View file

@ -0,0 +1,8 @@
from datatypes.epc.property_type_built_form import PropertyType
# Translates the source system's property-type label into the PropertyType enum.
parity_map = dict(
    Flat=PropertyType.flat,
    Maisonette=PropertyType.maisonette,
    Bungalow=PropertyType.bungalow,
    House=PropertyType.house,
)

View file

@ -0,0 +1,461 @@
import pandas as pd
from numpy import nan
from typing import Union, Callable
from collections.abc import Mapping
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
# Lookup keyed by (roof construction label, roof insulation label) giving the
# EPC roof description. A value of None means the description is not fixed
# here and is "to be classified" elsewhere (presumably from the construction
# age band - verify against the caller).
#
# Insulation thickness labels use the form 'mmNN'. The legacy ('Flat', '12mm')
# key did not follow that form, so ('Flat', 'mm12') is provided as well; the
# legacy spelling is kept so existing lookups keep working.
#
# NOTE(review): several keys contain `nan`. Because nan != nan, such a key is
# only found when the lookup tuple holds the very same `numpy.nan` object;
# confirm the caller normalises missing values accordingly.
roof_map = {
    # Dwelling above
    ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # Pitched, normal loft access, with a loft thickness
    ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # Pitched, no loft access, with a loft thickness
    ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # All pitched options with as-built or unknown insulation go to
    # EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # Flat
    ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation,  # legacy spelling, kept for compatibility
    ('Flat', 'mm12'): EpcRoofDescriptions.flat_limited_insulation,  # same label form as every other thickness key
    ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ('Flat', 'AsBuilt'): None,  # To be classified
    ('Flat', nan): None,  # To be classified
    ('Flat', 'Unknown'): None,  # To be classified
    # Flat roof efficiency by thickness, for reference:
    # 12mm = very poor & has limited insulation description
    # 25, 50 = poor & has limited insulation description
    # 75, 100, 125mm = average (Flat, insulated)
    # 150, 175, 200, 225, 250mm = good (Flat, insulated)
    # 270mm+ = very good (Flat, insulated)
    # Thatched
    ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched,  # efficiency classified based on age
    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ('PitchedWithSlopingCeiling', 'AsBuilt'): None,  # To be classified
    ('PitchedWithSlopingCeiling', nan): None,  # To be classified
    ('PitchedWithSlopingCeiling', 'Unknown'): None,  # To be classified
}
# Roof construction label -> "as built, unknown" roof description.
# NOTE(review): judging by the name, this is used when the construction age
# is not known - confirm against the caller.
roof_unknown_age_fallback = dict(
    Flat=EpcRoofDescriptions.flat_as_built_unknown,
    PitchedWithSlopingCeiling=EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    PitchedThatched=EpcRoofDescriptions.thatched_as_built_unknown,
    PitchedNormalLoftAccess=EpcRoofDescriptions.loft_as_built_unknown,
    PitchedNormalNoLoftAccess=EpcRoofDescriptions.loft_as_built_unknown,
)
# How a roof description's efficiency is determined: either a fixed
# EpcEfficiency, or a function that derives one.
RoofEfficiencyRule = Union[
    EpcEfficiency,
    # Age-band-only rule, e.g. loft_insulated_efficiency(age_band).
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
    # Combined rule, e.g. flat_efficiency(insulation_thickness, age_band).
    Callable[[int | None, EpcConstructionAgeBand], EpcEfficiency],
    # NOTE(review): original declaration (age band first). No rule function
    # defined alongside this alias uses this parameter order; it is retained
    # only so existing annotations keep type-checking - confirm and remove.
    Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency],
]
def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a flat roof from the first year of its construction age band.

    Band start year -> efficiency:
        2023 or later -> Very Good
        2003 to 2022  -> Good
        1983 to 2002  -> Average
        1976 to 1982  -> Poor
        before 1976   -> Very Poor

    :param age_band: EpcConstructionAgeBand; only ``start_year()`` is read
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    # Newest first: the first floor the start year reaches decides the rating.
    year_floors = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for floor_year, rating in year_floors:
        if built_from >= floor_year:
            return rating
    return EpcEfficiency.VERY_POOR
def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a flat roof from its insulation thickness.

    Thickness (mm) -> efficiency:
        270 or more -> Very Good
        150 to 269  -> Good
        75 to 149   -> Average
        25 to 74    -> Poor
        below 25    -> Very Poor

    The standard sizes (12, 25, 50, 75, ... 250, 270+) rate exactly as before.
    Thresholds are lower bounds, so a value between two standard sizes
    (e.g. 260) takes the rating of the size below it instead of falling
    through to Very Poor.

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for flat insulated efficiency calculation")
    # Descending lower bounds leave no gaps between bands.
    if insulation_thickness >= 270:
        return EpcEfficiency.VERY_GOOD
    if insulation_thickness >= 150:
        return EpcEfficiency.GOOD
    if insulation_thickness >= 75:
        return EpcEfficiency.AVERAGE
    if insulation_thickness >= 25:
        return EpcEfficiency.POOR
    return EpcEfficiency.VERY_POOR
def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Flat roof efficiency: a known insulation thickness takes precedence,
    otherwise the construction age band is used.

    :param insulation_thickness: Insulation thickness in mm, or None if not known
    :param age_band: EpcConstructionAgeBand, consulted only when thickness is None
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        return flat_insulated_efficiency_age_band(age_band)
    return flat_insulated_efficiency_thickness(insulation_thickness)
def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate an insulated loft from the first year of its construction age band.

    Band start year -> efficiency:
        2003 or later (2003-2006, 2007-2011, 2012-2022, 2023 onwards) -> Very Good
        1991 to 2002  (1991-1995, 1996-2002)                          -> Good
        before 1991   (1983-1990 and every earlier band)              -> Average

    :param age_band: Input age band, EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    return (
        EpcEfficiency.VERY_GOOD
        if first_year >= 2003
        else EpcEfficiency.GOOD
        if first_year >= 1991
        else EpcEfficiency.AVERAGE
    )
def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a thatched roof from the first year of its construction age band.

    Band start year -> efficiency:
        2023 or later -> Very Good
        2003 to 2022  -> Good
        before 2003   -> Average

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    for cutoff, rating in ((2023, EpcEfficiency.VERY_GOOD), (2003, EpcEfficiency.GOOD)):
        if first_year >= cutoff:
            return rating
    return EpcEfficiency.AVERAGE
def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a thatched roof from its additional insulation thickness.

    Thickness (mm) -> efficiency:
        175 or more -> Very Good
        25 to 174   -> Good
        below 25    -> Average

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for thatched efficiency calculation")
    return (
        EpcEfficiency.VERY_GOOD
        if insulation_thickness >= 175
        else EpcEfficiency.GOOD
        if insulation_thickness >= 25
        else EpcEfficiency.AVERAGE
    )
def thatched_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Thatched roof efficiency: a known insulation thickness takes precedence,
    otherwise the construction age band is used.

    :param insulation_thickness: Insulation thickness in mm, or None if not known
    :param age_band: EpcConstructionAgeBand, consulted only when thickness is None
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        return thatched_efficiency_age_band(age_band)
    return thatched_efficiency_thickness(insulation_thickness)
def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a pitched roof with sloping ceiling from the first year of its
    construction age band.

    Band start year -> efficiency:
        2023 or later -> Very Good
        2003 to 2022  -> Good
        1983 to 2002  -> Average
        1976 to 1982  -> Poor
        before 1976   -> Very Poor

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    cutoffs = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    # First cutoff reached (newest first) wins; nothing reached means pre-1976.
    return next(
        (rating for cutoff, rating in cutoffs if first_year >= cutoff),
        EpcEfficiency.VERY_POOR,
    )
def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a pitched roof with sloping ceiling from its insulation thickness.

    Thickness (mm) -> efficiency:
        270 or more -> Very Good
        150 to 269  -> Good
        75 to 149   -> Average
        25 to 74    -> Poor
        below 25    -> Very Poor

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation")
    bands = (
        (270, EpcEfficiency.VERY_GOOD),
        (150, EpcEfficiency.GOOD),
        (75, EpcEfficiency.AVERAGE),
        (25, EpcEfficiency.POOR),
    )
    for minimum_mm, rating in bands:
        if insulation_thickness >= minimum_mm:
            return rating
    return EpcEfficiency.VERY_POOR
def sloping_ceiling_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Sloping ceiling roof efficiency: a known insulation thickness takes
    precedence, otherwise the construction age band is used.

    :param insulation_thickness: Insulation thickness in mm, or None if not known
    :param age_band: EpcConstructionAgeBand, consulted only when thickness is None
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        return sloping_ceiling_efficiency_age_band(age_band)
    return sloping_ceiling_efficiency_thickness(insulation_thickness)
def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from its insulation thickness.

    Thickness (mm) -> efficiency:
        350 or more -> Very Good
        200 to 349  -> Good
        125 to 199  -> Average
        50 to 124   -> Poor
        below 50    -> Very Poor

    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation")
    bands = (
        (350, EpcEfficiency.VERY_GOOD),
        (200, EpcEfficiency.GOOD),
        (125, EpcEfficiency.AVERAGE),
        (50, EpcEfficiency.POOR),
    )
    return next(
        (rating for minimum_mm, rating in bands if insulation_thickness >= minimum_mm),
        EpcEfficiency.VERY_POOR,
    )
def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from the first year of its
    construction age band.

    Band start year -> efficiency:
        2023 or later -> Very Good
        2003 to 2022  -> Good
        1983 to 2002  -> Average
        1976 to 1982  -> Poor
        before 1976   -> Very Poor

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    for cutoff, rating in (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    ):
        if first_year >= cutoff:
            return rating
    return EpcEfficiency.VERY_POOR
def loft_insulated_at_rafters_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters, preferring the insulation thickness over the age band.
    :param insulation_thickness: Insulation thickness in mm, or None when no thickness is available
    :param age_band: EpcConstructionAgeBand, only consulted when insulation_thickness is None
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No thickness supplied, so rate from the construction age band instead
        return loft_insulated_at_rafters_efficiency_age_band(age_band)
    return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness)
# Efficiency rule for each roof description. A value is either a fixed EpcEfficiency or a
# callable rule; resolve_roof_efficiency returns fixed values directly and calls the
# callables with (insulation_thickness, age_band), falling back to (age_band).
ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = {
# Flat roof
EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR,
EpcRoofDescriptions.flat_limited_insulation: flat_efficiency,
EpcRoofDescriptions.flat_insulated: flat_efficiency,
# Loft:
# value mappings (fixed efficiency per stated loft insulation thickness)
EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR,
EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR,
EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR,
EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE,
EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE,
EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE,
EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR,
# function mappings
EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency,
# Loft at rafters
EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency,
# Another dwelling above
EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA,
# Thatched
EpcRoofDescriptions.thatched: thatched_efficiency,
EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency,
# Sloping ceiling
EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency,
EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency,
EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR,
}
def resolve_roof_efficiency(
    description: EpcRoofDescriptions,
    age_band: EpcConstructionAgeBand | None,
    insulation_thickness: int | None,
) -> EpcEfficiency:
    """
    Resolve roof efficiency from description + age band + insulation thickness.

    Resolution order:
    1. descriptions listed in ``description.unknown_descriptions`` -> NA
    2. descriptions with no entry in ROOF_DESCRIPTION_EFFICIENCIES -> NA
    3. fixed EpcEfficiency entries are returned as they are
    4. callable rules need an age band; a missing (None / NaN) age band -> NA
    5. callable rules are called with (insulation_thickness, age_band), falling back to (age_band)

    :param description: EpcRoofDescriptions to rate
    :param age_band: EpcConstructionAgeBand, or None / NaN when unknown
    :param insulation_thickness: Insulation thickness in mm; None or NaN when unknown
    :return: EpcEfficiency
    """
    # Unknown / holding descriptions → efficiency unknown
    if description in description.unknown_descriptions:
        return EpcEfficiency.NA
    rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description)
    if rule is None:
        return EpcEfficiency.NA
    # Fixed efficiency
    if isinstance(rule, EpcEfficiency):
        return rule
    # Callable rule
    if age_band is None or pd.isnull(age_band):
        return EpcEfficiency.NA
    # A thickness read back from a pandas column arrives as NaN rather than None when it is
    # missing. NaN passes the rules' "is not None" check and then fails every ">=" comparison,
    # which would rate the roof VERY_POOR instead of falling back to the age band.
    if insulation_thickness is not None and pd.isnull(insulation_thickness):
        insulation_thickness = None
    try:
        # Try (thickness, age_band)
        return rule(insulation_thickness, age_band)
    except TypeError:
        # Fallback to (age_band)
        # NOTE(review): this also retries when the TypeError was raised inside the rule itself,
        # not only when the rule takes a single argument.
        return rule(age_band)

View file

@ -0,0 +1,211 @@
from typing import Callable, Union
from collections.abc import Mapping
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.efficiency import EpcEfficiency
# Unique combinations
# Keys are (wall construction, wall insulation) pairs. A value of None means the pair has no
# direct description and still has to be classified.
wall_map = {
# Cavity walls
('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
('Cavity', 'AsBuilt'): None, # To be classified
('Cavity', 'Unknown'): None, # To be classified
# System built walls
('System', 'External'): EpcWallDescriptions.system_external_insulation,
('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
('System', 'AsBuilt'): None, # To be classified
('System', 'Unknown'): None,
# Timber Frame walls
('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
('Timber Frame', 'AsBuilt'): None, # To be classified
('Timber Frame', 'Unknown'): None,
# Solid Brick walls
('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
('Solid Brick', 'AsBuilt'): None, # To be classified
('Solid Brick', 'Unknown'): None,
# Granite walls
('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
('Granite', 'AsBuilt'): None,
('Granite', 'Unknown'): None,
# Sandstone walls
('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
('Sandstone', 'Unknown'): None,
('Sandstone', 'AsBuilt'): None,
# Cob walls
# NOTE(review): only the AsBuilt pair is listed for Cob — confirm no other Cob pairs occur.
('Cob', 'AsBuilt'): None,
}
# "As built, unknown" wall description for each wall construction type.
# NOTE(review): judging by the name, this is the fallback for rows with no construction age band — confirm against the caller.
wall_unknown_age_fallback = {
"Cavity": EpcWallDescriptions.cavity_as_built_unknown,
"Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
"Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
"System": EpcWallDescriptions.system_as_built_unknown,
"Granite": EpcWallDescriptions.granite_as_built_unknown,
"Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
"Cob": EpcWallDescriptions.cob_as_built_unknown,
}
def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a filled cavity wall from its construction age band.
    Only the 2023-onwards band is rated very good; every other band is rated good.
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {EpcConstructionAgeBand.from_2023_onwards}
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
def internal_external_insulation_efficiency(
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate walls with internal/external insulation from the construction age band. Used for:
    - unfilled cavity walls with internal/external insulation
    - solid brick walls with internal/external insulation
    - system built walls with internal/external insulation
    In each case we assume 100mm of insulation; all of these wall types behave the same in elmhurst.
    Bands from 1983-1990 onwards are rated very good, every other band is rated good.
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {
        EpcConstructionAgeBand.from_1983_to_1990,
        EpcConstructionAgeBand.from_1991_to_1995,
        EpcConstructionAgeBand.from_1996_to_2002,
        EpcConstructionAgeBand.from_2003_to_2006,
        EpcConstructionAgeBand.from_2007_to_2011,
        EpcConstructionAgeBand.from_2012_to_2022,
        EpcConstructionAgeBand.from_2023_onwards,
    }
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate walls with internal/external wall insulation from the construction age band. Used for:
    - timber frame walls
    - sandstone/limestone walls
    - granite/whinstone walls
    Only the 2023-onwards band is rated very good; every other band is rated good.
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    very_good_bands = {EpcConstructionAgeBand.from_2023_onwards}
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
# A wall efficiency rule is either a fixed rating or a function of the construction age band.
# Every wall rule function takes the age band as its only argument and resolve_wall_efficiency
# calls rules as rule(age_band), so the callable form declares exactly that one parameter.
WallEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
]
# Efficiency rule for each wall description. A value is either a fixed EpcEfficiency or a
# function that takes the construction age band and returns an EpcEfficiency.
WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = {
# Note: all function mappings have been defined based on Elmhurst
# Cavity
# value mappings
EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR,
EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE,
EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD,
EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD,
EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD,
# function mappings
EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency,
EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency,
EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency,
# Solid brick
# value mappings
EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR,
EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE,
EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD,
# function mappings
EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency,
EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency,
# System
# value mappings
EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR,
EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE,
EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD,
# function mappings
EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency,
EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency,
# Timber frame
# value mappings
EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR,
EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE,
EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD,
# function mappings
EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency,
# Granite / whinstone
# value mappings
EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
# NOTE(review): "whinestone" is spelled differently from the other granite_whinstone_* members —
# confirm this matches the member name declared on EpcWallDescriptions.
EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD,
# function mappings
EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency,
# Sandstone / limestone
# value mappings
EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD,
# function mappings
EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency,
# Cob (special case)
EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE,
EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD,
# Unknown mappings which are unhandled
EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA,
EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA,
EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA,
EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA,
EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA,
EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA,
EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA,
}
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Resolve wall efficiency from description + age band.
    Fixed ratings in WALL_DESCRIPTION_EFFICIENCIES are returned as they are; function rules are
    called with the age band. The description is looked up by subscript, so it must be present
    in WALL_DESCRIPTION_EFFICIENCIES.
    :param description: EpcWallDescriptions to rate
    :param age_band: EpcConstructionAgeBand passed to function rules
    :return: EpcEfficiency
    """
    rule = WALL_DESCRIPTION_EFFICIENCIES[description]
    return rule if isinstance(rule, EpcEfficiency) else rule(age_band)

View file

@ -1,6 +0,0 @@
# Identity mapping: each listed label maps to itself.
parity_map = {
"Flat": "Flat",
"Maisonette": "Maisonette",
"Bungalow": "Bungalow",
"House": "House",
}

View file

@ -1,3 +0,0 @@
# Empty placeholder: no mappings are defined here.
parity_map = {
}

View file

@ -1,93 +1,371 @@
import re
from tqdm import tqdm
import pandas as pd import pandas as pd
from etl.epc.DataProcessor import construction_age_bounds_map from backend.onboarders.base import OnboarderBase
from backend.onboarders.mappings.property_type import parity_map as property_map # Parity mappings
from backend.onboarders.mappings.age_band import party_map as age_band_map from backend.onboarders.mappings.parity.property_type import parity_map as property_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map
from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map
from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES
def check_nulls(data, original_column, mapped_column): from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency
# We only allow nulls if the oroginal value was null from onboarders.mappings.parity.floor import floor_map
null_vals = data[pd.isnull(data[mapped_column])] from onboarders.mappings.parity.heating import heating_map
if null_vals.empty: from onboarders.mappings.parity.glazing import glazing_map
return True from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers
# We make sure all original values were null from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers
assert pd.isnull(null_vals[original_column]).all(), ( from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
f"Some values in {mapped_column} were not mapped, but original values were not null" as_built_floor_classifiers, unknown_as_built_floor_classifiers
)
# Sample input data
data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
) )
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.floor import EpcFloorDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.efficiency import EpcEfficiency
# We want to map the parity fields to standard EPC references. This will allow us to tqdm.pandas()
# 1) Estimate EPCs, more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures
# ------------ construction_age_band ------------
# Map to EPC age bands
# def construction_date_to_band(year):
# if pd.isnull(year):
# return None
# # Get the year from the date which is numpy datetime format
# for label, ranges in construction_age_bounds_map.items():
# if ranges["l"] <= year <= ranges["u"]:
# return label
# raise NotImplementedError("year out of bounds")
#
#
# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band)
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")
# ------------ property_type ------------
data["property_type"] = data["Type"].map(property_map)
assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped"
# ------------ built_form ------------
data["built_form"] = data["Attachment"].map(built_form_map)
assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped"
# ------------ Wall Construction ------------
data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation")
data["Wall Insulation"].value_counts()
data["Wall Construction"].value_counts()
as_built_map = {
"Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"System": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
"Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
}
def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): class ParityOnboarder(OnboarderBase):
if wall_insulation == "AsBuilt":
# Deduce based on wall construction and age band
bands = as_built_map.get(wall_constuction, None)
if bands is None:
raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map")
# We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated def __init__(
self,
fileuri: str,
file_format: str,
**kwargs
):
# Extract bucket, and filekey; Will be in the format s3://bucket/key
self.bucket_name = fileuri.split("/")[2]
self.input_file_name = "/".join(fileuri.split("/")[3:])
# Also prepare output file name
self.output_file_name = self.input_file_name.replace("." + file_format, "") + "_transformed.csv"
# Variables we want to map self.read_s3(file_format=file_format, **kwargs)
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', pass
# 'Attachment', 'Construction Years', 'Wall Construction',
# 'Wall Insulation', 'Roof Construction', 'Roof Insulation', def map_construction_age_band(self):
# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', self.data[self.landlord_construction_age_band] = self.data["Construction Years"].map(age_band_map)
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', self.assert_nulls_only_from_source_nulls(
# 'Total Floor Area (m2)' self.data, "Construction Years", self.landlord_construction_age_band
)
def map_property_type(self):
    """Map the source "Type" column through property_map, then run assert_no_nulls on the result."""
    mapped_types = self.data["Type"].map(property_map)
    target_column = self.landlord_property_type
    self.data[target_column] = mapped_types
    self.assert_no_nulls(self.data, target_column)
def map_built_form(self):
    """Map the source "Attachment" column through built_form_map, then run assert_no_nulls on the result."""
    mapped_forms = self.data["Attachment"].map(built_form_map)
    target_column = self.landlord_built_form
    self.data[target_column] = mapped_forms
    self.assert_no_nulls(self.data, target_column)
@staticmethod
def _fill_wall_as_built(row: pd.Series) -> EpcWallDescriptions | None:
    """
    Row-wise helper for map_wall_construction: fill in the wall description for rows whose
    direct mapping produced None.
    :param row: row of input sustainability data, being transformed
    :return: the value already held in landlord_wall_construction when it is not None; otherwise
        the fallback for the wall type when the age band is missing, or the result of the as-built
        classifier for the wall type (None when no classifier is registered)
    """
    resolved = row.landlord_wall_construction
    # Only None counts as unresolved; any other value is passed through unchanged
    if resolved is not None:
        return resolved

    construction = row["Wall Construction"]
    age_band = row.landlord_construction_age_band
    # Missing construction age → conservative fallback
    if pd.isnull(age_band):
        return wall_unknown_age_fallback.get(construction)

    classify = as_built_wall_classifiers.get(construction)
    return None if classify is None else classify(age_band)
@staticmethod
def _resolve_wall_efficiency(
        description: EpcWallDescriptions,
        age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Resolve the wall efficiency for one row.
    :param description: EpcWallDescriptions for the row
    :param age_band: EpcConstructionAgeBand, or None / NaN when unknown
    :return: EpcEfficiency; NA for "unknown insulation" descriptions, for descriptions without a
        rule, and for function rules when no age band is available
    """
    # Unknown / holding descriptions → efficiency unknown
    label = description.value.lower()
    if "unknown insulation" in label:
        return EpcEfficiency.NA

    # Descriptions without a rule resolve to NA through the lookup default
    rule = WALL_DESCRIPTION_EFFICIENCIES.get(description, EpcEfficiency.NA)
    if isinstance(rule, EpcEfficiency):
        return rule

    # Function rules need an age band
    age_band_missing = age_band is None or pd.isnull(age_band)
    return EpcEfficiency.NA if age_band_missing else rule(age_band)
def map_wall_construction(self):
    """
    Populate the landlord wall construction and wall efficiency columns.
    Steps: direct lookup of (Wall Construction, Wall Insulation) pairs in wall_map, row-wise fill
    of the remaining rows via _fill_wall_as_built, then row-wise efficiency resolution. Each
    column is passed to assert_no_nulls once it is populated.
    """
    # Direct lookup of (construction, insulation) pairs
    pairs = self.data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
    self.data[self.landlord_wall_construction] = pairs.map(wall_map)

    # Fill in the rows the direct lookup left unresolved
    filled = self.data.progress_apply(self._fill_wall_as_built, axis=1)
    self.data[self.landlord_wall_construction] = filled

    # Sanity check
    self.assert_no_nulls(self.data, self.landlord_wall_construction)

    def efficiency_for(row):
        return self._resolve_wall_efficiency(
            row.landlord_wall_construction,
            row.landlord_construction_age_band,
        )

    efficiencies = self.data.progress_apply(efficiency_for, axis=1)
    self.data[self.landlord_wall_efficiency] = efficiencies

    # Additional sanity check
    self.assert_no_nulls(self.data, self.landlord_wall_efficiency)
@staticmethod
def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None:
    """
    Row-wise helper for map_roof_construction: fill in the roof description for rows whose
    direct mapping produced a null.
    :param row: row of input sustainability data, being transformed
    :return: the existing landlord_roof_construction value when it is not null; otherwise the
        fallback for the roof type when the age band is missing, or the classifier result
    :raises NotImplementedError: if the roof type has no classifier, or the classifier returns None
    """
    current = row.landlord_roof_construction
    # Already resolved
    if not pd.isnull(current):
        return current

    roof_type = row["Roof Construction"]
    classify = as_built_roof_classifiers.get(roof_type)
    if classify is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")

    age_band = row.landlord_construction_age_band
    if pd.isnull(age_band):
        return roof_unknown_age_fallback.get(roof_type)

    classified = classify(age_band)
    if classified is not None:
        return classified
    raise NotImplementedError(
        f"Roof classification returned None for roof type '{roof_type}'"
    )
@staticmethod
def _extract_insulation_thickness(value: str | None) -> int | None:
"""
Extract insulation thickness in mm from a string like 'mm150'.
Returns None if not present or not parseable.
"""
if value is None or pd.isnull(value):
return None
match = re.search(r"(\d+)", str(value))
if not match:
return None
return int(match.group(1))
def map_roof_construction(self):
    """
    Populate the landlord roof construction, roof efficiency and sloping ceiling columns.

    Steps: direct lookup of (Roof Construction, Roof Insulation) pairs in roof_map, row-wise fill
    of the remaining rows via _fill_roof_as_built, extraction of the insulation thickness in mm
    from "Roof Insulation", then row-wise efficiency resolution. The construction and efficiency
    columns are each passed to assert_no_nulls once populated.
    """
    self.data[self.landlord_roof_construction] = (
        self.data[["Roof Construction", "Roof Insulation"]]
        .progress_apply(tuple, axis=1)
        .map(roof_map)
    )
    self.data[self.landlord_roof_construction] = self.data.progress_apply(
        self._fill_roof_as_built,
        axis=1,
    )
    # sanity check
    self.assert_no_nulls(self.data, self.landlord_roof_construction)

    self.data["roof_insulation_thickness_mm"] = self.data["Roof Insulation"].apply(
        self._extract_insulation_thickness
    )

    def _thickness_or_none(raw):
        # A column mixing ints and None is stored by pandas as floats with NaN, so a missing
        # thickness comes back as NaN. Passing NaN on would be treated as a real measurement
        # (it is "not None") and rated as the thinnest band instead of using the age band.
        return None if pd.isnull(raw) else int(raw)

    self.data[self.landlord_roof_efficiency] = self.data.progress_apply(
        lambda row: resolve_roof_efficiency(
            description=row.landlord_roof_construction,
            age_band=row.landlord_construction_age_band,
            insulation_thickness=_thickness_or_none(row.roof_insulation_thickness_mm),
        ),
        axis=1,
    )
    # sanity check
    self.assert_no_nulls(self.data, self.landlord_roof_efficiency)

    # Flag sloping ceiling
    self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply(
        lambda x: x == "PitchedWithSlopingCeiling"
    )
@staticmethod
def _fill_floor_as_built(row: pd.Series):
    """
    Row-wise helper for map_floor_construction: fill in the floor description for rows whose
    direct mapping produced None.
    :param row: row of input sustainability data, being transformed
    :return: the value already held in landlord_floor_construction when it is not None; otherwise
        EpcFloorDescriptions.unknown when the age band is missing or the floor type is not
        recognised, or the result of the classifier selected by floor type / floor insulation
    """
    # 1. Already resolved (only None counts as unresolved)
    existing = row.landlord_floor_construction
    if existing is not None:
        return existing

    age_band = row.landlord_construction_age_band
    floor_type = row["Floor Construction"]
    insulation = row["Floor Insulation"]

    # 2. Missing age band → conservative fallback
    if pd.isnull(age_band):
        return EpcFloorDescriptions.unknown

    # 3. Known floor types are classified by floor type
    if floor_type in ("Solid", "SuspendedTimber", "SuspendedNotTimber"):
        return as_built_floor_classifiers[floor_type](age_band)

    # 4. Unknown floor type is classified by the insulation value instead
    if floor_type == "Unknown":
        return unknown_as_built_floor_classifiers[insulation](age_band)

    # 5. Truly missing / garbage input
    return EpcFloorDescriptions.unknown
def map_floor_construction(self):
    """
    Populate the landlord floor construction column: direct lookup of
    (Floor Construction, Floor Insulation) pairs in floor_map, then a row-wise fill via
    _fill_floor_as_built, then assert_no_nulls on the result.
    """
    pairs = self.data[["Floor Construction", "Floor Insulation"]].progress_apply(tuple, axis=1)
    self.data[self.landlord_floor_construction] = pairs.map(floor_map)

    filled = self.data.progress_apply(self._fill_floor_as_built, axis=1)
    self.data[self.landlord_floor_construction] = filled

    self.assert_no_nulls(self.data, self.landlord_floor_construction)
def map_glazing(self):
    """
    Map the source "Glazing" column through glazing_map and spread each mapped value across the
    five landlord glazing columns (type, efficiency, multi glaze proportion, glazed type, area).
    """
    # TODO: probably doesn't make sense to store multi glazed proportion, glazed type or glazed area.
    #  There is maybe an argument for landlord_multi_glaze_proportion as this could be variable
    #  (the original note was left unfinished at this point).
    expanded = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series)
    glazing_columns = [
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
    ]
    self.data[glazing_columns] = expanded
def map_heating(self):
    """
    Look up each (Heating, Boiler Efficiency, Main Fuel, Controls Adequacy) combination in
    heating_map and spread the mapped value across the seven landlord heating, fuel, controls
    and hot water columns.
    """
    # TODO - when mapping heating controls, we should check the existing heating controls and the
    #  efficiency rating. For sub optimal heating controls, we're going to make an assumption as to
    #  what the heating controls are, and the energy efficiency rating we prescribe here may not be
    #  accurate. We therefore use this as an upper limit as opposed to a guaranteed efficiency
    #  rating. To stress, this is only relevant for sub optimal heating controls. E.g. it may be
    #  programmer and room thermostat
    source_columns = ["Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy"]
    combinations = self.data[source_columns].progress_apply(tuple, axis=1)
    expanded = combinations.map(heating_map).progress_apply(pd.Series)

    heating_columns = [
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]
    self.data[heating_columns] = expanded
def map_floor_area(self):
    """Rename the source "Total Floor Area (m2)" column to the landlord total floor area column name."""
    # This is just a rename
    renames = {"Total Floor Area (m2)": self.landlord_total_floor_area_m2}
    self.data = self.data.rename(columns=renames)
def select_columns(self):
    """
    Reduce self.data to the output columns and rename the source identifier/address columns.

    The source columns "Org Ref", "Address 1", "Address 2", "Address 3" and "Postcode" are renamed
    to "landlord_property_id", "address1", "address2", "address3" and "postcode"; "UPRN" and the
    landlord_* columns keep their names. The column order is the order listed below.
    """
    source_columns = [
        "Org Ref",
        "UPRN",
        "Address 1",
        "Address 2",
        "Address 3",
        "Postcode",
    ]
    landlord_columns = [
        self.landlord_total_floor_area_m2,
        self.landlord_construction_age_band,
        self.landlord_property_type,
        self.landlord_built_form,
        self.landlord_wall_construction,
        self.landlord_wall_efficiency,
        self.landlord_roof_construction,
        self.landlord_roof_efficiency,
        self.landlord_has_sloping_ceiling,
        self.landlord_floor_construction,
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]
    self.data = self.data[source_columns + landlord_columns].rename(
        columns={
            "Org Ref": "landlord_property_id",
            # The rename keys must match the selected source names exactly, including the space.
            # rename() silently ignores keys that are not present, so the previous
            # "Address1"-style keys left the address columns unrenamed.
            "Address 1": "address1",
            "Address 2": "address2",
            "Address 3": "address3",
            "Postcode": "postcode",
        }
    )
def extract_values(self):
    """
    Replace each cell in the listed landlord columns with its ``.value`` attribute when it has
    one; cells without a ``value`` attribute are left as they are.
    """
    def unwrap(cell):
        return cell.value if hasattr(cell, "value") else cell

    value_columns = (
        self.landlord_construction_age_band,
        self.landlord_property_type,
        self.landlord_built_form,
        self.landlord_wall_construction,
        self.landlord_wall_efficiency,
        self.landlord_roof_construction,
        self.landlord_roof_efficiency,
        self.landlord_floor_construction,
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    )
    for column in value_columns:
        self.data[column] = self.data[column].progress_apply(unwrap)
def transform(self):
    """Run every mapping step in order, then select the output columns and unwrap their values."""
    pipeline = (
        "map_construction_age_band",  # construction_age_band
        "map_property_type",          # property_type
        "map_built_form",             # built_form
        "map_wall_construction",      # Wall Construction
        "map_roof_construction",      # Roof Construction
        "map_floor_construction",     # Floor Construction
        "map_glazing",                # Glazing
        "map_heating",                # Heating, fuel, controls & hot water
        "map_floor_area",             # Floor Area
        "select_columns",             # Formatting
        "extract_values",
    )
    for step_name in pipeline:
        # Each step is looked up just before it runs, as with direct sequential calls
        getattr(self, step_name)()

View file

@ -0,0 +1,6 @@
boto3
numpy==2.1.2
pandas==2.2.3
tqdm==4.66.5
pydantic==2.9.2
openpyxl==3.1.2

View file

@ -0,0 +1,97 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.floor import EpcFloorDescriptions
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
unknown_floor_as_built,
unknown_floor_retrofitted,
map_solid_floor_as_built,
map_suspended_floor_as_built,
)
# unknown_floor_as_built: expected floor description for each construction age band listed below.
@pytest.mark.parametrize(
"age_band,expected",
[
# Before 1900 / 1900-1929 → suspended, no insulation
(EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed),
(EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_no_insulation_assumed),
# 1930-1995 → solid, no insulation
(EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed),
(EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed),
(EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed),
(EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed),
(EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
(EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
# 1996-2002 → solid, limited insulation
(EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
# 2003+ → solid, insulated
(EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
(EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
(EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
],
)
def test_unknown_floor_as_built(age_band, expected):
assert unknown_floor_as_built(age_band) == expected
# unknown_floor_retrofitted: expected floor description for each construction age band listed below.
@pytest.mark.parametrize(
"age_band,expected",
[
# Pre-1930 → suspended, insulated
(EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated),
(EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated),
# 1930+ → solid, insulated
(EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated),
(EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated),
(EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated),
(EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated),
],
)
def test_unknown_floor_retrofitted(age_band, expected):
assert unknown_floor_retrofitted(age_band) == expected
# map_solid_floor_as_built: expected solid floor description for each age band listed below.
# Only bands from 1983-1990 onwards are exercised here.
@pytest.mark.parametrize(
"age_band,expected",
[
# 1983-1995 → no insulation
(EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
(EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
# 1996-2002 → limited insulation
(EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
# 2003+ → insulated
(EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
(EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
(EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
],
)
def test_solid_floor_as_built(age_band, expected):
assert map_solid_floor_as_built(age_band) == expected
# map_suspended_floor_as_built: expected suspended floor description for each age band listed below.
# Only bands from 1983-1990 onwards are exercised here.
@pytest.mark.parametrize(
"age_band,expected",
[
# 1983-1995 → no insulation
(EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed),
(EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed),
# 1996-2002 → limited insulation
(EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed),
# 2003+ → insulated
(EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed),
(EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed),
(EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed),
],
)
def test_suspended_floor_as_built(age_band, expected):
assert map_suspended_floor_as_built(age_band) == expected

View file

@ -0,0 +1,173 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from backend.onboarders.mappings.parity.as_built_roof_classifiers import (
map_flat_roof,
map_sloping_ceiling_roof,
)
from backend.onboarders.mappings.parity.roof import resolve_roof_efficiency
# ---------------------------------------------------------------------
# As-built roof description classification
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "age_band, expected",
    [
        # Up to 1950-1966 -> no insulation
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.flat_no_insulation),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcRoofDescriptions.flat_no_insulation),
        # 1967-1982 -> limited insulation
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.flat_limited_insulation),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.flat_limited_insulation),
        # 1983 onwards -> insulated
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.flat_insulated),
    ],
)
def test_classify_flat_roof(age_band, expected):
    """`map_flat_roof` picks the flat-roof description for the given age band."""
    actual = map_flat_roof(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        # Up to 1967-1975 -> no insulation
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.sloping_pitched_no_insulation),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.sloping_pitched_no_insulation),
        # 1976-1982 -> limited insulation
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation),
        # 1983 onwards -> insulated
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.sloping_pitched_insulated),
    ],
)
def test_classify_sloping_ceiling_roof(age_band, expected):
    """`map_sloping_ceiling_roof` picks the sloping-ceiling description for the age band."""
    actual = map_sloping_ceiling_roof(age_band)
    assert actual == expected
# ---------------------------------------------------------------------
# Roof efficiency — fixed & age-band driven
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        # Flat roof, no insulation
        (EpcRoofDescriptions.flat_no_insulation, EpcConstructionAgeBand.before_1900, EpcEfficiency.VERY_POOR),
        # Flat roof, limited insulation (age-band driven)
        (EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1976_to_1982, EpcEfficiency.POOR),
        (EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1967_to_1975, EpcEfficiency.VERY_POOR),
        # Flat roof, insulated (age-band driven)
        (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_1983_to_1990, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),
        # Pitched, insulated assumed (loft)
        (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_1996_to_2002, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_2007_to_2011, EpcEfficiency.VERY_GOOD),
    ],
)
def test_roof_efficiency_age_band_only(description, age_band, expected):
    """With no thickness supplied, efficiency follows description and age band."""
    actual = resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    )
    assert actual == expected
# ---------------------------------------------------------------------
# Roof efficiency — insulation thickness driven
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description, thickness, expected",
    [
        # Loft insulation
        (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR),
        (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR),
        (EpcRoofDescriptions.loft_75mm_insulation, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD),
        # Flat insulated: thickness overrides age band
        (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR),
        (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD),
        # Sloping ceiling
        (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.sloping_pitched_insulated, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD),
    ],
)
def test_roof_efficiency_thickness_based(description, thickness, expected):
    """When a thickness is supplied it, not the age band, decides the efficiency."""
    # The oldest band is passed on purpose: the expected ratings only hold if
    # the resolver ignores it in favour of the thickness.
    actual = resolve_roof_efficiency(
        description=description,
        age_band=EpcConstructionAgeBand.before_1900,  # should be ignored
        insulation_thickness=thickness,
    )
    assert actual == expected
# ---------------------------------------------------------------------
# Thatched roofs
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_age_band(description, age_band, expected):
    """A plain thatched roof with no thickness is rated from its age band."""
    actual = resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    )
    assert actual == expected
@pytest.mark.parametrize(
    "thickness, expected",
    [
        (12, EpcEfficiency.AVERAGE),
        (50, EpcEfficiency.GOOD),
        (150, EpcEfficiency.GOOD),
        (200, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_thickness(thickness, expected):
    """Thatched roofs with additional insulation are rated by insulation thickness."""
    actual = resolve_roof_efficiency(
        description=EpcRoofDescriptions.thatched_with_additional_insulation,
        age_band=EpcConstructionAgeBand.before_1900,
        insulation_thickness=thickness,
    )
    assert actual == expected
# ---------------------------------------------------------------------
# Unknown / holding descriptions
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description",
    [
        EpcRoofDescriptions.flat_as_built_unknown,
        EpcRoofDescriptions.loft_as_built_unknown,
        EpcRoofDescriptions.thatched_as_built_unknown,
        EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    ],
)
def test_unknown_roof_descriptions_return_na(description):
    """Every "as built, unknown insulation" roof description resolves to N/A."""
    actual = resolve_roof_efficiency(
        description=description,
        age_band=None,
        insulation_thickness=None,
    )
    assert actual == EpcEfficiency.NA

View file

@ -0,0 +1,161 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency
from backend.onboarders.mappings.parity.as_built_wall_classifiers import (
map_cavity_wall_insulation,
map_solid_wall_insulation,
map_timber_frame_wall_insulation,
map_system_build_wall_insulation,
map_granite_wall_insulation,
map_sandstone_wall_insulation,
map_cob_wall_insulation,
)
# ---------------------------------------------------------------------
# As-built wall description classification
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed),
    ],
)
def test_map_cavity_wall_insulation(age_band, expected):
    """Cavity walls get the assumed as-built description for their age band."""
    actual = map_cavity_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.solid_brick_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed),
    ],
)
def test_map_solid_wall_insulation(age_band, expected):
    """Solid brick walls get the assumed as-built description for their age band."""
    actual = map_solid_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed),
    ],
)
def test_map_timber_frame_wall_insulation(age_band, expected):
    """Timber-frame walls get the assumed as-built description for their age band."""
    actual = map_timber_frame_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed),
    ],
)
def test_map_system_wall_insulation(age_band, expected):
    """System-built walls get the assumed as-built description for their age band."""
    actual = map_system_build_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed),
        # The member name below is spelled "whinestone" in EpcWallDescriptions.
        (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed),
    ],
)
def test_map_granite_wall_insulation(age_band, expected):
    """Granite/whinstone walls get the assumed as-built description for their age band."""
    actual = map_granite_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.sandstone_limestone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed),
    ],
)
def test_map_sandstone_wall_insulation(age_band, expected):
    """Sandstone/limestone walls get the assumed as-built description for their age band."""
    actual = map_sandstone_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average),
        # NOTE(review): cob_as_built_good and cob_as_built_average are both
        # defined with the value "Cob, as built", so Enum makes them the same
        # member. This case therefore cannot tell "good" from "average".
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good),
    ],
)
def test_map_cob_wall_insulation(age_band, expected):
    """Cob walls map to the cob as-built description expected for their age band."""
    actual = map_cob_wall_insulation(age_band)
    assert actual == expected
# ---------------------------------------------------------------------
# Wall efficiency resolution
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        # Fixed efficiencies (no age band needed)
        (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR),
        (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE),
        (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD),
        # Function-based efficiencies (vary with age band)
        (EpcWallDescriptions.cavity_filled_cavity, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),
        (EpcWallDescriptions.cavity_filled_cavity, EpcConstructionAgeBand.from_1991_to_1995, EpcEfficiency.GOOD),
        (EpcWallDescriptions.solid_brick_internal_insulation, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.VERY_GOOD),
        (EpcWallDescriptions.solid_brick_internal_insulation, EpcConstructionAgeBand.from_1950_to_1966, EpcEfficiency.GOOD),
    ],
)
def test_resolve_wall_efficiency(description, age_band, expected):
    """Wall efficiency is resolved from the description and, where relevant, the age band."""
    actual = resolve_wall_efficiency(description, age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "description",
    [
        EpcWallDescriptions.cavity_as_built_unknown,
        EpcWallDescriptions.solid_brick_as_built_unknown,
        EpcWallDescriptions.system_as_built_unknown,
        EpcWallDescriptions.timber_frame_as_built_unknown,
        EpcWallDescriptions.granite_as_built_unknown,
        EpcWallDescriptions.sandstone_as_built_unknown,
        EpcWallDescriptions.cob_as_built_unknown,
    ],
)
def test_unknown_wall_descriptions_return_na(description):
    """Every "as built, unknown insulation" wall description resolves to N/A."""
    actual = resolve_wall_efficiency(description, None)
    assert actual == EpcEfficiency.NA

View file

@ -0,0 +1,9 @@
# Container image built on the public AWS Lambda Python 3.10 base image.
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root)
WORKDIR /var/task
# NOTE(review): no COPY/ADD or dependency-install step is visible in this file,
# so the module behind `main.handler` is presumably supplied some other way.
# Confirm, otherwise the handler cannot be imported at runtime.
# -----------------------------
# Lambda handler
# -----------------------------
# Handler reference in "<module>.<function>" form: function `handler` in `main`.
CMD ["main.handler"]

View file

@ -1,10 +1,12 @@
import pandas as pd import pandas as pd
import requests import requests
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode from backend.address2UPRN.main import (
resolve_uprns_for_postcode_group,
get_epc_data_with_postcode,
)
from tqdm import tqdm from tqdm import tqdm
def sanitise_postcode(postcode: str) -> str | None: def sanitise_postcode(postcode: str) -> str | None:
""" """
Normalise postcode for grouping. Normalise postcode for grouping.
@ -51,11 +53,7 @@ def main():
# --- validate AFTER grouping (save API calls) --- # --- validate AFTER grouping (save API calls) ---
# Get unique, non-null postcodes # Get unique, non-null postcodes
unique_postcodes = ( unique_postcodes = df["postcode_clean"].dropna().unique()
df["postcode_clean"]
.dropna()
.unique()
)
# Validate each postcode once. TODO: add a progress bar # Validate each postcode once. TODO: add a progress bar
postcode_validity = { postcode_validity = {
@ -66,7 +64,6 @@ def main():
# Map validity back onto dataframe # Map validity back onto dataframe
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
results = [] results = []
for postcode, group_df in tqdm( for postcode, group_df in tqdm(
@ -98,17 +95,33 @@ def main():
results.append(tmp) results.append(tmp)
final_df = pd.concat(results, ignore_index=True) final_df = pd.concat(results, ignore_index=True)
a = final_df[[ a = final_df[
"best_match_lexiscore","Address 1", [
"best_match_address", "Postcode", "best_match_lexiscore",
"UPRN", "best_match_uprn" "Address 1",
]] # add levi score to viewing "best_match_address",
b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing "Postcode",
b = b[[ "UPRN",
"best_match_lexiscore","Address 1", "best_match_uprn",
"best_match_address", "Postcode", ]
"UPRN", "best_match_uprn" ] # add levi score to viewing
]] b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
b = b[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
]
def handler(event, context):
    """Placeholder entry point: log a greeting and return a fixed 200 response.

    Both ``event`` and ``context`` are accepted but not used.
    """
    print("hello Postcode splitter world")
    response = {"statusCode": 200, "body": "hello world"}
    return response
if __name__ == "__main__": if __name__ == "__main__":
main() main()

View file

View file

@ -0,0 +1,45 @@
import re
from enum import Enum
from typing import List
class EpcConstructionAgeBand(Enum):
    """Construction age bands, keyed to their 'England and Wales: ...' label strings."""

    before_1900: str = 'England and Wales: before 1900'
    from_1900_to_1929: str = 'England and Wales: 1900-1929'
    from_1930_to_1949: str = 'England and Wales: 1930-1949'
    from_1950_to_1966: str = 'England and Wales: 1950-1966'
    from_1967_to_1975: str = 'England and Wales: 1967-1975'
    from_1976_to_1982: str = 'England and Wales: 1976-1982'
    from_1983_to_1990: str = 'England and Wales: 1983-1990'
    from_1991_to_1995: str = 'England and Wales: 1991-1995'
    from_1996_to_2002: str = 'England and Wales: 1996-2002'
    from_2003_to_2006: str = 'England and Wales: 2003-2006'
    from_2007_to_2011: str = 'England and Wales: 2007-2011'
    # Two bands start in 2012: an open-ended one and a 2012-2022 one.
    from_2012_onwards: str = 'England and Wales: 2012-onwards'
    from_2012_to_2022: str = 'England and Wales: 2012-2022'
    from_2023_onwards: str = 'England and Wales: 2023 onwards'

    def start_year(self) -> int:
        """
        Return the first year covered by this band.

        The "before ..." band has no lower bound and reports 0. Every other
        band reports the first four-digit number found in its label.

        Raises:
            ValueError: if the label holds neither "before" nor a 4-digit year.
        """
        label = self.value.lower()
        if 'before' in label:
            return 0

        found = re.search(r'(\d{4})', label)
        if found is None:
            raise ValueError(f"Cannot determine start year from '{self.value}'")
        return int(found.group(1))

    @classmethod
    def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]:
        """
        Collect, in definition order, every band whose start year is >= ``year``.
        """
        selected = []
        for band in cls:
            if band.start_year() >= year:
                selected.append(band)
        return selected

View file

@ -0,0 +1,10 @@
from enum import Enum
class EpcEfficiency(Enum):
    """Efficiency rating labels, listed from worst to best, plus a not-applicable marker."""

    VERY_POOR: str = "Very Poor"
    POOR: str = "Poor"
    AVERAGE: str = "Average"
    GOOD: str = "Good"
    VERY_GOOD: str = "Very Good"
    # Used where no rating applies.
    NA: str = "N/A"

17
datatypes/epc/floor.py Normal file
View file

@ -0,0 +1,17 @@
from enum import Enum
class EpcFloorDescriptions(Enum):
    """Floor description strings for solid and suspended floors."""

    # --- Solid floor ---
    solid_insulated = "Solid, insulated"
    solid_insulated_assumed = "Solid, insulated (assumed)"
    solid_no_insulation_assumed = "Solid, no insulation (assumed)"
    solid_limited_insulation_assumed = "Solid, limited insulation (assumed)"

    # --- Suspended floor ---
    suspended_insulated = "Suspended, insulated"
    suspended_insulated_assumed = "Suspended, insulated (assumed)"
    suspended_no_insulation_assumed = "Suspended, no insulation (assumed)"
    suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)"

    # Placeholder with a None value: nothing is resolved for unknown floors.
    unknown = None

10
datatypes/epc/fuel.py Normal file
View file

@ -0,0 +1,10 @@
from enum import Enum
class EpcFuel(Enum):
    """Fuel label strings."""

    # Non-community fuels
    electricity_not_community = "electricity (not community)"
    lpg_not_community = "LPG (not community)"
    mains_gas_not_community = "mains gas (not community)"
    oil_not_community = "oil (not community)"

    # Solid fuels
    manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel"
    smokeless_coal = "smokeless coal"

View file

@ -0,0 +1,18 @@
from enum import Enum
class EpcHeatingControls(Enum):
    """Heating-control description strings, grouped by the heating type they accompany."""

    programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs"
    programmers_trvs_bypass = "Programmer, TRVs and bypass"
    time_and_temperature_zone_control = "Time and temperature zone control"

    # --- Room heaters ---
    programmer_and_appliance_thermostats = "Programmer and appliance thermostats"
    appliance_thermostats = "Appliance thermostats"

    # --- Storage heaters ---
    automatic_charge_control = "Automatic charge control"
    manual_charge_control = "Manual charge control"

    # --- Warm air ---
    programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats"

View file

@ -0,0 +1,8 @@
from enum import Enum
class EpcHotWaterSystems(Enum):
    """Hot-water system description strings."""

    # Hot water supplied by the primary heating system
    from_main_system = "From main system"

    # Common for heater-based systems, e.g. room heaters or storage heaters
    electric_immersion_off_peak = "Electric immersion, off-peak"

View file

@ -0,0 +1,24 @@
from enum import Enum
class EpcHeatingSystems(Enum):
    """Main heating system description strings, grouped by system type."""

    # --- Boiler and radiators ---
    boiler_and_radiators_electric = "Boiler and radiators, electric"
    boiler_and_radiators_lpg = "Boiler and radiators, LPG"
    boiler_radiators_mains_gas = "Boiler and radiators, mains gas"
    boiler_radiators_oil = "Boiler and radiators, oil"

    # --- Underfloor ---
    electric_underfloor_heating = "Electric underfloor heating"

    # --- Air source heat pumps (ASHP) ---
    air_to_air_ashp = "Air source heat pump, warm air, electric"
    ashp_radiators_electric = "Air source heat pump, radiators, electric"

    # --- Room heaters ---
    room_heaters_electric = "Room heaters, electric"
    room_heaters_mains_gas = "Room heaters, mains gas"
    room_heaters_smokeless_fuel = "Room heaters, smokeless fuel"
    room_heaters_coal = "Room heaters, coal"

    # --- Storage heaters ---
    electric_storage_heaters = "Electric storage heaters"

    # --- Warm air ---
    warm_air_electricaire = "Warm air, Electricaire"
    warm_air_mains_gas = "Warm air, mains gas"

View file

@ -0,0 +1,17 @@
from enum import Enum
class PropertyType(Enum):
    """Property type labels."""

    flat = "Flat"
    maisonette = "Maisonette"
    bungalow = "Bungalow"
    house = "House"
class BuiltForm(Enum):
    """Built form labels describing how a property adjoins its neighbours."""

    mid_terrace = "Mid-Terrace"
    end_terrace = "End-Terrace"
    detached = "Detached"
    semi_detached = "Semi-Detached"
    enclosed_mid_terrace = "Enclosed Mid-Terrace"
    enclosed_end_terrace = "Enclosed End-Terrace"

86
datatypes/epc/roof.py Normal file
View file

@ -0,0 +1,86 @@
from enum import Enum
from typing import List
class EpcRoofDescriptions(Enum):
    """Roof description strings, grouped by roof type.

    The comments on each group record the efficiency rating expected for a
    given insulation thickness or construction age band.
    """

    # ------------------------------------------------------------------
    # Loft (pitched roof)
    # ------------------------------------------------------------------
    # Assumed options
    pitched_insulated_assumed: str = "Pitched, insulated (assumed)"
    pitched_no_insulation: str = "Pitched, no insulation"

    # One member per loft insulation thickness
    loft_12mm_insulation: str = "Pitched, 12 mm loft insulation"
    loft_25mm_insulation: str = "Pitched, 25 mm loft insulation"
    loft_50mm_insulation: str = "Pitched, 50 mm loft insulation"
    loft_75mm_insulation: str = "Pitched, 75 mm loft insulation"
    loft_100mm_insulation: str = "Pitched, 100 mm loft insulation"
    loft_125mm_insulation: str = "Pitched, 125 mm loft insulation"
    loft_150mm_insulation: str = "Pitched, 150 mm loft insulation"
    loft_175mm_insulation: str = "Pitched, 175 mm loft insulation"
    loft_200mm_insulation: str = "Pitched, 200 mm loft insulation"
    loft_250mm_insulation: str = "Pitched, 250 mm loft insulation"
    loft_270mm_insulation: str = "Pitched, 270 mm loft insulation"
    loft_300mm_insulation: str = "Pitched, 300 mm loft insulation"
    loft_350mm_insulation: str = "Pitched, 350 mm loft insulation"
    loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation"

    # Insulated at rafters ("Pitched, insulated at rafters"). Ratings by thickness:
    #   350 mm, 400 mm = very good
    #   200-300 mm     = good
    #   125-175 mm     = average
    #   50-100 mm      = poor
    #   25 mm or less  = very poor
    loft_insulated_at_rafters: str = "Pitched, insulated at rafters"

    # ------------------------------------------------------------------
    # Another dwelling above
    # ------------------------------------------------------------------
    another_dwelling_above: str = "(another dwelling above)"

    # ------------------------------------------------------------------
    # Flat roof
    # ------------------------------------------------------------------
    # Observed insulation is always described as "Flat, insulated", but the
    # efficiency rating still depends on the insulation thickness:
    #   12 mm                     = very poor (limited insulation description)
    #   25, 50 mm                 = poor      (limited insulation description)
    #   75, 100, 125 mm           = average   (Flat, insulated)
    #   150, 175, 200, 225, 250mm = good      (Flat, insulated)
    #   270 mm and above          = very good (Flat, insulated)
    # As built, by age band:
    #   2023 onwards                 = Flat, insulated, very good
    #   2003-2006 up to 2012-2022    = Flat, insulated, good
    #   1983-1990, 1996-2002         = Flat, insulated, average
    #   1976-1982                    = Flat, limited insulation, poor
    #   1967-1975                    = Flat, limited insulation, very poor
    #   1950-1966 and earlier bands  = Flat, no insulation, very poor
    flat_insulated: str = "Flat, insulated"
    flat_limited_insulation: str = "Flat, limited insulation"
    flat_no_insulation: str = "Flat, no insulation"

    # ------------------------------------------------------------------
    # Thatched roof
    # ------------------------------------------------------------------
    # With loft insulation at joists:
    #   thatched + 12 mm               = with additional insulation, average
    #   thatched + 25, 50, 100, 150 mm = with additional insulation, good
    #   thatched + 175 mm and above    = with additional insulation, very good
    # With loft insulation at rafters: out of scope for now.
    # Unknown insulation, by age band:
    #   pre-1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", average
    #   2003-2006, 2012-2022                                 = "Thatched", good
    #   2023 onwards                                         = "Thatched", very good
    thatched: str = "Thatched"  # Seen for no insulation; has average performance
    thatched_with_additional_insulation: str = "Thatched, with additional insulation"

    # ------------------------------------------------------------------
    # Sloping ceiling
    # ------------------------------------------------------------------
    # Sloping ceiling tags carry no "(assumed)" suffix, the intent being to make
    # it unambiguous that the roof is sloped.
    # NOTE(review): "Pitched, no insulation" is also the value of
    # `pitched_no_insulation` above, so Enum treats
    # `sloping_pitched_no_insulation` as an alias of that member, not a
    # separate one.
    sloping_pitched_no_insulation: str = "Pitched, no insulation"
    sloping_pitched_limited_insulation: str = "Pitched, limited insulation"
    sloping_pitched_insulated: str = "Pitched, insulated"

    # ------------------------------------------------------------------
    # Unknown descriptions, which may get mapped later or handled via fallback
    # ------------------------------------------------------------------
    flat_as_built_unknown: str = "Flat, as built, unknown insulation"
    loft_as_built_unknown: str = "Loft, as built, unknown insulation"
    thatched_as_built_unknown: str = "Thatched, as built, unknown insulation"
    sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation"

    @property
    def unknown_descriptions(self) -> List["EpcRoofDescriptions"]:
        """The four "as built, unknown insulation" members, as a new list.

        This is an instance property, so read it from a member, e.g.
        ``EpcRoofDescriptions.thatched.unknown_descriptions``.
        """
        unknown_members = (
            EpcRoofDescriptions.flat_as_built_unknown,
            EpcRoofDescriptions.loft_as_built_unknown,
            EpcRoofDescriptions.thatched_as_built_unknown,
            EpcRoofDescriptions.sloping_pitched_as_built_unknown,
        )
        return list(unknown_members)

74
datatypes/epc/walls.py Normal file
View file

@ -0,0 +1,74 @@
from enum import Enum
from typing import List
class EpcWallDescriptions(Enum):
    """Wall description strings, grouped by wall construction type."""

    # --- Cavity wall ---
    cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)"
    cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)"
    cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)"
    cavity_filled_cavity: str = "Cavity wall, filled cavity"
    cavity_internal_insulation: str = "Cavity wall, with internal insulation"
    cavity_external_insulation: str = "Cavity wall, with external insulation"
    cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation"
    cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation"

    # --- Solid brick ---
    solid_brick_internal_insulation: str = "Solid brick, with internal insulation"
    solid_brick_external_insulation: str = "Solid brick, with external insulation"
    solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)'
    solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)'
    solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)'

    # --- System built ---
    system_external_insulation: str = "System built, with external insulation"
    system_internal_insulation: str = "System built, with internal insulation"
    system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)"
    system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)"
    system_insulated_assumed: str = "System built, as built, insulated (assumed)"

    # --- Timber frame ---
    timber_frame_internal_insulation: str = "Timber frame, with internal insulation"
    timber_frame_external_insulation: str = "Timber frame, with external insulation"
    timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)"
    timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)"
    timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)"

    # --- Granite / whinstone ---
    granite_whinstone_external_insulation: str = "Granite or whin, with external insulation"
    granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation"
    granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)"
    granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)"
    # Original, misspelled name ("whinestone"); kept so existing callers still work.
    granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)"
    # Correctly spelled alias, matching the other granite_whinstone_* members.
    # It shares the value above, so Enum resolves both names to the same member.
    granite_whinstone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)"

    # --- Sandstone / limestone ---
    sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation"
    sandstone_limestone_external_insulation: str = "Sandstone, with external insulation"
    sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)"
    sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)"
    sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)"

    # --- Cob ---
    # NOTE(review): both cob members share the value "Cob, as built", so Enum
    # makes `cob_as_built_good` an alias of `cob_as_built_average`. Code that
    # compares or dispatches on these members cannot tell them apart.
    cob_as_built_average: str = "Cob, as built"
    cob_as_built_good: str = "Cob, as built"

    # --- Unknown descriptions, which may get mapped later or handled via fallback ---
    cavity_as_built_unknown: str = "Cavity wall, as built, unknown insulation"
    solid_brick_as_built_unknown: str = "Solid brick, as built, unknown insulation"
    system_as_built_unknown: str = "System built, as built, unknown insulation"
    timber_frame_as_built_unknown: str = "Timber frame, as built, unknown insulation"
    granite_as_built_unknown: str = "Granite or whin, as built, unknown insulation"
    sandstone_as_built_unknown: str = "Sandstone, as built, unknown insulation"
    cob_as_built_unknown: str = "Cob, as built, unknown insulation"

    @property
    def unknown_descriptions(self) -> List["EpcWallDescriptions"]:
        """The seven "as built, unknown insulation" members, as a new list.

        This is an instance property, so read it from a member, e.g.
        ``EpcWallDescriptions.cavity_filled_cavity.unknown_descriptions``.
        """
        return [
            EpcWallDescriptions.cavity_as_built_unknown,
            EpcWallDescriptions.solid_brick_as_built_unknown,
            EpcWallDescriptions.system_as_built_unknown,
            EpcWallDescriptions.timber_frame_as_built_unknown,
            EpcWallDescriptions.granite_as_built_unknown,
            EpcWallDescriptions.sandstone_as_built_unknown,
            EpcWallDescriptions.cob_as_built_unknown,
        ]

View file

@ -196,6 +196,10 @@ class KwhData:
if save and self.bucket is None: if save and self.bucket is None:
raise Exception("bucket not set, cannot save data") raise Exception("bucket not set, cannot save data")
if data.empty:
# If we have no data
return data
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
# in anticipation of the new model # in anticipation of the new model

View file

@ -0,0 +1,43 @@
# Current version of the stage-specific database credentials secret.
data "aws_secretsmanager_secret_version" "db_credentials" {
  secret_id = "${var.stage}/assessment_model/db_credentials"
}

# Remote state of the shared stack; read for the S3 read-policy ARN attached below.
data "terraform_remote_state" "shared" {
  backend = "s3"

  config = {
    bucket = "assessment-model-terraform-state"
    key    = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this
    region = "eu-west-2"
  }
}

locals {
  # The secret string is decoded as JSON; the username and password keys are read below.
  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
}

# The condition-etl function, built from the shared lambda_with_sqs module.
module "lambda" {
  source = "../modules/lambda_with_sqs"

  name      = "condition-etl"
  stage     = var.stage
  image_uri = local.image_uri
  timeout   = 180

  # merge() currently receives a single map, so the result is that map unchanged.
  # NOTE(review): the database credentials are passed as plain environment
  # variables; confirm this is acceptable for this function.
  environment = merge(
    {
      STAGE       = var.stage
      LOG_LEVEL   = "info"
      DB_USERNAME = local.db_credentials.db_assessment_model_username
      DB_PASSWORD = local.db_credentials.db_assessment_model_password
    },
  )
}

# Attach the read policy exported by the shared stack to the function's role.
resource "aws_iam_role_policy_attachment" "attach_condition_etl_s3_read" {
  role       = module.lambda.role_name
  policy_arn = data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
}

View file

@ -0,0 +1,16 @@
terraform {
  # Minimum Terraform CLI version for this stack.
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.16"
    }
  }

  # State is stored in a dedicated S3 bucket for the condition-etl stack.
  backend "s3" {
    bucket = "condition-etl-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,27 @@
# ---- Inputs ----

variable "lambda_name" {
  type        = string
  description = "Logical name of the lambda (e.g. address2uprn)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "ecr_repo_url" {
  type        = string
  description = "ECR repository URL (no tag, no digest)"
}

variable "image_digest" {
  type        = string
  description = "Image digest (sha256:...)"
}

# ---- Derived values ----

locals {
  # Digest-pinned image reference in the form <repository URL>@<digest>.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

# Exposes the computed image reference.
output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -6,6 +6,10 @@ module "role" {
name = "${var.name}-lambda-${var.stage}" name = "${var.name}-lambda-${var.stage}"
} }
# Re-exports the role name from the nested "role" module so callers of this
# module can reference it.
output "role_name" {
  value = module.role.role_name
}
############################################ ############################################
# SQS queue + DLQ # SQS queue + DLQ
############################################ ############################################

View file

@ -0,0 +1,14 @@
# The postcode-splitter function, built from the shared lambda_with_sqs module.
module "lambda" {
  source = "../modules/lambda_with_sqs"

  name      = "postcode-splitter"
  stage     = var.stage
  image_uri = local.image_uri

  # Environment variables passed to the function.
  environment = {
    STAGE     = var.stage
    LOG_LEVEL = "info"
  }
}

View file

@ -0,0 +1,16 @@
terraform {
  # Minimum Terraform CLI version for this stack.
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.16"
    }
  }

  # State is stored in a dedicated S3 bucket for the postcode-splitter stack.
  backend "s3" {
    bucket = "postcode-splitter-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,26 @@
# ---- Inputs ----

variable "lambda_name" {
  type        = string
  description = "Logical name of the lambda (e.g. address2uprn)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "ecr_repo_url" {
  type        = string
  description = "ECR repository URL (no tag, no digest)"
}

variable "image_digest" {
  type        = string
  description = "Image digest (sha256:...)"
}

# ---- Derived values ----

locals {
  # Digest-pinned image reference in the form <repository URL>@<digest>.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

# Exposes the computed image reference.
output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -84,7 +84,7 @@ resource "aws_db_instance" "default" {
# Temporary, to enforce immediate change # Temporary, to enforce immediate change
apply_immediately = true apply_immediately = true
# Set up storage type to gp3 for better performance # Set up storage type to gp3 for better performance
storage_type = "gp3" storage_type = "gp3"
} }
# Set up the bucket that receives the CSV uploads of EPCs to be retrofitted # Set up the bucket that receives the CSV uploads of EPCs to be retrofitted
@ -298,10 +298,6 @@ module "address2uprn_state_bucket" {
} }
output "address2uprn_state_bucket_name" {
value = module.address2uprn_state_bucket.bucket_name
}
module "address2uprn_registry" { module "address2uprn_registry" {
source = "../modules/container_registry" source = "../modules/container_registry"
name = "address2uprn" name = "address2uprn"
@ -309,6 +305,62 @@ module "address2uprn_registry" {
} }
output "address2uprn_repository_url" { ################################################
value = module.address2uprn_registry.repository_url # Condition ETL Lambda ECR
################################################
# Creates the "condition-etl-terraform-state" bucket through the shared
# tf_state_bucket module (presumably the remote-state bucket for the
# Condition ETL configuration — confirm against its backend block).
module "condition_etl_state_bucket" {
source = "../modules/tf_state_bucket"
bucket_name = "condition-etl-terraform-state"
}
# Container registry named "condition-etl", created through the shared
# container_registry module for the current stage.
module "condition_etl_registry" {
source = "../modules/container_registry"
name = "condition-etl"
stage = var.stage
}
################################################
# Postcode Splitter Lambda ECR
################################################
# Creates the "postcode-splitter-terraform-state" bucket through the shared
# tf_state_bucket module. The same bucket name is used by the S3 backend block
# of the postcode-splitter configuration added in this change.
module "postcode_splitter_state_bucket" {
source = "../modules/tf_state_bucket"
bucket_name = "postcode-splitter-terraform-state"
}
# Container registry for the postcode splitter image, created through the
# shared container_registry module for the current stage.
# NOTE(review): this name uses an underscore ("postcode_splitter") while the
# sibling registry ("condition-etl") and the postcode-splitter state bucket use
# hyphens — confirm the spelling matches what the build pipeline pushes to.
module "postcode_splitter_registry" {
source = "../modules/container_registry"
name = "postcode_splitter"
stage = var.stage
}
################################################
# Condition data S3 bucket
################################################
# Per-stage bucket "condition-data-<stage>", created through the shared s3 module.
# allowed_origins is forwarded to the module as-is (presumably for CORS rules —
# confirm against the module implementation).
module "condition_data_bucket" {
source = "../modules/s3"
bucketname = "condition-data-${var.stage}"
allowed_origins = var.allowed_origins
}
# Managed IAM policy granting s3:GetObject on every object in the
# "condition-data-<stage>" bucket. Nothing in this block attaches it to a role.
# NOTE(review): the policy name is not stage-scoped, unlike the description and
# the resource ARN — confirm no two stages are deployed into the same account,
# where the names would collide.
resource "aws_iam_policy" "condition_etl_s3_read" {
name = "ConditionETLReadS3"
description = "Allow Lambda to read objects from condition-data-${var.stage}"
policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Action = ["s3:GetObject"]
# Bucket ARN is built by hand and must stay in sync with the bucketname
# passed to module "condition_data_bucket".
Resource = "arn:aws:s3:::condition-data-${var.stage}/*"
}
]
})
}
output "condition_etl_s3_read_arn" {
value = aws_iam_policy.condition_etl_s3_read.arn
} }

View file

@ -1,4 +1,4 @@
[pytest] [pytest]
pythonpath = . pythonpath = .
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests

View file

@ -1090,6 +1090,7 @@ class Recommendations:
ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY
#
kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][ kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id) kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
].merge( ].merge(

View file

@ -331,18 +331,18 @@ class RoofRecommendations:
""" """
# Can a non-primary part satisfy loft insulation? # Can a non-primary part satisfy loft insulation?
primary_needs_loft = component_needs[1]["needs_loft_insulation"] primary_needs_loft = component_needs[0]["needs_loft_insulation"]
secondary_needs_loft = any( secondary_needs_loft = any(
p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 1 p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 0
) )
if primary_needs_loft and not secondary_needs_loft: if primary_needs_loft and not secondary_needs_loft:
# Only option is loft # Only option is loft
return "loft" return "loft"
primary_needs_sloping = component_needs[1]["needs_sloping_ceiling"] primary_needs_sloping = component_needs[0]["needs_sloping_ceiling"]
secondary_needs_sloping = any( secondary_needs_sloping = any(
p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 1 p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 0
) )
if primary_needs_sloping and not secondary_needs_sloping: if primary_needs_sloping and not secondary_needs_sloping:
@ -418,11 +418,13 @@ class RoofRecommendations:
return needs_sloping, not needs_loft # Indicates that the property needs sloping ceiling as we only run return needs_sloping, not needs_loft # Indicates that the property needs sloping ceiling as we only run
# this in that case # this in that case
roof_components = [x for x in find_my_epc_components if x["component_name"] == "Roof"]
extracted_roof_descriptions = { extracted_roof_descriptions = {
idx: { idx: {
"description": component["description"], "description": component["description"],
**RoofAttributes(component["description"]).process() **RoofAttributes(component["description"]).process()
} for idx, component in enumerate(find_my_epc_components) if component["component_name"] == "Roof" } for idx, component in enumerate(roof_components)
} }
component_needs = {} component_needs = {}

View file

@ -2,6 +2,10 @@
This script prepares the data for the financial model This script prepares the data for the financial model
""" """
from dotenv import load_dotenv
load_dotenv(".env.local")
import pandas as pd import pandas as pd
import numpy as np import numpy as np
from backend.app.utils import sap_to_epc from backend.app.utils import sap_to_epc
@ -24,12 +28,12 @@ from sqlalchemy import func
# PORTFOLIO_ID = 206 # PORTFOLIO_ID = 206
# SCENARIOS = [389] # SCENARIOS = [389]
PORTFOLIO_ID = 502 # Peabody PORTFOLIO_ID = 524
SCENARIOS = [ SCENARIOS = [
986, 1009,
] ]
scenario_names = { scenario_names = {
986: "EPC C", 1009: "EPC C; Most Economic",
} }

View file

@ -264,6 +264,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
def read_csv_from_s3(bucket_name, filepath): def read_csv_from_s3(bucket_name, filepath):
logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'")
s3 = boto3.client('s3') s3 = boto3.client('s3')
# Get the object from s3 # Get the object from s3