Merge branch 'main' into anchor-sal

This commit is contained in:
Daniel Roth 2026-02-10 14:48:47 +00:00
commit 8fb58ebe56
98 changed files with 3600 additions and 351 deletions

View file

@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
# # 4) Python deps - if you want to run asset list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt
ADD .devcontainer/asset_list/requirements.txt requirements2.txt
ADD asset_list/requirements.txt requirements1.txt
RUN cat requirements1.txt requirements2.txt >> requirements.txt
RUN pip install -r requirements.txt
# 5) Workdir

View file

@ -15,10 +15,9 @@ uvicorn[standard]
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7
pydantic-settings<2
pyyaml>=6.0.1
pydantic>=1.10.7,<2
sqlmodel
# Formatting
black==26.1.0
dotenv
pydantic-settings

View file

@ -13,6 +13,9 @@ on:
required: false
default: "."
type: string
build_args:
required: false
type: string
outputs:
image_digest:
@ -29,11 +32,22 @@ on:
required: true
AWS_REGION:
required: true
DEV_DB_HOST:
required: false
DEV_DB_PORT:
required: false
DEV_DB_NAME:
required: false
jobs:
build:
runs-on: ubuntu-latest
env:
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
outputs:
image_digest: ${{ steps.digest.outputs.image_digest }}
ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
@ -64,7 +78,22 @@ jobs:
- name: Build & push image
run: |
IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}"
docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }}
# Turn each non-empty line of the build_args input into one --build-arg.
# envsubst expands $VAR / ${VAR} references from the job environment without
# evaluating the line as shell code (unlike `eval echo`, which would also run
# command substitutions and word-split the result).
BUILD_ARGS=()
while IFS= read -r line; do
  # skip empty lines
  [ -n "$line" ] || continue
  BUILD_ARGS+=(--build-arg "$(printf '%s' "$line" | envsubst)")
done <<< "${{ inputs.build_args }}"
# The array expansion keeps every NAME=value pair as a single argument, even
# when the value contains spaces; an empty array expands to nothing.
docker build \
  -f "${{ inputs.dockerfile_path }}" \
  "${BUILD_ARGS[@]}" \
  -t "$IMAGE_URI" \
  "${{ inputs.build_context }}"
docker push $IMAGE_URI
- name: Resolve image digest

View file

@ -16,6 +16,7 @@ jobs:
id: set-stage
shell: bash
run: |
env
BRANCH="${GITHUB_REF_NAME}"
if [[ "$BRANCH" == "prod" ]]; then
@ -73,8 +74,8 @@ jobs:
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/Dockerfile
build_context: backend/address2UPRN
dockerfile_path: backend/address2UPRN/handler/Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@ -96,3 +97,76 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 2⃣ Build Postcode Splitter image and Push
# ============================================================
postcodeSplitter_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 3⃣ Deploy Postcode Splitter Lambda
# ============================================================
postcodeSplitter_lambda:
needs: [postcodeSplitter_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Condition ETL image and Push
# ============================================================
condition_etl_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/condition/handler/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
# ============================================================
# Deploy Condition ETL Lambda
# ============================================================
condition_etl_lambda:
needs: [condition_etl_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: condition-etl
lambda_path: infrastructure/terraform/lambda/condition-etl
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -69,14 +69,51 @@ def app():
Property UPRN
"""
data_folder = "/workspaces/home/Downloads"
data_filename = "Anchor 1.xlsx"
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire"
data_filename = "ASPIRE ASSET LIST.xlsx"
sheet_name = "Asset List"
postcode_column = "Postcode"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = []
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None
landlord_property_type = "Property Type"
landlord_built_form = None
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "LLUPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None
outcomes_postcode = None
outcomes_houseno = None
outcomes_id = None
outcomes_address = None
master_filepaths = []
master_id_colnames = []
master_to_asset_list_filepath = None
phase = False
ecosurv_landlords = None
asset_list_header = 0
landlord_block_reference = None
# Peabody data for cleaning
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
sheet_name = "Sheet1"
postcode_column = "Postcode"
address1_column = "House Number"
address1_method = None
fulladdress_column = None
address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2"]
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = None

View file

@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = {
'2.EXT.WALL FLAT': 'mid-terrace',
'2 EXT. WALL FLAT': 'mid-terrace',
'Maisonette: Detached: Ground Floor': 'detached',
'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace',
'Flat: End Terrace: Basement': 'end-terrace',
'Flat: Mid Terrace: Basement': 'mid-terrace',
'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace',
'House: Semi Detached: Top Floor': 'semi-detached',
'House: End Terrace: Ground Floor': 'end-terrace',
'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace',
'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace'
}

View file

@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = {
'PV: 10% roof area, PV: 2kWp array': 'already has PV',
'PV: 50% roof area': 'already has PV',
'Solar PV': 'already has PV',
'SOLAR PV': 'already has PV'
'SOLAR PV': 'already has PV',
'PV: 40% roof area, PV: 2kWp array': 'already has PV',
'PV: 33% roof area, PV: 2kWp array': 'already has PV',
'PV: 30% roof area': 'already has PV'
}

View file

@ -494,6 +494,10 @@ HEATING_MAPPINGS = {
'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, '
'and sealed to, fireplace opening': 'room heaters',
'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel',
'Boiler: G rated Combi': 'gas condensing combi'
'Boiler: G rated Combi': 'gas condensing combi',
'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler',
'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators',
'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler'
}

View file

@ -427,6 +427,23 @@ PROPERTY_MAPPING = {
'End Terrace': 'unknown',
'Detached': 'unknown',
'Mid-terrace': 'unknown',
'MID - TERRACE': 'unknown'
'MID - TERRACE': 'unknown',
'COMOFF': 'unknown',
'LOTS': 'unknown',
'Maisonette: Detached: Ground Floor': 'maisonette',
'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette',
'Flat: End Terrace: Basement': 'flat',
'Bungalow: EnclosedEndTerrace': 'bungalow',
'Flat: Mid Terrace: Basement': 'flat',
'House: Semi Detached: Top Floor': 'house',
'House: End Terrace: Ground Floor': 'house',
'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette',
'Flat: Enclosed Mid Terrace: Basement': 'flat',
'Warden Bungalow': 'bungalow',
'Warden Flat': 'flat',
'Upper Floor Flat': 'flat',
'Extracare Scheme': 'other'
}

View file

@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = {
'PitchedWithSlopingCeiling: As Built': 'pitched insulated',
'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation',
'Flat: 150mm, Flat: Unknown': 'flat insulated',
'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above',
'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above',
'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft',
'Flat: No Insulation': 'flat uninsulated',
'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above',
'PitchedNormalLoftAccess: 175mm': 'pitched insulated',
'AnotherDwellingAbove: 300mm': 'another dwelling above'
}

View file

@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = {
'System built Internal': 'insulated system built',
'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation',
'Cavity: FilledCavityPlusExternal': 'filled cavity'
'Cavity: FilledCavityPlusExternal': 'filled cavity',
'Cavity, Filled Cavity': 'filled cavity',
'Solid Brick, As Built': 'solid brick unknown insulation',
'Cavity, As Built': 'cavity unknown insulation',
'Sandstone, As Built': 'sandstone or limestone unknown insulation',
'Timber Frame, As Built': 'timber frame unknown insulation',
'Solid Brick, Internal Insulation': 'insulated solid brick',
'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation',
'Solid Brick, External': 'insulated solid brick'
}

View file

@ -1,22 +0,0 @@
DB_HOST=db
DB_PORT=5432
DB_NAME=tech_team_local_db
DB_USERNAME=postgres
DB_PASSWORD=makingwarmerhomes
#not used
GOOGLE_SOLAR_API_KEY="test"
SAP_PREDICTIONS_BUCKET="test"
CARBON_PREDICTIONS_BUCKET="test"
HEAT_PREDICTIONS_BUCKET="test"
HEATING_KWH_PREDICTIONS_BUCKET="test"
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
API_KEY="test"
ENVIRONMENT="test"
SECRET_KEY="test"
PLAN_TRIGGER_BUCKET="test"
DATA_BUCKET="test"
EPC_AUTH_TOKEN="test"
ENGINE_SQS_URL="test"
ENERGY_ASSESSMENTS_BUCKET="test"

22
backend/.env.test Normal file
View file

@ -0,0 +1,22 @@
DB_HOST=db
DB_PORT=5432
DB_NAME=tech_team_local_db
DB_USERNAME=postgres
DB_PASSWORD=makingwarmerhomes
#not used
GOOGLE_SOLAR_API_KEY=test
SAP_PREDICTIONS_BUCKET=test
CARBON_PREDICTIONS_BUCKET=test
HEAT_PREDICTIONS_BUCKET=test
HEATING_KWH_PREDICTIONS_BUCKET=test
HOTWATER_KWH_PREDICTIONS_BUCKET=test
API_KEY=test
ENVIRONMENT=test
SECRET_KEY=test
PLAN_TRIGGER_BUCKET=test
DATA_BUCKET=test
EPC_AUTH_TOKEN=test
ENGINE_SQS_URL=test
ENERGY_ASSESSMENTS_BUCKET=test

View file

@ -1256,7 +1256,8 @@ class Property:
"biodiesel": "Smokeless Fuel",
"b30d": "B30K Biofuel",
"coal": "Coal",
"oil": "Oil"
"oil": "Oil",
"unknown": None # Handle - anything post 2020 is electricity else gas
}
self.heating_energy_source = list({
@ -1326,7 +1327,16 @@ class Property:
if self.heating_energy_source == "Varied (Community Scheme)":
if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown
self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]]
mapped_to = fuel_map[self.main_fuel["fuel_type"]]
if mapped_to is None and self.main_fuel["fuel_type"] == "unknown":
# Handle logic based on age band
if self.year_built >= 2020:
self.heating_energy_source = "Electricity"
else:
self.heating_energy_source = "Natural Gas (Community Scheme)"
else:
self.heating_energy_source = mapped_to
else:
raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}")

View file

@ -1,7 +0,0 @@
FROM public.ecr.aws/lambda/python:3.10
# Copy function code
COPY main.py .
# Set the handler
CMD ["main.handler"]

View file

@ -0,0 +1,23 @@
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root)
WORKDIR /var/task
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/address2UPRN/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
COPY utils/ utils/
COPY backend/address2UPRN/main.py .
# -----------------------------
# Lambda handler
# -----------------------------
CMD ["main.handler"]

View file

@ -0,0 +1,3 @@
epc-api-python==1.0.2
tqdm
pandas

View file

@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
method="get",
params={"postcode": postcode},
)
if not search_resp or "rows" not in search_resp:
return pd.DataFrame()
results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
@ -298,7 +300,7 @@ def get_uprn_candidates(
)
def get_uprn(user_inputed_address: str, postcode: str):
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
"""
Return uprn (str)
Return False if failed to find a sensible matching epc
@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str):
if found_uprn == "":
return None
if return_address:
return found_uprn, address
return found_uprn

View file

@ -1,17 +1,24 @@
import pandas as pd
from tqdm import tqdm
from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
# use Address 1
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")
def extract_uprn(row):
    """Look up the UPRN and matched address for one spreadsheet row.

    Args:
        row: A row exposing "User Input" and "Postcode" values.

    Returns:
        A two-element ``pd.Series`` of ``[uprn, found_address]``, or
        ``[None, None]`` when ``get_uprn`` reports no match.
    """
    print(row["User Input"], row["Postcode"])
    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
    # get_uprn signals "no match" with None and, per its docstring, may also
    # return False; neither can be unpacked into (uprn, address).
    if not result:
        return pd.Series([None, None])
    uprn, found_address = result
    return pd.Series([uprn, found_address])
# use domna_address_1
khalim_df = pd.read_excel("khalim_standard.xlsx")
combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
# Find the row in khalim_df that does not app
result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
df.to_excel("outputs2.xlsx", index=False)

View file

@ -1,8 +1,22 @@
import os
from functools import lru_cache
from pydantic_settings import BaseSettings
from pydantic_settings import BaseSettings, SettingsConfigDict
from typing import Optional
def resolve_env_file() -> Optional[str]:
    """Choose the dotenv file that matches the ``ENVIRONMENT`` variable.

    ``ENVIRONMENT`` defaults to ``"local"`` when unset.

    Returns:
        ``"backend/.env"`` for ``local``, ``"backend/.env.test"`` for ``test``,
        and ``None`` (no env file, e.g. prod) for any other value.
    """
    env_file_by_environment = {
        "local": "backend/.env",
        "test": "backend/.env.test",
    }
    current_environment = os.environ.get("ENVIRONMENT", "local")
    # Anything not listed above (prod included) gets no env file.
    return env_file_by_environment.get(current_environment)
class Settings(BaseSettings):
API_KEY: str
API_KEY_NAME: str = "X-API-KEY"
@ -41,8 +55,10 @@ class Settings(BaseSettings):
AWS_SECRET_KEY_ID: Optional[str] = None
AWS_DEFAULT_REGION: Optional[str] = None
class Config:
env_file = "backend/.env.local"
model_config = SettingsConfigDict(
env_file=resolve_env_file(),
env_file_encoding="utf-8",
)
@lru_cache()

View file

@ -24,7 +24,7 @@ def get_cleaned():
cleaned = read_from_s3(
s3_file_name="cleaned_epc_data/cleaned.bson",
bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT)
bucket_name=get_settings().DATA_BUCKET
)
cleaned = msgpack.unpackb(cleaned, raw=False)

View file

@ -0,0 +1,33 @@
from enum import Enum
from typing import Optional
from pydantic import BaseModel
class ConditionFileType(Enum):
    """Source format of a condition file.

    Each member's value is the string accepted in a trigger request's
    ``file_type`` field.
    """

    LBWF = "LBWF"
    Peabody = "Peabody"
    # TODO: make these asset management systems rather than client names
class ConditionTriggerRequest(BaseModel):
    """Request body describing one condition file to process.

    The two ``uprn_lookup_*`` fields are optional and default to ``None``.
    """

    # Which source format the trigger file uses.
    file_type: ConditionFileType
    # Bucket and key of the condition file to process.
    trigger_file_bucket: str # TODO: get this from settings
    trigger_file_key: str
    # Bucket and key of an optional property-reference -> UPRN lookup file.
    uprn_lookup_file_bucket: Optional[str] = None # TODO: get this from settings
    uprn_lookup_file_key: Optional[str] = None
# {
# "file_type": "Peabody",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx",
# "uprn_lookup_file_bucket": "condition-data-dev",
# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv"
# }
# {
# "file_type": "LBWF",
# "trigger_file_bucket": "condition-data-dev",
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
# }

View file

@ -1,4 +1,4 @@
from typing import Any, Dict, List, Optional, Tuple
from typing import Any, Dict, Optional, Tuple
from datetime import date
from backend.condition.domain.aspect_condition import AspectCondition

View file

@ -1,16 +0,0 @@
from enum import Enum
class FileType(Enum):
LBWF = "lbwf"
Peabody = "peabody"
def detect_file_type(filepath: str) -> FileType:
path = filepath.lower()
if "lbwf" in path:
return FileType.LBWF
if "peabody" in path:
return FileType.Peabody
raise ValueError("Unrecognised file path")

View file

@ -1,16 +0,0 @@
from typing import Mapping, Any
from io import BytesIO
from utils.logger import setup_logger
from backend.condition.processor import process_file
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
# Temporary stub for PoC wiring
dummy_stream = BytesIO(b"")
source_key = event.get("source_key", "unknown-source")
process_file(dummy_stream, source_key)

View file

@ -0,0 +1,48 @@
FROM public.ecr.aws/lambda/python:3.11
# For local running:
# FROM python:3.11.10-bullseye
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
# Set working directory (Lambda task root)
WORKDIR /var/task
# Environment
ENV DB_HOST=${DEV_DB_HOST}
ENV DB_PORT=${DEV_DB_PORT}
ENV DB_NAME=${DEV_DB_NAME}
COPY backend/.env.test backend/.env
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
COPY backend/condition/handler/requirements.txt .
# Install dependencies into Lambda runtime
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
COPY utils/ utils/
COPY backend/condition/ backend/condition/
COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
COPY backend/app/db/connection.py backend/app/db/connection.py
COPY backend/app/config.py backend/app/config.py
COPY backend/__init__.py backend/__init__.py
COPY backend/app/__init__.py backend/app/__init__.py
COPY backend/app/db/__init__.py backend/app/db/__init__.py
# -----------------------------
# Lambda handler
# -----------------------------
CMD ["backend/condition/handler/handler.handler"]
# For local running
# CMD ["python", "-m", "backend.condition.handler.handler"]

View file

@ -0,0 +1,51 @@
import json
from typing import Mapping, Any
from io import BytesIO
from backend.condition.condition_trigger_request import ConditionTriggerRequest
from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3
from backend.condition.processor import process_file
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3
logger = setup_logger()
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Process each record in ``event["Records"]`` as a condition-file trigger.

    For every record the ``body`` is JSON-decoded and validated as a
    ``ConditionTriggerRequest``. The trigger file is then read from S3 and
    passed to ``process_file``, along with an S3-backed UPRN lookup when both
    lookup fields are set (otherwise ``None``).

    A failure while handling one record is logged and the loop moves on to the
    next record; nothing is re-raised.

    Args:
        event: Invocation event; records are taken from its ``"Records"`` key
            (missing key means nothing to do).
        context: Not used.
    """
    for message in event.get("Records", []):
        try:
            request_body = json.loads(message["body"])

            logger.debug("Validating request body")
            request = ConditionTriggerRequest.model_validate(request_body)
            logger.debug("Successfully validated request body")

            # The lookup is only built when both its bucket and key are given.
            lookup = None
            if request.uprn_lookup_file_bucket and request.uprn_lookup_file_key:
                logger.debug("Getting UPRN lookup file from s3")
                lookup = UprnLookupS3(
                    bucket=request.uprn_lookup_file_bucket,
                    key=request.uprn_lookup_file_key,
                )  # TODO: replace with postgres implementation
                logger.debug("Successfully got UPRN lookup file from s3")

            logger.debug("Getting conditions data from s3")
            conditions_stream: BytesIO = read_io_from_s3(
                bucket_name=request.trigger_file_bucket,
                file_key=request.trigger_file_key,
            )
            logger.debug(
                "Successfully got conditions data from s3. Moving on to process file..."
            )

            process_file(
                file_stream=conditions_stream,
                file_type=request.file_type,
                uprn_lookup=lookup,
            )
        except Exception as e:
            logger.error(f"Failed to process record: {e}")

View file

@ -0,0 +1,9 @@
openpyxl
sqlmodel
pydantic-settings
psycopg2-binary==2.9.10
# pandas isn't used, but needed for importing from utils.s3
pandas==2.2.2
numpy==1.26.4
openpyxl

View file

@ -1,5 +1,7 @@
from pathlib import Path
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.processor import process_file
@ -20,15 +22,27 @@ def main() -> None:
/ "peabody"
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
)
filepaths = [lbwf_path, peabody_path]
# filepaths = [lbwf_path]
peabody_uprn_lookup_path: Path = (
path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
)
# filepaths = [lbwf_path, peabody_path]
filepaths = [lbwf_path]
# filepaths = [peabody_path]
uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix())
def get_file_type(file_path: str) -> ConditionFileType:
    """Infer the condition file type from a file path.

    The path is matched case-insensitively against "peabody" and then "lbwf".

    Args:
        file_path: Path of the condition file.

    Returns:
        The matching ``ConditionFileType`` member.

    Raises:
        ValueError: If the path contains neither marker, rather than
            silently returning ``None`` despite the return annotation.
    """
    lowered_path = file_path.lower()
    if "peabody" in lowered_path:
        return ConditionFileType.Peabody
    if "lbwf" in lowered_path:
        return ConditionFileType.LBWF
    raise ValueError(f"Unrecognised file path: {file_path}")
for fp in filepaths:
with fp.open("rb") as f:
process_file(
file_stream=f,
source_key=fp.as_posix(),
file_type=get_file_type(fp.as_posix()),
uprn_lookup=uprn_lookup,
)

View file

@ -0,0 +1,8 @@
from abc import ABC, abstractmethod
from typing import BinaryIO, Dict
class UprnLookup(ABC):
    """Abstract provider of a property-reference -> UPRN mapping."""

    @abstractmethod
    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Return a dict keyed by property reference with integer UPRN values.

        Concrete subclasses decide where the mapping is loaded from.
        """
        pass

View file

@ -0,0 +1,23 @@
import csv
from io import TextIOWrapper
from typing import BinaryIO, Dict, TextIO
from backend.condition.lookups.uprn_lookup import UprnLookup
class UprnLookupLocal(UprnLookup):
    """UPRN lookup read from a CSV file on the local filesystem.

    The CSV needs a header row with ``reference`` and ``out_uprn`` columns.
    """

    def __init__(self, csv_path: str):
        """Store the path of the CSV file; nothing is read until lookup time."""
        self.csv_path = csv_path

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Open ``csv_path`` and return its reference -> UPRN mapping."""
        with open(self.csv_path, "rb") as f:
            return self.parse_csv(f)

    def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a reference -> UPRN dict.

        Rows whose ``reference`` or ``out_uprn`` is missing, empty or
        whitespace-only are skipped. Both values are stripped before use.

        Args:
            file_stream: Binary stream of UTF-8 CSV text (a leading BOM is
                tolerated).

        Returns:
            Mapping of stripped reference to integer UPRN.

        Raises:
            KeyError: If a row lacks the ``reference`` or ``out_uprn`` column.
            ValueError: If a non-empty ``out_uprn`` is not an integer.
        """
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise corrupt the first header name. newline="" lets
        # the csv module do its own newline handling, as its docs require.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )
        mapping: Dict[str, int] = {}
        for row in csv.DictReader(text_stream):
            # Strip before the emptiness check so "  " is skipped instead of
            # reaching int(""); short rows yield None for missing cells.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()
            if not reference or not uprn:
                continue
            mapping[reference] = int(uprn)
        return mapping

View file

@ -0,0 +1,29 @@
import csv
from io import BytesIO, TextIOWrapper
from typing import BinaryIO, Dict, TextIO
from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.s3 import read_io_from_s3
class UprnLookupS3(UprnLookup):
    """UPRN lookup read from a CSV object fetched with ``read_io_from_s3``.

    The CSV needs a header row with ``reference`` and ``out_uprn`` columns.
    """

    def __init__(self, bucket: str = "", key: str = ""):
        """Store the bucket and key of the CSV; nothing is fetched yet."""
        self.bucket = bucket
        self.key = key

    def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
        """Fetch the CSV for ``bucket``/``key`` and return reference -> UPRN."""
        file_bytes: BytesIO = read_io_from_s3(
            bucket_name=self.bucket, file_key=self.key
        )
        return self._parse_csv_bytes(file_bytes)

    def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]:
        """Parse a binary CSV stream into a reference -> UPRN dict.

        Rows whose ``reference`` or ``out_uprn`` is missing, empty or
        whitespace-only are skipped. Both values are stripped before use.

        Args:
            file_stream: Binary stream of UTF-8 CSV text (a leading BOM is
                tolerated).

        Returns:
            Mapping of stripped reference to integer UPRN.

        Raises:
            KeyError: If a row lacks the ``reference`` or ``out_uprn`` column.
            ValueError: If a non-empty ``out_uprn`` is not an integer.
        """
        # "utf-8-sig" decodes plain UTF-8 unchanged but drops a leading BOM,
        # which would otherwise corrupt the first header name. newline="" lets
        # the csv module do its own newline handling, as its docs require.
        text_stream: TextIO = TextIOWrapper(
            file_stream, encoding="utf-8-sig", newline=""
        )
        mapping: Dict[str, int] = {}
        for row in csv.DictReader(text_stream):
            # Strip before the emptiness check so "  " is skipped instead of
            # reaching int(""); short rows yield None for missing cells.
            reference = (row["reference"] or "").strip()
            uprn = (row["out_uprn"] or "").strip()
            if not reference or not uprn:
                continue
            mapping[reference] = int(uprn)
        return mapping

View file

@ -1,27 +1,35 @@
from typing import Optional
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper
from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper
from backend.condition.file_type import FileType
from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.lbwf_parser import LbwfParser
from backend.condition.parsing.peabody_parser import PeabodyParser
def select_parser(file_type: FileType) -> Parser:
if file_type is FileType.LBWF:
def select_parser(
file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None
) -> Parser:
if file_type is ConditionFileType.LBWF:
return LbwfParser()
if file_type is FileType.Peabody:
return PeabodyParser()
if file_type is ConditionFileType.Peabody:
if not uprn_lookup:
raise ValueError(
"Cannot instantiate Peabody Parser without UPRN lookup being provided"
)
return PeabodyParser(uprn_lookup=uprn_lookup)
raise ValueError("Unrecognised file type, unable to instantiate Parser")
def select_mapper(file_type: FileType) -> Mapper:
if file_type is FileType.LBWF:
def select_mapper(file_type: ConditionFileType) -> Mapper:
if file_type is ConditionFileType.LBWF:
return LbwfMapper()
if file_type is FileType.Peabody:
if file_type is ConditionFileType.Peabody:
return PeabodyMapper()
raise ValueError("Unrecognised file type, unable to instantiate Mapper")

View file

@ -18,7 +18,6 @@ class LbwfParser(Parser):
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream)
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(

View file

@ -8,6 +8,5 @@ class Parser(ABC):
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
pass

View file

@ -4,6 +4,7 @@ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
from openpyxl import Workbook, load_workbook
from collections import defaultdict
from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
@ -15,42 +16,29 @@ logger = setup_logger()
class PeabodyParser(Parser):
def __init__(self, uprn_lookup: UprnLookup):
self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC?
def parse(
self,
file_stream: BinaryIO,
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
) -> Any:
wb: Workbook = load_workbook(file_stream)
if location_ref_to_uprn_map is None:
location_ref_to_uprn_map: Dict[str, int] = (
PeabodyParser._build_location_ref_to_uprn_map()
)
file_stream.seek(0)
logger.debug("[PeabodyParser] Loading workbook...")
wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True)
logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...")
assets = PeabodyParser._parse_assets(wb)
logger.debug(
"[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..."
)
location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup()
logger.debug("[PeabodyParser] Successfully parsed UPRN lookup")
return PeabodyParser._group_assets_into_properties(
assets=assets,
location_ref_to_uprn_map=location_ref_to_uprn_map,
)
@staticmethod
def _build_location_ref_to_uprn_map() -> Dict[str, int]:
location_ref_to_uprn_filepath: Path = (
Path(__file__).resolve().parents[1]
/ "sample_data"
/ "peabody"
/ "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
)
location_ref_to_uprn_map: Dict[str, int] = {}
with location_ref_to_uprn_filepath.open(newline="") as f:
reader: Any = csv.DictReader(f)
for row in reader:
location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"])
return location_ref_to_uprn_map
@staticmethod
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
assets_sheet = wb["Survey Records - D & Lower"]
@ -67,7 +55,7 @@ class PeabodyParser(Parser):
)
if not asset.is_block_level:
# Block-level condition surveys are out of scope for now
# until we have a wider think on how to handle block
# until we have a wider think on how to handle blocks
assets.append(asset) # TODO: handle block-level assets
except Exception as e:
@ -92,13 +80,14 @@ class PeabodyParser(Parser):
assets_by_location_reference[asset.lo_reference].append(asset)
properties: List[PeabodyProperty] = []
failed_mappings_count = 0
for location_ref, grouped_assets in assets_by_location_reference.items():
uprn = location_ref_to_uprn_map.get(location_ref)
if uprn is None:
logger.warning(f"No UPRN found for Location Reference: {location_ref}")
failed_mappings_count += 1
continue
properties.append(
@ -108,6 +97,7 @@ class PeabodyParser(Parser):
)
)
logger.warning(f"No UPRN found for {failed_mappings_count} Location References")
return properties
@staticmethod

View file

@ -19,18 +19,19 @@ class ConditionPostgres:
def bulk_insert_surveys(
self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
) -> None:
logger.info(
f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
logger.debug(
f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
)
survey_models: List[PropertyConditionSurveyModel] = [
ConditionPostgres.map_survey_to_model(s) for s in surveys
]
total: int = len(survey_models)
logger.info(
f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
logger.debug(
f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
)
with db_session() as session:
logger.info("[ConditionPostgres] Successfully made connection to database")
for start in range(0, total, batch_size):
end = min(start + batch_size, total)
batch = survey_models[start:end]

View file

@ -1,26 +1,31 @@
from typing import Any, BinaryIO, List
from typing import Any, BinaryIO, List, Optional
from datetime import datetime
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.logger import setup_logger
from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.parsing.parser import Parser
from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.file_type import FileType, detect_file_type
from backend.condition.parsing.factory import select_parser, select_mapper
logger = setup_logger()
def process_file(file_stream: BinaryIO, source_key: str) -> None:
logger.info(f"[processor] Received file: {source_key}")
def process_file(
file_stream: BinaryIO,
file_type: ConditionFileType,
uprn_lookup: Optional[UprnLookup],
) -> None:
# Instantiation
file_type: FileType = detect_file_type(source_key)
parser: Parser = select_parser(file_type)
logger.debug(f"[processor] Instantiating classes...")
parser: Parser = select_parser(file_type, uprn_lookup)
mapper: Mapper = select_mapper(file_type)
persistence = ConditionPostgres()
logger.debug(f"[processor] Finished instantiating classes. Calling Parser...")
# Orchestration
raw_properties: List[Any] = parser.parse(file_stream)

View file

@ -0,0 +1,34 @@
import pytest
from typing import Dict
from tempfile import NamedTemporaryFile
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
@pytest.fixture
def prop_ref_uprn_csv_file(tmp_path) -> str:
    """
    Write a small property-reference -> UPRN CSV and return its path.

    The file is created inside pytest's per-test ``tmp_path`` directory, so pytest
    manages its clean-up. The previous NamedTemporaryFile(delete=False) version was
    never removed and leaked one file per test run.

    :param tmp_path: pytest's built-in per-test temporary directory fixture
    :return: Path of the CSV file, as a string
    """
    csv_content = """reference,out_uprn
ABC123,10000000001
DEF456,10000000002
GHI789,10000000003
"""
    csv_file = tmp_path / "prop_ref_uprn.csv"
    csv_file.write_text(csv_content)
    return str(csv_file)
def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None:
    """The CSV-backed lookup returns one reference -> UPRN entry per CSV row."""
    # arrange
    expected_map: Dict[str, int] = {
        "ABC123": 10000000001,
        "DEF456": 10000000002,
        "GHI789": 10000000003,
    }
    lookup_under_test = UprnLookupLocal(prop_ref_uprn_csv_file)

    # act
    actual_map: Dict[str, int] = lookup_under_test.get_property_ref_to_uprn_lookup()

    # assert
    assert actual_map == expected_map

View file

@ -1,11 +1,13 @@
import pytest
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.parsing.factory import select_parser
from backend.condition.file_type import FileType
def test_selects_lbwf_parser():
# arrange
file_type = FileType.LBWF
file_type = ConditionFileType.LBWF
expected_class_name = "LbwfParser"
# act
@ -14,13 +16,15 @@ def test_selects_lbwf_parser():
# assert
assert expected_class_name == actual_class_name
def test_selects_peabody_parser():
# arrange
file_type = FileType.Peabody
file_type = ConditionFileType.Peabody
expected_class_name = "PeabodyParser"
uprn_lookup = UprnLookupLocal(csv_path="test")
# act
actual_class_name = select_parser(file_type).__class__.__name__
actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__
# assert
assert expected_class_name == actual_class_name
assert expected_class_name == actual_class_name

View file

@ -1,9 +1,11 @@
from tempfile import NamedTemporaryFile
import pytest
from typing import Any, Dict
from io import BytesIO
from openpyxl import Workbook
from datetime import datetime
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.parsing.peabody_parser import PeabodyParser
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
PeabodyAssetCondition,
@ -145,23 +147,28 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
@pytest.fixture
def location_ref_to_uprn_map() -> Dict[str, int]:
return {
"B000RAND": 1,
"B000BLOCK": 2,
"B000FAKE": 3,
"B000MIS": 4,
}
def prop_ref_uprn_csv_file() -> str:
csv_content = """reference,out_uprn
B000RAND,1
B000BLOCK,2
B000FAKE,3
B000MIS,4
"""
with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp:
tmp.write(csv_content)
tmp.flush()
return tmp.name
def test_peabody_parser_parses_conditions(
peabody_assets_xlsx_bytes, location_ref_to_uprn_map
peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file
):
# arrange
parser = PeabodyParser()
uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file)
parser = PeabodyParser(uprn_lookup=uprn_lookup)
# act
result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)
result: Any = parser.parse(peabody_assets_xlsx_bytes)
# assert
assert len(result) == 3

View file

@ -1,22 +0,0 @@
import pytest
from backend.condition.file_type import FileType, detect_file_type
def test_detects_lbwf_file_type():
# arrange
file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx"
expected_file_type = FileType.LBWF
# act
actual_file_type: FileType = detect_file_type(file_path_str)
# assert
assert expected_file_type == actual_file_type
def test_unknown_filepath_raises_value_error():
# arrange
file_path_str = "unknown/Example Asset Data.xlsx"
# act + assert
with pytest.raises(ValueError):
detect_file_type(file_path_str)

View file

@ -978,13 +978,15 @@ async def model_engine(body: PlanTriggerRequest):
recommendations_scoring_data.extend(p.recommendations_scoring_data)
logger.info("Preparing data for scoring in sap change api")
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop(
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
# Temp putting this here
recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
if not recommendations_scoring_data.empty:
recommendations_scoring_data = recommendations_scoring_data.drop(
columns=[
"rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending",
"carbon_ending"
]
)
# TODO: Temp putting this here
recommendations_scoring_data["is_post_sap10_ending"] = True
all_predictions = await model_api.async_paginated_predictions(

View file

@ -313,4 +313,15 @@ class ModelApi:
logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}")
await asyncio.sleep(2 ** attempts) # exponential backoff
await self.close_aiohttp_session()
# Ensure a stable output structure for the dataframe, so it can be used by other functions downstream
for k in all_predictions.keys():
if all_predictions[k].empty:
col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if (
extract_ids) else ['id', 'predictions']
all_predictions[k] = pd.DataFrame(
columns=col_template
)
return all_predictions

View file

@ -0,0 +1,102 @@
# Retrofit Property Data Onboarding
This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems
(currently Parity) into a standardised internal format that is compatible with both address2uprn and the engine.
The pipeline is designed to:
- Run as an AWS Lambda triggered by SQS
- Read raw CSV/XLSX files from S3
- Perform rule-based mappings
- Infer as-built property attributes, assumed from the construction age
- Output a processed CSV back to S3, to be consumed by address2uprn
### Structure
SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3
Each source system implements its own **Onboarder**, while sharing a common base and mapping process.
---
### Repository Structure
onboarders/
├── `handler.py` # Lambda entrypoint \
├── `factory.py` # Onboarder factory \
├── `base.py` # Shared onboarding base class \
├── `parity.py` # Parity-specific transformation logic \
├── `mappings/` \
│ └── `parity/` # Parity domain mappings & classifiers \
│ ├── `age_band.py` \
│ ├── `property_type.py` \
│ ├── `built_form.py` \
│ ├── `walls.py` \
│ ├── `roof.py` \
│ ├── `floor.py` \
│ ├── `glazing.py` \
│ ├── `heating.py` \
│ ├── `as_built_wall_classifiers.py` \
│ ├── `as_built_roof_classifiers.py` \
│ └── `as_built_floor_classifiers.py` \
├── `tests/` \
├── `requirements.txt` \
└── `README.md`
---
### Lambda Entry Point (`handler.py`)
The Lambda handler:
1. Consumes messages from the SQS queue
2. Validates the payload
3. Instantiates the correct onboarder via `OnboarderFactory`
4. Runs the transformation
5. Writes the transformed CSV back to S3
### Expected Event Payload
```json
{
"s3_uri": "s3://bucket/path/to/input.xlsx",
"system": "parity",
"format": "xlsx",
"sheet_name": "Sustainability"
}
```
### Onboarder Base (`base.py`)
OnboarderBase provides shared functionality across all systems.
*Responsibilities*
- Reading CSV/XLSX files from S3
- Writing transformed CSVs to S3
- Defining canonical output column names
- Providing validation helpers
- Common output - for the moment, onboarders are expected to return a CSV
### Parity Onboarder (`parity.py`)
`ParityOnboarder` contains all Parity-specific transformation logic.
*Responsibilities*
- Map raw Parity fields to internal EPC-aligned enums
- Infer “as-built” constructions using age bands when insulation data is missing
- Resolve energy efficiency ratings deterministically
- Normalise output into a fixed schema
The `transform()` method orchestrates the transformation process.
### TODOs
- In `backend/onboarders/mappings/parity/glazing.py` we currently map the Parity descriptions
to tuples of descriptions and efficiency ratings. This is okay for the moment, but we may consider
using a data class, given how error-prone this is.
- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py`
- Implement an AI-enabled version, to replace the standardised asset list

View file

View file

@ -0,0 +1,84 @@
import pandas as pd
from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3
class OnboarderBase:
# Input dataset to be transformed
data: pd.DataFrame | None = None
bucket_name = None
input_file_name = None
output_file_name = None
# Description columns
landlord_wall_construction: str = "landlord_wall_construction"
landlord_roof_construction: str = "landlord_roof_construction"
landlord_floor_construction: str = "landlord_floor_construction"
landlord_windows_type: str = "landlord_windows_type"
landlord_heating_construction: str = "landlord_heating_construction"
landlord_fuel_type: str = "landlord_fuel_type"
landlord_heating_controls: str = "landlord_heating_controls"
landlord_hot_water_system: str = "landlord_hot_water_system"
# Efficiency columns
landlord_roof_efficiency: str = "landlord_roof_efficiency"
landlord_windows_efficiency: str = "landlord_windows_efficiency"
landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency"
landlord_heating_efficiency: str = "landlord_heating_efficiency"
landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency"
landlord_wall_efficiency: str = "landlord_wall_efficiency"
# Additional windows features
landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion"
landlord_glazed_type: str = "landlord_glazed_type"
landlord_glazed_area: str = "landlord_glazed_area"
# Additional roof features
landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling"
# Shape, dimensions, age
landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2"
landlord_construction_age_band: str = "landlord_construction_age_band"
landlord_property_type: str = "landlord_property_type"
landlord_built_form: str = "landlord_built_form"
def read_s3(self, file_format, **kwargs):
if self.input_file_name is None or self.bucket_name is None:
raise ValueError("Bucket name and input file name must be set before reading from S3.")
if file_format == "xlsx":
self.data = read_excel_from_s3(
bucket_name=self.bucket_name,
file_key=self.input_file_name,
sheet_name=kwargs.get("sheet_name"),
header_row=kwargs.get("header_row", 0)
)
else:
self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name)
def write(self):
if self.data is None:
raise ValueError("No data to write. Please run transform() before writing.")
if self.bucket_name is None or self.output_file_name is None:
raise ValueError("Bucket name and output file name must be set before writing to S3.")
# Store file as csv - will store in the same route location as the input file
save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name)
@staticmethod
def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool:
# We only allow nulls if the original value was null
null_vals = data[pd.isnull(data[mapped_column])]
if null_vals.empty:
return True
# We make sure all original values were null
assert pd.isnull(null_vals[original_column]).all(), (
f"Some values in {mapped_column} were not mapped, but original values were not null"
)
@staticmethod
def assert_no_nulls(data: pd.DataFrame, column: str):
assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not"
def map_construction_age_band(self):
raise NotImplementedError(
"This method should be implemented by subclasses to map construction age bands to descriptions"
)

View file

@ -0,0 +1,10 @@
from onboarders.parity import ParityOnboarder
class OnboarderFactory:
    """Creates the onboarder that handles a given source system."""

    @staticmethod
    def create_onboarder(onboarder_type, **kwargs):
        """
        Build the onboarder registered for ``onboarder_type``.

        :param onboarder_type: Source system identifier; only "parity" is handled
        :param kwargs: Forwarded unchanged to the onboarder's constructor
        :return: A ParityOnboarder instance
        :raises ValueError: for any other onboarder type
        """
        # Guard first: reject anything that is not a known system
        if not onboarder_type == "parity":
            raise ValueError(f"Unknown onboarder type: {onboarder_type}")
        return ParityOnboarder(**kwargs)

View file

@ -0,0 +1,50 @@
import json
from pydantic import BaseModel, Field
from typing import Optional, Literal
from onboarders.factory import OnboarderFactory
from utils.logger import setup_logger
logger = setup_logger()
# Validated shape of one SQS message body consumed by handler().
class OnboardingEvent(BaseModel):
    # Location of the raw input file
    s3_uri: str = Field(..., description="S3 URI of the raw ARA input file")
    # Source system; handler() passes this to OnboarderFactory.create_onboarder.
    # NOTE(review): "generic" validates here, but the factory shown alongside this file
    # only handles "parity" and raises ValueError for anything else - confirm intent.
    system: Literal["parity", "generic"] = Field(..., description="Onboarding system identifier")
    # Input file format; only these two values pass validation
    format: Literal["csv", "xlsx"]
    # Worksheet name; optional, defaults to None
    sheet_name: Optional[str] = None
def handler(event, context):
    """
    Lambda handler that onboards one raw input file per SQS message.

    Each record body is parsed as JSON, validated as an OnboardingEvent, handed to the
    onboarder created by OnboarderFactory, transformed and written back out.

    Example record body::

        {
            "s3_uri": "s3://bucket/path/to/input.xlsx",
            "system": "parity",
            "format": "xlsx",
            "sheet_name": "Sustainability"
        }

    A failure in one record is logged with its traceback and does not stop the other
    records. Failed records are reported in the SQS partial-batch response format;
    this is ignored unless ReportBatchItemFailures is enabled on the event source
    mapping, in which case only the failed messages are retried.

    :param event: Lambda event; records are read from ``event["Records"]``
    :param context: Lambda context (unused)
    :return: ``{"batchItemFailures": [{"itemIdentifier": <messageId>}, ...]}``
    """
    batch_item_failures = []

    for record in event.get("Records", []):
        try:
            event_body = json.loads(record["body"])

            logger.info("Processing record with body: %s", event_body)
            validated_event = OnboardingEvent(**event_body)

            # NOTE(review): the format is passed as both `format` and `file_format`;
            # kept as-is because the onboarder's constructor is not visible here - confirm
            # which keyword it actually expects.
            onboarder = OnboarderFactory.create_onboarder(
                validated_event.system,
                fileuri=validated_event.s3_uri,
                format=validated_event.format,
                sheet_name=validated_event.sheet_name,
                file_format=validated_event.format
            )

            logger.info("Transforming data")
            onboarder.transform()

            logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}")
            onboarder.write()
        except Exception as e:
            # Best-effort per record: keep going, but keep the traceback for diagnosis
            logger.error(f"Failed to process record: {e}", exc_info=True)
            # An entry without an identifier would mark the whole batch as failed, so
            # only report records that carry a messageId
            message_id = record.get("messageId") if isinstance(record, dict) else None
            if message_id:
                batch_item_failures.append({"itemIdentifier": message_id})

    return {"batchItemFailures": batch_item_failures}

View file

@ -1,14 +0,0 @@
party_map = {
"Before 1900": 'England and Wales: before 1900',
"1900-1929": 'England and Wales: 1900-1929',
"1930-1949": 'England and Wales: 1930-1949',
"1950-1966": 'England and Wales: 1950-1966',
"1967-1975": 'England and Wales: 1967-1975',
"1976-1982": 'England and Wales: 1976-1982',
"1983-1990": 'England and Wales: 1983-1990',
"1991-1995": 'England and Wales: 1991-1995',
"1996-2002": 'England and Wales: 1996-2002',
"2003-2006": 'England and Wales: 2003-2006',
"2007-2011": 'England and Wales: 2007-2011',
"2012 onwards": 'England and Wales: 2012-2021',
}

View file

@ -1,15 +0,0 @@
parity_map = {
"MidTerrace": "Mid-Terrace",
"EndTerrace": "End-Terrace",
"Detached": "Detached",
"SemiDetached": "Semi-Detached",
"EnclosedMidTerrace": "Enclosed Mid-Terrace",
"EnclosedEndTerrace": "Enclosed End-Terrace",
}
# MidTerrace 41462
# EndTerrace 20910
# Detached 16875
# SemiDetached 14725
# EnclosedMidTerrace 3176
# EnclosedEndTerrace 2393

View file

@ -0,0 +1,19 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
# Lookup from Parity construction age band labels to the internal EpcConstructionAgeBand enum.
parity_map = {
    "Before 1900": EpcConstructionAgeBand.before_1900,
    "1900-1929": EpcConstructionAgeBand.from_1900_to_1929,
    "1930-1949": EpcConstructionAgeBand.from_1930_to_1949,
    "1950-1966": EpcConstructionAgeBand.from_1950_to_1966,
    "1967-1975": EpcConstructionAgeBand.from_1967_to_1975,
    "1976-1982": EpcConstructionAgeBand.from_1976_to_1982,
    "1983-1990": EpcConstructionAgeBand.from_1983_to_1990,
    "1991-1995": EpcConstructionAgeBand.from_1991_to_1995,
    "1996-2002": EpcConstructionAgeBand.from_1996_to_2002,
    "2003-2006": EpcConstructionAgeBand.from_2003_to_2006,
    "2007-2011": EpcConstructionAgeBand.from_2007_to_2011,
    # Open-ended band; kept alongside the split 2012-2022 / 2023-onwards bands below
    "2012 onwards": EpcConstructionAgeBand.from_2012_onwards,
    # Newer age bands, under SAP10
    "2012-2022": EpcConstructionAgeBand.from_2012_to_2022,
    "2023 onwards": EpcConstructionAgeBand.from_2023_onwards,
}

View file

@ -0,0 +1,60 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.floor import EpcFloorDescriptions
def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assume an as-built floor description for a floor of unknown type, from age alone.

    :param age_band: Construction age band; only its start year is used
    :return: Solid insulated (assumed) from 2003, solid limited insulation (assumed) from
        1996, solid no insulation (assumed) from 1930, otherwise suspended no insulation
        (assumed)
    """
    built_from = age_band.start_year()
    # Newest threshold first; the first matching branch wins
    return (
        EpcFloorDescriptions.solid_insulated_assumed if built_from >= 2003
        else EpcFloorDescriptions.solid_limited_insulation_assumed if built_from >= 1996
        else EpcFloorDescriptions.solid_no_insulation_assumed if built_from >= 1930
        else EpcFloorDescriptions.suspended_no_insulation_assumed
    )
def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Pick the insulated floor description for a retrofitted floor of unknown type.

    :param age_band: Construction age band; only its start year is used
    :return: Solid insulated when the band starts in 1930 or later, otherwise suspended
        insulated
    """
    if age_band.start_year() >= 1930:
        description = EpcFloorDescriptions.solid_insulated
    else:
        description = EpcFloorDescriptions.suspended_insulated
    return description
def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assume the as-built insulation level of a solid floor from the construction age.

    :param age_band: Construction age band; only its start year is used
    :return: Insulated (assumed) from 2003, limited insulation (assumed) from 1996,
        otherwise no insulation (assumed)
    """
    first_year = age_band.start_year()
    if first_year >= 2003:
        assumed = EpcFloorDescriptions.solid_insulated_assumed
    elif first_year >= 1996:
        assumed = EpcFloorDescriptions.solid_limited_insulation_assumed
    else:
        assumed = EpcFloorDescriptions.solid_no_insulation_assumed
    return assumed
def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions:
    """
    Assume the as-built insulation level of a suspended floor from the construction age.

    :param age_band: Construction age band; only its start year is used
    :return: Insulated (assumed) from 2003, limited insulation (assumed) from 1996,
        otherwise no insulation (assumed)
    """
    built_from = age_band.start_year()
    return (
        EpcFloorDescriptions.suspended_insulated_assumed if built_from >= 2003
        else EpcFloorDescriptions.suspended_limited_insulation_assumed if built_from >= 1996
        else EpcFloorDescriptions.suspended_no_insulation_assumed
    )
# Age-based classifier to use when the floor type is known.
# Both suspended variants share the same classifier.
# NOTE(review): keys presumably match the raw Parity floor-type values - confirm against the caller.
as_built_floor_classifiers = {
    "Solid": map_solid_floor_as_built,
    "SuspendedTimber": map_suspended_floor_as_built,
    "SuspendedNotTimber": map_suspended_floor_as_built,
}

# Age-based classifier to use when the floor type is unknown.
# "AsBuilt" and "Unknown" are treated the same way.
# NOTE(review): keys presumably match the raw Parity floor-insulation values - confirm against the caller.
unknown_as_built_floor_classifiers = {
    "RetroFitted": unknown_floor_retrofitted,
    "AsBuilt": unknown_floor_as_built,
    "Unknown": unknown_floor_as_built,
}

View file

@ -0,0 +1,56 @@
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Assume the insulation level of a flat, as-built roof from the construction age.

    Breakdown by age band:
        2023 onwards            Flat, insulated
        2003-2022               Flat, insulated
        1983-2002               Flat, insulated
        1976-1982               Flat, limited insulation
        1967-1975               Flat, limited insulation
        1950-1966 and earlier   Flat, no insulation

    :param age_band: Input age band; only its start year is used
    :return: EpcRoofDescriptions
    """
    built_from = age_band.start_year()
    return (
        EpcRoofDescriptions.flat_insulated if built_from >= 1983
        else EpcRoofDescriptions.flat_limited_insulation if built_from >= 1967
        else EpcRoofDescriptions.flat_no_insulation
    )
def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions:
    """
    Assume the insulation level of an as-built pitched roof with a sloping ceiling.

    Breakdown by age band:
        2023 onwards            Sloping pitched, insulated
        2003-2022               Sloping pitched, insulated
        1983-2002               Sloping pitched, insulated
        1976-1982               Sloping pitched, limited insulation
        1967-1975 and earlier   Sloping pitched, no insulation

    :param age_band: Input age band; only its start year is used
    :return: EpcRoofDescriptions
    """
    first_year = age_band.start_year()
    if first_year >= 1983:
        assumed = EpcRoofDescriptions.sloping_pitched_insulated
    elif first_year >= 1976:
        assumed = EpcRoofDescriptions.sloping_pitched_limited_insulation
    else:
        assumed = EpcRoofDescriptions.sloping_pitched_no_insulation
    return assumed
# Age-based classifier per roof type.
# NOTE(review): keys presumably match the raw Parity roof-type values - confirm against the caller.
as_built_roof_classifiers = {
    # Only need to apply this to flat and sloping ceiling roofs
    "Flat": map_flat_roof,
    "PitchedWithSlopingCeiling": map_sloping_ceiling_roof,
}

View file

@ -0,0 +1,113 @@
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assume the as-built insulation level of a cavity wall from the construction age.

    :param age_band: Construction age band of the property
    :return: No insulation (assumed) for bands starting before 1976, partial insulation
        (assumed) for 1976-1982, insulated (assumed) for bands from 1983 onwards
    :raises NotImplementedError: if the band falls into none of those groups
    """
    first_year = age_band.start_year()
    if first_year < 1976:
        assumed = EpcWallDescriptions.cavity_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.cavity_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.cavity_insulated_assumed
    else:
        raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping")
    return assumed
def map_solid_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assume the as-built insulation level of a solid brick wall from the construction age.

    :param age_band: Construction age band of the property
    :return: No insulation (assumed) for bands starting before 1976, partial insulation
        (assumed) for 1976-1982, insulated (assumed) for bands from 1983 onwards
    :raises NotImplementedError: if the band falls into none of those groups
    """
    first_year = age_band.start_year()
    if first_year < 1976:
        assumed = EpcWallDescriptions.solid_brick_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.solid_brick_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.solid_brick_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for solid wall insulation mapping"
        )
    return assumed
def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assume the as-built insulation level of a timber frame wall from the construction age.

    Timber frame uses earlier cut-offs than the other wall types in this module.

    :param age_band: Construction age band of the property
    :return: No insulation (assumed) for bands starting before 1950, partial insulation
        (assumed) for bands starting before 1976, insulated (assumed) for bands from 1976
        onwards
    :raises NotImplementedError: if the band falls into none of those groups
    """
    first_year = age_band.start_year()
    if first_year < 1950:
        assumed = EpcWallDescriptions.timber_frame_no_insulation_assumed
    elif first_year < 1976:
        assumed = EpcWallDescriptions.timber_frame_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1976):
        assumed = EpcWallDescriptions.timber_frame_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for timber frame wall insulation mapping"
        )
    return assumed
def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assume the as-built insulation level of a system build wall from the construction age.

    :param age_band: Construction age band of the property
    :return: No insulation (assumed) for bands starting before 1976, partial insulation
        (assumed) for 1976-1982, insulated (assumed) for bands from 1983 onwards
    :raises NotImplementedError: if the band falls into none of those groups
    """
    first_year = age_band.start_year()
    if first_year < 1976:
        assumed = EpcWallDescriptions.system_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.system_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.system_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for system build wall insulation mapping"
        )
    return assumed
def map_granite_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assume the as-built insulation level of a granite/whinstone wall from the construction age.

    :param age_band: Construction age band of the property
    :return: No insulation (assumed) for bands starting before 1976, partial insulation
        (assumed) for 1976-1982, insulated (assumed) for bands from 1983 onwards
    :raises NotImplementedError: if the band falls into none of those groups
    """
    first_year = age_band.start_year()
    if first_year < 1976:
        assumed = EpcWallDescriptions.granite_whinstone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.granite_whinstone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        # NOTE(review): this member is spelled "whinestone" while the two above use
        # "whinstone" - confirm it matches the name declared on EpcWallDescriptions.
        assumed = EpcWallDescriptions.granite_whinestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for granite wall insulation mapping"
        )
    return assumed
def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Assume the as-built insulation level of a sandstone/limestone wall from the construction age.

    :param age_band: Construction age band of the property
    :return: No insulation (assumed) for bands starting before 1976, partial insulation
        (assumed) for 1976-1982, insulated (assumed) for bands from 1983 onwards
    :raises NotImplementedError: if the band falls into none of those groups
    """
    first_year = age_band.start_year()
    if first_year < 1976:
        assumed = EpcWallDescriptions.sandstone_limestone_no_insulation_assumed
    elif age_band == EpcConstructionAgeBand.from_1976_to_1982:
        assumed = EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.sandstone_limestone_insulated_assumed
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for sandstone wall insulation mapping"
        )
    return assumed
def map_cob_wall_insulation(age_band: EpcConstructionAgeBand):
    """
    Pick the as-built cob wall description from the construction age.

    :param age_band: Construction age band of the property
    :return: "Average" as-built cob for bands starting before 1983, "good" as-built cob
        for bands from 1983 onwards
    :raises NotImplementedError: if the band falls into neither group
    """
    first_year = age_band.start_year()
    if first_year < 1983:
        assumed = EpcWallDescriptions.cob_as_built_average
    elif age_band in EpcConstructionAgeBand.from_year_onwards(1983):
        assumed = EpcWallDescriptions.cob_as_built_good
    else:
        raise NotImplementedError(
            f"Age band {age_band.value} not handled for cob wall insulation mapping"
        )
    return assumed
# Age-based as-built insulation classifier per wall construction type.
# NOTE(review): unlike the floor and roof registries, these keys contain spaces
# ("Solid Brick", "Timber Frame") - confirm they match the values the caller looks up.
as_built_wall_classifiers = {
    "Cavity": map_cavity_wall_insulation,
    "Solid Brick": map_solid_wall_insulation,
    "Timber Frame": map_timber_frame_wall_insulation,
    "System": map_system_build_wall_insulation,
    "Granite": map_granite_wall_insulation,
    "Sandstone": map_sandstone_wall_insulation,
    "Cob": map_cob_wall_insulation,
}

View file

@ -0,0 +1,10 @@
from datatypes.epc.property_type_built_form import BuiltForm
# Lookup from Parity built-form labels to the internal BuiltForm enum.
parity_map = {
    "MidTerrace": BuiltForm.mid_terrace,
    "EndTerrace": BuiltForm.end_terrace,
    "Detached": BuiltForm.detached,
    "SemiDetached": BuiltForm.semi_detached,
    "EnclosedMidTerrace": BuiltForm.enclosed_mid_terrace,
    "EnclosedEndTerrace": BuiltForm.enclosed_end_terrace,
}

View file

@ -0,0 +1,26 @@
from numpy import nan
from datatypes.epc.floor import EpcFloorDescriptions
# Direct lookup keyed by a pair of values: the first element is the floor type, the
# second the insulation status. A value of None means there is no direct description
# here; the trailing comment names the age-based classifier that presumably supplies
# it instead (verify against the caller).
# NOTE(review): several keys contain `nan`. NaN never compares equal to itself, so a
# tuple lookup only hits these keys when it carries this very `numpy.nan` object; a
# NaN produced elsewhere (e.g. float("nan")) will miss - confirm how the caller
# builds its lookup keys.
floor_map = {
    # Solid floor
    ('Solid', 'AsBuilt'): None,  # Mapped on age (presumably map_solid_floor_as_built)
    ('Solid', 'Unknown'): None,  # Mapped on age (presumably map_solid_floor_as_built)
    ('Solid', nan): None,  # Mapped on age (presumably map_solid_floor_as_built)
    ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated,
    # Suspended floor
    ('SuspendedTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated,
    ('SuspendedNotTimber', nan): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'Unknown'): None,  # Mapped suspended_floor_as_built
    ('SuspendedNotTimber', 'AsBuilt'): None,  # Mapped suspended_floor_as_built
    # Unknown type - mapped on age
    ('Unknown', 'Unknown'): None,  # Mapped unknown_floor_as_built
    ('Unknown', 'RetroFitted'): None,  # Mapped unknown_floor_retrofitted
    (nan, nan): None,  # No actual information!
    ('Unknown', 'AsBuilt'): None,  # Mapped unknown_floor_as_built
}

View file

@ -0,0 +1,20 @@
from datatypes.epc.efficiency import EpcEfficiency
# Lookup from Parity glazing labels to a 5-tuple of windows attributes.
glazing_map = {
    # Tuple layout: (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area)
    # For SAP 10 assessments, the glazed type and glazed area are no longer populated in the EPC API data,
    # so both are None for every entry
    "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None),
    "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None),
    "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None),
    # For triple glazing, with age unknown, the performance is only average, whereas if it's a post 2022
    # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to
    # how we make updates to the windows data.
    # Triple known data (see "TripleKnownData" below) is high performance glazing with Good efficiency (at least)
    "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None),
    # This is also classed as high performance glazing
    "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
    # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good)
    "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None),
    "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None),
}

View file

@ -0,0 +1,330 @@
from datatypes.epc.main_heating import EpcHeatingSystems
from datatypes.epc.efficiency import EpcEfficiency
from datatypes.epc.fuel import EpcFuel
from datatypes.epc.heating_controls import EpcHeatingControls
from datatypes.epc.hotwater import EpcHotWaterSystems
# Lookup table from a 4-part source key to a 7-part EPC heating description.
#
# Key:   (heating system category, single letter 'A'-'G', fuel, controls specification)
#        NOTE(review): the meaning of the letter is not visible in this file - presumably a
#        rating/efficiency band of the appliance; confirm against the source data.
# Value: (EpcHeatingSystems member, EpcEfficiency, EpcFuel member,
#         EpcHeatingControls member, EpcEfficiency,
#         EpcHotWaterSystems member, EpcEfficiency)
#        Each EpcEfficiency presumably rates the element listed just before it (main heating,
#        heating controls, hot water) - TODO confirm with the consumer of this map.
#
# The numbered comments are running entry indices (0-based).
heating_map = {
# 0
('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 1
('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 2
('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 3
('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 4
('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 5
('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 6
('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 7
('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 8
('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 9
('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 10
('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 11
('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 12
('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 13
('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 14
('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 15
('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 16
('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 17
('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 18
('Boilers', 'C', 'OilNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 19
('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 20
('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 21
('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 22
('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 23
('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 24
('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 25
('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 26
('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 27
('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 28
('Boilers', 'E', 'OilNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 29
('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 30
('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 31
('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 32
('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 33
('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 34
('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 35
# NOTE(review): this row rates the boiler AVERAGE while the other two 'G' mains gas rows
# (33, 34) rate it GOOD - confirm whether the difference is intentional.
('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): (
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 36
('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 37
('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 38
('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 39
('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 40
('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 41
('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 42
('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 43
('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): (
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD,
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE
),
# 44
('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 45
('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 46
('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 47 - water done from here
('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 48
('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 49
('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 50
('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): (
EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal,
EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 51
('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): (
EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 52
('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community,
EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 53
('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community,
EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
),
# 54
('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): (
EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community,
EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD,
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE
)
}

View file

@ -0,0 +1,8 @@
from datatypes.epc.property_type_built_form import PropertyType
# Translates a property-type label (string) into the matching PropertyType member.
parity_map = dict(
    Flat=PropertyType.flat,
    Maisonette=PropertyType.maisonette,
    Bungalow=PropertyType.bungalow,
    House=PropertyType.house,
)

View file

@ -0,0 +1,461 @@
import pandas as pd
from numpy import nan
from typing import Union, Callable
from collections.abc import Mapping
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
# Lookup table from (roof type, insulation token) to an EpcRoofDescriptions member.
#
# Insulation tokens are either a thickness written as 'mm<N>' (e.g. 'mm100'), a state such as
# 'AsBuilt' / 'Unknown' / 'NoInsulation', or nan when the source has no value.
# A value of None means the description cannot be chosen from the key alone and has to be
# classified separately.
#
# NOTE(review): nan != nan, so a key containing nan is only found when the lookup tuple holds
# the very same nan object (tuple comparison short-circuits on identity). Confirm that callers
# normalise missing values to numpy.nan before looking up.
roof_map = {
    # Dwelling above
    ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above,
    # Pitched, normal loft access, with a loft thickness
    ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # Pitched, no loft access, with a loft thickness
    ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation,
    ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation,
    # All pitched options with as-built or unknown insulation go to
    # EpcRoofDescriptions.pitched_insulated_assumed
    # With access
    ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # No access
    ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed,
    ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed,
    # Flat
    ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation,
    # Flat - limited insulation
    # '12mm' is the only thickness token in this table not written as 'mm<N>'. It is kept for
    # backward compatibility, and the conventional 'mm12' spelling (as used for
    # 'PitchedWithSlopingCeiling' below) is accepted as well so either form resolves.
    ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm12'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation,
    ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation,
    # Flat insulated
    ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated,
    ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated,
    # Flat - as built or unknown
    ('Flat', 'AsBuilt'): None,  # To be classified
    ('Flat', nan): None,  # To be classified
    ('Flat', 'Unknown'): None,  # To be classified
    # Flat roof efficiency by thickness, for reference:
    # 12mm = very poor & has limited insulation description
    # 25, 50 = poor & has limited insulation description
    # 75, 100, 125mm = average (Flat, insulated)
    # 150, 175, 200, 225, 250mm = good (Flat, insulated)
    # 270mm+ = very good (Flat, insulated)
    # Thatched
    ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation,
    ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched,  # efficiency classified based on age
    # Sloping:
    # Limited (12 very poor, 25-50 poor)
    ('PitchedWithSlopingCeiling', 'mm12'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation,
    # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good)
    ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated,
    ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated,
    # As built/unknown
    ('PitchedWithSlopingCeiling', 'AsBuilt'): None,  # To be classified
    ('PitchedWithSlopingCeiling', nan): None,  # To be classified
    ('PitchedWithSlopingCeiling', 'Unknown'): None,  # To be classified
}
# Per roof type, the "as built / unknown" EpcRoofDescriptions member.
# Keys are the same roof-type strings that form the first element of the roof_map keys.
# NOTE(review): going by the name, this is presumably consulted when the construction age
# band is not available - confirm against the caller.
roof_unknown_age_fallback = dict(
    Flat=EpcRoofDescriptions.flat_as_built_unknown,
    PitchedWithSlopingCeiling=EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    PitchedThatched=EpcRoofDescriptions.thatched_as_built_unknown,
    PitchedNormalLoftAccess=EpcRoofDescriptions.loft_as_built_unknown,
    PitchedNormalNoLoftAccess=EpcRoofDescriptions.loft_as_built_unknown,
)
# A roof efficiency rule is one of:
#   * a fixed EpcEfficiency rating,
#   * a function of the age band only, e.g. loft_insulated_efficiency(age_band),
#   * a function of (insulation_thickness, age_band), e.g. flat_efficiency.
# The two-argument form lists thickness first, matching how resolve_roof_efficiency calls the
# rule: rule(insulation_thickness, age_band).
RoofEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
    Callable[[int | None, EpcConstructionAgeBand], EpcEfficiency],
]
def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a flat roof from the construction age band alone.

    The band's start year (``age_band.start_year()``) is checked against descending
    thresholds and the first one it reaches decides the rating:
        2023 or later -> Very Good
        2003 or later -> Good
        1983 or later -> Average
        1976 or later -> Poor
        anything earlier -> Very Poor
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    rating_floors = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for floor_year, rating in rating_floors:
        if built_from >= floor_year:
            return rating
    return EpcEfficiency.VERY_POOR
def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a flat roof from its insulation thickness.

    Bands (lower bound inclusive, upper bound exclusive):
        below 25mm (e.g. 12mm) -> Very Poor
        25mm up to 75mm -> Poor
        75mm up to 150mm -> Average
        150mm up to 270mm -> Good
        270mm+ -> Very Good

    The standard thicknesses (12, 25, 50, 75, ..., 250, 270+) rate exactly as before. The
    bands are open-ended lower bounds so that an in-between value such as 60, 130 or 260mm is
    rated by the band it falls into instead of dropping through to Very Poor.
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for flat insulated efficiency calculation")
    if insulation_thickness >= 270:
        return EpcEfficiency.VERY_GOOD
    if insulation_thickness >= 150:
        return EpcEfficiency.GOOD
    if insulation_thickness >= 75:
        return EpcEfficiency.AVERAGE
    if insulation_thickness >= 25:
        return EpcEfficiency.POOR
    return EpcEfficiency.VERY_POOR
def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Flat roof efficiency: a known insulation thickness takes precedence, otherwise the
    construction age band is used.
    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No measured thickness - rate on the age band instead.
        return flat_insulated_efficiency_age_band(age_band)
    return flat_insulated_efficiency_thickness(insulation_thickness)
def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate an insulated loft from the start year of the construction age band.

        start year 2003 or later (2003-2006, 2007-2011, 2012-2022, 2023 onwards) -> Very Good
        start year 1991 or later (1991-1995, 1996-2002) -> Good
        any earlier band (before 1900 through 1983-1990) -> Average
    :param age_band: Input age band, EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    if first_year >= 2003:
        rating = EpcEfficiency.VERY_GOOD
    elif first_year >= 1991:
        rating = EpcEfficiency.GOOD
    else:
        rating = EpcEfficiency.AVERAGE
    return rating
def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a thatched roof from the start year of the construction age band.

        2023 or later -> Very Good
        2003 or later -> Good
        anything earlier -> Average
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    if first_year >= 2023:
        rating = EpcEfficiency.VERY_GOOD
    elif first_year >= 2003:
        rating = EpcEfficiency.GOOD
    else:
        rating = EpcEfficiency.AVERAGE
    return rating
def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a thatched roof from its insulation thickness.

        175mm or more -> Very Good
        25mm or more -> Good
        less than 25mm -> Average
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for thatched efficiency calculation")
    thickness_floors = (
        (175, EpcEfficiency.VERY_GOOD),
        (25, EpcEfficiency.GOOD),
    )
    for floor_mm, rating in thickness_floors:
        if insulation_thickness >= floor_mm:
            return rating
    return EpcEfficiency.AVERAGE
def thatched_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Thatched roof efficiency: a known insulation thickness takes precedence, otherwise the
    construction age band is used.
    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    return (
        thatched_efficiency_thickness(insulation_thickness)
        if insulation_thickness is not None
        else thatched_efficiency_age_band(age_band)
    )
def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a sloping-ceiling roof from the start year of the construction age band.

        2023 or later -> Very Good
        2003 or later -> Good
        1983 or later -> Average
        1976 or later -> Poor
        anything earlier -> Very Poor
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    first_year = age_band.start_year()
    if first_year >= 2023:
        rating = EpcEfficiency.VERY_GOOD
    elif first_year >= 2003:
        rating = EpcEfficiency.GOOD
    elif first_year >= 1983:
        rating = EpcEfficiency.AVERAGE
    elif first_year >= 1976:
        rating = EpcEfficiency.POOR
    else:
        rating = EpcEfficiency.VERY_POOR
    return rating
def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a sloping-ceiling roof from its insulation thickness.

        270mm or more -> Very Good
        150mm or more -> Good
        75mm or more -> Average
        25mm or more -> Poor
        less than 25mm -> Very Poor
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation")
    thickness_floors = (
        (270, EpcEfficiency.VERY_GOOD),
        (150, EpcEfficiency.GOOD),
        (75, EpcEfficiency.AVERAGE),
        (25, EpcEfficiency.POOR),
    )
    for floor_mm, rating in thickness_floors:
        if insulation_thickness >= floor_mm:
            return rating
    return EpcEfficiency.VERY_POOR
def sloping_ceiling_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Sloping-ceiling roof efficiency: a known insulation thickness takes precedence, otherwise
    the construction age band is used.
    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    if insulation_thickness is None:
        # No measured thickness - rate on the age band instead.
        return sloping_ceiling_efficiency_age_band(age_band)
    return sloping_ceiling_efficiency_thickness(insulation_thickness)
def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from its insulation thickness.

        350mm or more -> Very Good
        200mm or more -> Good
        125mm or more -> Average
        50mm or more -> Poor
        less than 50mm -> Very Poor
    :param insulation_thickness: Insulation thickness in mm
    :return: EpcEfficiency
    :raises ValueError: if insulation_thickness is None
    """
    if insulation_thickness is None:
        raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation")
    if insulation_thickness >= 350:
        rating = EpcEfficiency.VERY_GOOD
    elif insulation_thickness >= 200:
        rating = EpcEfficiency.GOOD
    elif insulation_thickness >= 125:
        rating = EpcEfficiency.AVERAGE
    elif insulation_thickness >= 50:
        rating = EpcEfficiency.POOR
    else:
        rating = EpcEfficiency.VERY_POOR
    return rating
def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a loft insulated at the rafters from the start year of the construction age band.

        2023 or later -> Very Good
        2003 or later -> Good
        1983 or later -> Average
        1976 or later -> Poor
        anything earlier -> Very Poor
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    built_from = age_band.start_year()
    rating_floors = (
        (2023, EpcEfficiency.VERY_GOOD),
        (2003, EpcEfficiency.GOOD),
        (1983, EpcEfficiency.AVERAGE),
        (1976, EpcEfficiency.POOR),
    )
    for floor_year, rating in rating_floors:
        if built_from >= floor_year:
            return rating
    return EpcEfficiency.VERY_POOR
def loft_insulated_at_rafters_efficiency(
    insulation_thickness: int | None,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Efficiency of a loft insulated at the rafters: a known insulation thickness takes
    precedence, otherwise the construction age band is used.
    :param insulation_thickness: Insulation thickness in mm, or None when not known
    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency
    """
    return (
        loft_insulated_at_rafters_efficiency_thickness(insulation_thickness)
        if insulation_thickness is not None
        else loft_insulated_at_rafters_efficiency_age_band(age_band)
    )
# Efficiency rule for each roof description.
# A value is either a fixed EpcEfficiency rating or a function that derives the rating:
# loft_insulated_efficiency takes the age band only; the other functions take
# (insulation_thickness, age_band). resolve_roof_efficiency looks rules up here and treats a
# description that has no entry as EpcEfficiency.NA.
ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = {
# Flat roof
EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR,
EpcRoofDescriptions.flat_limited_insulation: flat_efficiency,
EpcRoofDescriptions.flat_insulated: flat_efficiency,
# Loft:
# value mappings (fixed rating per stated insulation thickness)
EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR,
EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR,
EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR,
EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE,
EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE,
EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE,
EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD,
EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD,
EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR,
# function mappings (rating derived from the age band)
EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency,
# Loft at rafters
EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency,
# Another dwelling above
EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA,
# Thatched
EpcRoofDescriptions.thatched: thatched_efficiency,
EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency,
# Sloping ceiling
EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency,
EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency,
EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR,
}
def resolve_roof_efficiency(
    description: EpcRoofDescriptions,
    age_band: EpcConstructionAgeBand | None,
    insulation_thickness: int | None,
) -> EpcEfficiency:
    """
    Resolve roof efficiency from description + age band + insulation thickness.

    :param description: roof description to rate
    :param age_band: construction age band, or None / null when unknown
    :param insulation_thickness: insulation thickness in mm, or None when unknown
    :return: EpcEfficiency.NA when the description is an unknown/holding value, has no
        rule, or the rule is a function and no age band is available; otherwise the
        fixed rating or the rating computed by the rule
    """
    # Local import keeps this block self-contained
    import inspect

    # Unknown / holding descriptions → efficiency unknown
    if description in description.unknown_descriptions:
        return EpcEfficiency.NA
    rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description)
    if rule is None:
        return EpcEfficiency.NA
    # Fixed efficiency
    if isinstance(rule, EpcEfficiency):
        return rule
    # Callable rule: every function rule needs an age band
    if age_band is None or pd.isnull(age_band):
        return EpcEfficiency.NA
    # Decide between the (thickness, age_band) and (age_band) call shapes by checking
    # whether the arguments bind to the rule's signature *before* calling it. Wrapping
    # the call itself in try/except TypeError would also swallow TypeErrors raised
    # inside the rule and silently re-invoke it with different arguments.
    try:
        inspect.signature(rule).bind(insulation_thickness, age_band)
    except TypeError:
        # Rule does not accept two positional arguments → (age_band) only
        return rule(age_band)
    return rule(insulation_thickness, age_band)

View file

@ -0,0 +1,211 @@
from typing import Callable, Union
from collections.abc import Mapping
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.efficiency import EpcEfficiency
# Unique combinations
# Keys are (wall construction, wall insulation) string pairs; the value is the matching
# EpcWallDescriptions member, or None when the pair has no direct description here.
wall_map = {
    # Cavity walls
    ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity,
    ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation,
    ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation,
    ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal,
    ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external,
    ('Cavity', 'AsBuilt'): None,  # To be classified
    ('Cavity', 'Unknown'): None,  # To be classified
    # System built walls
    ('System', 'External'): EpcWallDescriptions.system_external_insulation,
    ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation,
    ('System', 'AsBuilt'): None,  # To be classified
    ('System', 'Unknown'): None,
    # Timber Frame walls
    ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation,
    ('Timber Frame', 'External'): EpcWallDescriptions.timber_frame_external_insulation,
    ('Timber Frame', 'AsBuilt'): None,  # To be classified
    ('Timber Frame', 'Unknown'): None,
    # Solid Brick walls
    ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation,
    ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation,
    ('Solid Brick', 'AsBuilt'): None,  # To be classified
    ('Solid Brick', 'Unknown'): None,
    # Granite walls
    ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation,
    ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation,
    ('Granite', 'AsBuilt'): None,
    ('Granite', 'Unknown'): None,
    # Sandstone walls
    ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation,
    ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation,
    ('Sandstone', 'Unknown'): None,
    ('Sandstone', 'AsBuilt'): None,
    # Cob walls
    # NOTE(review): only the 'AsBuilt' pair is listed for Cob — confirm no other
    # insulation value can occur for this construction type.
    ('Cob', 'AsBuilt'): None,
}
# Per wall construction type, the "as built, unknown" EpcWallDescriptions member.
# NOTE(review): the name suggests this is the fallback when the construction age band
# is missing — confirm against the caller.
wall_unknown_age_fallback = {
    "Cavity": EpcWallDescriptions.cavity_as_built_unknown,
    "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown,
    "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown,
    "System": EpcWallDescriptions.system_as_built_unknown,
    "Granite": EpcWallDescriptions.granite_as_built_unknown,
    "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown,
    "Cob": EpcWallDescriptions.cob_as_built_unknown,
}
def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate a filled cavity wall from the property's construction age band.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency.VERY_GOOD for the 2023-onwards band, EpcEfficiency.GOOD otherwise
    """
    very_good_bands = {EpcConstructionAgeBand.from_2023_onwards}
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
def internal_external_insulation_efficiency(
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Rate walls that carry internal or external insulation, from the construction age band.

    Shared by three wall types, each assumed to have 100mm of insulation:
    - unfilled cavity with internal/external insulation
    - solid brick with internal/external insulation
    - system built with internal/external insulation
    All of these wall types have the same behaviour in elmhurst.

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency.VERY_GOOD for the bands from 1983 onwards, EpcEfficiency.GOOD otherwise
    """
    very_good_bands = {
        EpcConstructionAgeBand.from_1983_to_1990,
        EpcConstructionAgeBand.from_1991_to_1995,
        EpcConstructionAgeBand.from_1996_to_2002,
        EpcConstructionAgeBand.from_2003_to_2006,
        EpcConstructionAgeBand.from_2007_to_2011,
        EpcConstructionAgeBand.from_2012_to_2022,
        EpcConstructionAgeBand.from_2023_onwards,
    }
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency:
    """
    Rate internally/externally insulated walls from the construction age band, for:
    - timber frame
    - sandstone/limestone
    - granite/whinstone

    :param age_band: EpcConstructionAgeBand
    :return: EpcEfficiency.VERY_GOOD for the 2023-onwards band, EpcEfficiency.GOOD otherwise
    """
    very_good_bands = {EpcConstructionAgeBand.from_2023_onwards}
    return EpcEfficiency.VERY_GOOD if age_band in very_good_bands else EpcEfficiency.GOOD
# A wall efficiency rule is either a fixed rating or a function of the construction
# age band alone: every wall rule function in this module takes a single age_band
# argument and resolve_wall_efficiency calls rule(age_band).
WallEfficiencyRule = Union[
    EpcEfficiency,
    Callable[[EpcConstructionAgeBand], EpcEfficiency],
]
# Lookup from wall description to its efficiency rule: either a fixed EpcEfficiency
# ("value mappings") or a function of the construction age band ("function mappings").
WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = {
    # Note: all function mappings have been defined based on Elmhurst
    # Cavity
    # value mappings
    EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD,
    EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD,
    EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD,
    # function mappings
    EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency,
    EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency,
    # Solid brick
    # value mappings
    EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency,
    # System
    # value mappings
    EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency,
    EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency,
    # Timber frame
    # value mappings
    EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR,
    EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency,
    # Granite / whinstone
    # value mappings
    EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
    EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    # NOTE(review): "whinestone" is spelled differently from the neighbouring
    # "whinstone" members — confirm it matches the EpcWallDescriptions member name.
    EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency,
    # Sandstone / limestone
    # value mappings
    EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR,
    EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD,
    # function mappings
    EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency,
    EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency,
    # Cob (special case)
    EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE,
    EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD,
    # Unknown mappings which are unhandled
    EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA,
    EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA,
}
def resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand,
) -> EpcEfficiency:
    """
    Look up the efficiency rule for a wall description and evaluate it.

    :param description: wall description; must be a key of WALL_DESCRIPTION_EFFICIENCIES
        (a missing key raises KeyError)
    :param age_band: construction age band, passed to the rule when the rule is a function
    :return: the fixed rating, or the rating computed by the rule from age_band
    """
    rule = WALL_DESCRIPTION_EFFICIENCIES[description]
    # Fixed ratings are returned as-is; anything else is called with the age band
    return rule if isinstance(rule, EpcEfficiency) else rule(age_band)

View file

@ -1,6 +0,0 @@
# Identity mapping: each of the four listed values maps to the same string.
parity_map = {
    "Flat": "Flat",
    "Maisonette": "Maisonette",
    "Bungalow": "Bungalow",
    "House": "House",
}

View file

@ -1,3 +0,0 @@
# Empty mapping: no entries are defined.
parity_map = {
}

View file

@ -1,93 +1,371 @@
import re
from tqdm import tqdm
import pandas as pd
from etl.epc.DataProcessor import construction_age_bounds_map
from backend.onboarders.mappings.property_type import parity_map as property_map
from backend.onboarders.mappings.age_band import party_map as age_band_map
from backend.onboarders.mappings.built_form import parity_map as built_form_map
def check_nulls(data, original_column, mapped_column):
    """
    Check that every null in a mapped column comes from a null in the source column.

    :param data: DataFrame holding both columns
    :param original_column: name of the source column the mapping was applied to
    :param mapped_column: name of the column produced by the mapping
    :return: True when the check passes (including when there are no nulls at all)
    :raises AssertionError: if a row has a null mapped value but a non-null original value
    """
    # We only allow nulls if the original value was null
    null_vals = data[pd.isnull(data[mapped_column])]
    # With no null rows the selection is empty and .all() is True, so a single
    # assertion covers both cases and the function returns True on every passing path.
    assert pd.isnull(null_vals[original_column]).all(), (
        f"Some values in {mapped_column} were not mapped, but original values were not null"
    )
    return True
# Sample input data
data = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
"- Data Extracts for Domna.xlsx",
sheet_name="Sustainability"
from backend.onboarders.base import OnboarderBase
# Parity mappings
from backend.onboarders.mappings.parity.property_type import parity_map as property_map
from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map
from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map
from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES
from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency
from onboarders.mappings.parity.floor import floor_map
from onboarders.mappings.parity.heating import heating_map
from onboarders.mappings.parity.glazing import glazing_map
from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers
from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
as_built_floor_classifiers, unknown_as_built_floor_classifiers
)
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.floor import EpcFloorDescriptions
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.efficiency import EpcEfficiency
# We want to map the parity fields to standard EPC references. This will allow us to
# 1) Estimate EPCs, more accurately
# 2) Patch incorrect EPCs with ease
# 3) Indicate already installed measures
# ------------ construction_age_band ------------
# Map to EPC age bands
# def construction_date_to_band(year):
# if pd.isnull(year):
# return None
# # Get the year from the date which is numpy datetime format
# for label, ranges in construction_age_bounds_map.items():
# if ranges["l"] <= year <= ranges["u"]:
# return label
# raise NotImplementedError("year out of bounds")
#
#
# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band)
# Map the landlord's "Construction Years" values through age_band_map; nulls are only
# accepted where the source value was itself null.
data["construction_age_band"] = data["Construction Years"].map(age_band_map)
check_nulls(data, "Construction Years", "construction_age_band")
# ------------ property_type ------------
# Every "Type" value must be present in property_map
data["property_type"] = data["Type"].map(property_map)
assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped"
# ------------ built_form ------------
# Every "Attachment" value must be present in built_form_map
data["built_form"] = data["Attachment"].map(built_form_map)
assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped"
# ------------ Wall Construction ------------
# Combined "<construction>+<insulation>" key; a missing insulation value is filled
# with "Unknown Insulation" before concatenating
data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation")
# The two value_counts() results below are not assigned or used
data["Wall Insulation"].value_counts()
data["Wall Construction"].value_counts()
# Per wall construction type, the age bands treated as insulated / partially insulated
# when the wall is "AsBuilt". Every list is currently empty.
as_built_map = {
    "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
    "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []},
}
# Registers tqdm's progress_apply on pandas objects
tqdm.pandas()
def map_wall_construction(wall_constuction, wall_insulation, construction_age_band):
if wall_insulation == "AsBuilt":
# Deduce based on wall construction and age band
bands = as_built_map.get(wall_constuction, None)
if bands is None:
raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map")
class ParityOnboarder(OnboarderBase):
# We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated
def __init__(
self,
fileuri: str,
file_format: str,
**kwargs
):
# Extract bucket, and filekey; Will be in the format s3://bucket/key
self.bucket_name = fileuri.split("/")[2]
self.input_file_name = "/".join(fileuri.split("/")[3:])
# Also prepare output file name
self.output_file_name = self.input_file_name.replace("." + file_format, "") + "_transformed.csv"
# Variables we want to map
# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type',
# 'Attachment', 'Construction Years', 'Wall Construction',
# 'Wall Insulation', 'Roof Construction', 'Roof Insulation',
# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating',
# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN',
# 'Total Floor Area (m2)'
self.read_s3(file_format=file_format, **kwargs)
pass
def map_construction_age_band(self):
    """
    Map "Construction Years" through age_band_map into the landlord age band column,
    then check that nulls in the result only come from nulls in the source.
    """
    source_column = "Construction Years"
    target_column = self.landlord_construction_age_band
    self.data[target_column] = self.data[source_column].map(age_band_map)
    self.assert_nulls_only_from_source_nulls(self.data, source_column, target_column)
def map_property_type(self):
    """Map "Type" through property_map into the landlord property type column; no nulls allowed."""
    target_column = self.landlord_property_type
    mapped_types = self.data["Type"].map(property_map)
    self.data[target_column] = mapped_types
    self.assert_no_nulls(self.data, target_column)
def map_built_form(self):
    """Map "Attachment" through built_form_map into the landlord built form column; no nulls allowed."""
    target_column = self.landlord_built_form
    mapped_forms = self.data["Attachment"].map(built_form_map)
    self.data[target_column] = mapped_forms
    self.assert_no_nulls(self.data, target_column)
@staticmethod
def _fill_wall_as_built(row: pd.Series) -> EpcWallDescriptions | None:
    """
    Row-level helper for map_wall_construction: supply a wall description for rows
    that the direct mapping left as None.

    :param row: row of input sustainability data, being transformed
    :return: the existing description when it is not None; otherwise the unknown-age
        fallback (when the age band is null) or the result of the as-built classifier
        for the row's wall construction type. None when no classifier/fallback exists.
    """
    existing = row.landlord_wall_construction
    # Anything other than None counts as already resolved by the direct mapping
    if existing is not None:
        return existing
    construction = row["Wall Construction"]
    age_band = row.landlord_construction_age_band
    # Missing construction age → conservative fallback
    if pd.isnull(age_band):
        return wall_unknown_age_fallback.get(construction)
    classify = as_built_wall_classifiers.get(construction)
    return None if classify is None else classify(age_band)
@staticmethod
def _resolve_wall_efficiency(
    description: EpcWallDescriptions,
    age_band: EpcConstructionAgeBand | None,
) -> EpcEfficiency:
    """
    Resolve the efficiency rating for a wall description.

    :param description: wall description to rate
    :param age_band: construction age band, or None / null when unknown
    :return: EpcEfficiency.NA when the description's value contains "unknown insulation",
        has no rule, or needs an age band that is missing; otherwise the fixed rating or
        the rating computed from the age band
    """
    # Unknown / holding descriptions → efficiency unknown
    is_holding_description = "unknown insulation" in description.value.lower()
    if is_holding_description:
        return EpcEfficiency.NA
    rule = WALL_DESCRIPTION_EFFICIENCIES.get(description)
    if rule is None:
        return EpcEfficiency.NA
    if isinstance(rule, EpcEfficiency):
        return rule
    # From here the rule is a function of the age band, so a missing band means NA
    age_band_missing = age_band is None or pd.isnull(age_band)
    return EpcEfficiency.NA if age_band_missing else rule(age_band)
def map_wall_construction(self):
    """
    Populate the landlord wall construction and wall efficiency columns.

    Direct (construction, insulation) pairs are looked up in wall_map, remaining rows
    are filled by _fill_wall_as_built, and the efficiency is derived per row by
    _resolve_wall_efficiency. Both columns are asserted to contain no nulls.
    """
    wall_keys = self.data[["Wall Construction", "Wall Insulation"]].apply(tuple, axis=1)
    self.data[self.landlord_wall_construction] = wall_keys.map(wall_map)
    self.data[self.landlord_wall_construction] = self.data.progress_apply(self._fill_wall_as_built, axis=1)
    # Sanity check
    self.assert_no_nulls(self.data, self.landlord_wall_construction)

    def _efficiency_for(row):
        return self._resolve_wall_efficiency(
            row.landlord_wall_construction,
            row.landlord_construction_age_band,
        )

    self.data[self.landlord_wall_efficiency] = self.data.progress_apply(_efficiency_for, axis=1)
    # Additional sanity check
    self.assert_no_nulls(self.data, self.landlord_wall_efficiency)
@staticmethod
def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None:
    """
    Row-level helper for map_roof_construction: supply a roof description for rows
    whose description is still null.

    :param row: row of input data being transformed
    :return: the existing description when non-null; otherwise the unknown-age fallback
        (when the age band is null) or the as-built classifier's result
    :raises NotImplementedError: if the roof type has no classifier, or the classifier
        returns None
    """
    existing = row.landlord_roof_construction
    # Already resolved
    if not pd.isnull(existing):
        return existing
    roof_type = row["Roof Construction"]
    classify = as_built_roof_classifiers.get(roof_type)
    # The classifier must exist even when the age band turns out to be missing
    if classify is None:
        raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'")
    age_band = row.landlord_construction_age_band
    if pd.isnull(age_band):
        return roof_unknown_age_fallback.get(roof_type)
    classified = classify(age_band)
    if classified is not None:
        return classified
    raise NotImplementedError(
        f"Roof classification returned None for roof type '{roof_type}'"
    )
@staticmethod
def _extract_insulation_thickness(value: str | None) -> int | None:
"""
Extract insulation thickness in mm from a string like 'mm150'.
Returns None if not present or not parseable.
"""
if value is None or pd.isnull(value):
return None
match = re.search(r"(\d+)", str(value))
if not match:
return None
return int(match.group(1))
def map_roof_construction(self):
    """
    Populate the landlord roof construction, roof efficiency and sloping-ceiling columns.

    Direct (construction, insulation) pairs are looked up in roof_map, remaining rows
    are filled by _fill_roof_as_built, the insulation thickness is parsed from
    "Roof Insulation", and the efficiency is derived per row by resolve_roof_efficiency.
    """
    roof_keys = self.data[["Roof Construction", "Roof Insulation"]].progress_apply(tuple, axis=1)
    self.data[self.landlord_roof_construction] = roof_keys.map(roof_map)
    self.data[self.landlord_roof_construction] = self.data.progress_apply(self._fill_roof_as_built, axis=1)
    # sanity check
    self.assert_no_nulls(self.data, self.landlord_roof_construction)

    # Thickness feeds the efficiency rules that take (thickness, age_band)
    thickness_mm = self.data["Roof Insulation"].apply(self._extract_insulation_thickness)
    self.data["roof_insulation_thickness_mm"] = thickness_mm

    def _efficiency_for(row):
        return resolve_roof_efficiency(
            description=row.landlord_roof_construction,
            age_band=row.landlord_construction_age_band,
            insulation_thickness=row.roof_insulation_thickness_mm,
        )

    self.data[self.landlord_roof_efficiency] = self.data.progress_apply(_efficiency_for, axis=1)
    # sanity check
    self.assert_no_nulls(self.data, self.landlord_roof_efficiency)

    # Flag sloping ceiling
    def _is_sloping_ceiling(roof_type):
        return roof_type == "PitchedWithSlopingCeiling"

    self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply(_is_sloping_ceiling)
@staticmethod
def _fill_floor_as_built(row: pd.Series):
    """
    Row-level helper for map_floor_construction: supply a floor description for rows
    that the direct mapping left as None.

    :param row: row of input data being transformed
    :return: the existing description when it is not None; EpcFloorDescriptions.unknown
        when the age band is null or the floor type is unrecognised; otherwise the
        result of the classifier selected by floor type (or, for floor type "Unknown",
        by floor insulation)
    """
    existing = row.landlord_floor_construction
    # 1. Already resolved
    if existing is not None:
        return existing
    age_band = row.landlord_construction_age_band
    floor_type = row["Floor Construction"]
    insulation = row["Floor Insulation"]
    # 2. Missing age band → conservative fallback
    if pd.isnull(age_band):
        return EpcFloorDescriptions.unknown
    # 3. Known floor types are classified by type
    known_floor_types = ("Solid", "SuspendedTimber", "SuspendedNotTimber")
    if floor_type in known_floor_types:
        return as_built_floor_classifiers[floor_type](age_band)
    # 4. Unknown floor type is classified by its insulation value instead
    if floor_type == "Unknown":
        return unknown_as_built_floor_classifiers[insulation](age_band)
    # 5. Truly missing / garbage input
    return EpcFloorDescriptions.unknown
def map_floor_construction(self):
    """
    Populate the landlord floor construction column.

    Direct (construction, insulation) pairs are looked up in floor_map, remaining rows
    are filled by _fill_floor_as_built, and the column is asserted to contain no nulls.
    """
    floor_keys = self.data[["Floor Construction", "Floor Insulation"]].progress_apply(tuple, axis=1)
    self.data[self.landlord_floor_construction] = floor_keys.map(floor_map)
    self.data[self.landlord_floor_construction] = self.data.progress_apply(self._fill_floor_as_built, axis=1)
    self.assert_no_nulls(self.data, self.landlord_floor_construction)
def map_glazing(self):
    """
    Expand each "Glazing" value, via glazing_map, into the five landlord glazing columns
    (windows type, windows efficiency, multi-glaze proportion, glazed type, glazed area).
    """
    # TODO: probably doesn't make sense to store multi glazed proportion, glazed type or
    #  glazed area. There is maybe an argument for keeping landlord_multi_glaze_proportion,
    #  as this could be variable.
    expanded = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series)
    glazing_columns = [
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
    ]
    self.data[glazing_columns] = expanded
def map_heating(self):
    """
    Expand each (Heating, Boiler Efficiency, Main Fuel, Controls Adequacy) combination,
    via heating_map, into the seven landlord heating, fuel, controls and hot water columns.
    """
    # TODO - when mapping heating controls, we should check the existing heating controls
    #  and the efficiency rating. For sub optimal heating controls we make an assumption
    #  as to what the heating controls are, so the efficiency rating prescribed here may
    #  not be accurate. It is therefore an upper limit rather than a guaranteed rating.
    #  To stress, this is only relevant for sub optimal heating controls. E.g. it may be
    #  programmer and room thermostat
    source_columns = ["Heating", "Boiler Efficiency", "Main Fuel", "Controls Adequacy"]
    heating_keys = self.data[source_columns].progress_apply(tuple, axis=1)
    expanded = heating_keys.map(heating_map).progress_apply(pd.Series)
    heating_columns = [
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]
    self.data[heating_columns] = expanded
def map_floor_area(self):
    """Rename "Total Floor Area (m2)" to the landlord total floor area column name."""
    # This is just a rename
    renames = {"Total Floor Area (m2)": self.landlord_total_floor_area_m2}
    self.data = self.data.rename(columns=renames)
def select_columns(self):
    """
    Reduce self.data to the identifier, address and landlord_* output columns, and
    rename the identifier/address columns to their output names.

    "UPRN" is kept under its original name.
    """
    output_columns = [
        "Org Ref",
        "UPRN",
        "Address 1",
        "Address 2",
        "Address 3",
        "Postcode",
        self.landlord_total_floor_area_m2,
        self.landlord_construction_age_band,
        self.landlord_property_type,
        self.landlord_built_form,
        self.landlord_wall_construction,
        self.landlord_wall_efficiency,
        self.landlord_roof_construction,
        self.landlord_roof_efficiency,
        self.landlord_has_sloping_ceiling,
        self.landlord_floor_construction,
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_multi_glaze_proportion,
        self.landlord_glazed_type,
        self.landlord_glazed_area,
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    ]
    # The source columns are "Address 1/2/3" (with a space). Keys without the space
    # match nothing, and DataFrame.rename ignores unmatched keys, so the address
    # columns would silently keep their original names.
    output_names = {
        "Org Ref": "landlord_property_id",
        "Address 1": "address1",
        "Address 2": "address2",
        "Address 3": "address3",
        "Postcode": "postcode",
    }
    self.data = self.data[output_columns].rename(columns=output_names)
def extract_values(self):
    """
    Replace each cell in the listed landlord columns with its `.value` attribute when
    it has one; cells without a `.value` attribute are left unchanged.
    """
    def _unwrap(cell):
        return cell.value if hasattr(cell, "value") else cell

    value_columns = (
        self.landlord_construction_age_band,
        self.landlord_property_type,
        self.landlord_built_form,
        self.landlord_wall_construction,
        self.landlord_wall_efficiency,
        self.landlord_roof_construction,
        self.landlord_roof_efficiency,
        self.landlord_floor_construction,
        self.landlord_windows_type,
        self.landlord_windows_efficiency,
        self.landlord_heating_construction,
        self.landlord_heating_efficiency,
        self.landlord_fuel_type,
        self.landlord_heating_controls,
        self.landlord_heating_controls_efficiency,
        self.landlord_hot_water_system,
        self.landlord_hot_water_efficiency,
    )
    for column in value_columns:
        self.data[column] = self.data[column].progress_apply(_unwrap)
def transform(self):
    """Run every mapping step in order, then select the output columns and unwrap values."""
    pipeline = (
        # ------------ construction_age_band ------------
        self.map_construction_age_band,
        # ------------ property_type ------------
        self.map_property_type,
        # ------------ built_form ------------
        self.map_built_form,
        # ------------ Wall Construction ------------
        self.map_wall_construction,
        # ------------ Roof Construction ------------
        self.map_roof_construction,
        # ------------ Floor Construction ------------
        self.map_floor_construction,
        # ------------ Glazing ------------
        self.map_glazing,
        # ------------ Heating, fuel, controls & hot water ------------
        self.map_heating,
        # ------------ Floor Area ------------
        self.map_floor_area,
        # ------------ Formatting ------------
        self.select_columns,
        self.extract_values,
    )
    for step in pipeline:
        step()

View file

@ -0,0 +1,6 @@
boto3
numpy==2.1.2
pandas==2.2.3
tqdm==4.66.5
pydantic==2.9.2
openpyxl==3.1.2

View file

@ -0,0 +1,97 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.floor import EpcFloorDescriptions
from backend.onboarders.mappings.parity.as_built_floor_classifiers import (
unknown_floor_as_built,
unknown_floor_retrofitted,
map_solid_floor_as_built,
map_suspended_floor_as_built,
)
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # Before 1900 / 1900–1929 → suspended, no insulation
        (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_no_insulation_assumed),
        # 1930–1995 → solid, no insulation
        (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
        # 1996–2002 → solid, limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
        # 2003+ → solid, insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
    ],
)
def test_unknown_floor_as_built(age_band, expected):
    """unknown_floor_as_built returns the expected floor description for each age band."""
    actual = unknown_floor_as_built(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # Pre-1930 → suspended, insulated
        (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated),
        (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated),
        # 1930+ → solid, insulated
        (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated),
    ],
)
def test_unknown_floor_retrofitted(age_band, expected):
    """unknown_floor_retrofitted returns the expected floor description for each age band."""
    actual = unknown_floor_retrofitted(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # 1983–1995 → no insulation
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed),
        # 1996–2002 → limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed),
        # 2003+ → insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed),
    ],
)
def test_solid_floor_as_built(age_band, expected):
    """map_solid_floor_as_built returns the expected solid floor description for each age band."""
    actual = map_solid_floor_as_built(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band,expected",
    [
        # 1983–1995 → no insulation
        (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed),
        # 1996–2002 → limited insulation
        (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed),
        # 2003+ → insulated
        (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed),
    ],
)
def test_suspended_floor_as_built(age_band, expected):
    """map_suspended_floor_as_built returns the expected suspended floor description for each age band."""
    actual = map_suspended_floor_as_built(age_band)
    assert actual == expected

View file

@ -0,0 +1,173 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.roof import EpcRoofDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from backend.onboarders.mappings.parity.as_built_roof_classifiers import (
map_flat_roof,
map_sloping_ceiling_roof,
)
from backend.onboarders.mappings.parity.roof import resolve_roof_efficiency
# ---------------------------------------------------------------------
# As-built roof description classification
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "age_band, expected",
    [
        # Up to 1966 → no insulation
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.flat_no_insulation),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcRoofDescriptions.flat_no_insulation),
        # 1967–1982 → limited insulation
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.flat_limited_insulation),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.flat_limited_insulation),
        # 1983+ → insulated
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcRoofDescriptions.flat_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.flat_insulated),
    ],
)
def test_classify_flat_roof(age_band, expected):
    """map_flat_roof returns the expected flat roof description for each age band."""
    actual = map_flat_roof(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    "age_band, expected",
    [
        # Up to 1975 → no insulation
        (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.sloping_pitched_no_insulation),
        (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.sloping_pitched_no_insulation),
        # 1976–1982 → limited insulation
        (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation),
        # 1983+ → insulated
        (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2012_to_2022, EpcRoofDescriptions.sloping_pitched_insulated),
        (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.sloping_pitched_insulated),
    ],
)
def test_classify_sloping_ceiling_roof(age_band, expected):
    """map_sloping_ceiling_roof returns the expected sloping ceiling description for each age band."""
    actual = map_sloping_ceiling_roof(age_band)
    assert actual == expected
# ---------------------------------------------------------------------
# Roof efficiency — fixed & age-band driven
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description, age_band, expected",
    [
        # Flat roof, no insulation
        (
            EpcRoofDescriptions.flat_no_insulation,
            EpcConstructionAgeBand.before_1900,
            EpcEfficiency.VERY_POOR,
        ),
        # Flat roof, limited insulation (age-band driven)
        (
            EpcRoofDescriptions.flat_limited_insulation,
            EpcConstructionAgeBand.from_1976_to_1982,
            EpcEfficiency.POOR,
        ),
        (
            EpcRoofDescriptions.flat_limited_insulation,
            EpcConstructionAgeBand.from_1967_to_1975,
            EpcEfficiency.VERY_POOR,
        ),
        # Flat roof, insulated (age-band driven)
        (
            EpcRoofDescriptions.flat_insulated,
            EpcConstructionAgeBand.from_1983_to_1990,
            EpcEfficiency.AVERAGE,
        ),
        (
            EpcRoofDescriptions.flat_insulated,
            EpcConstructionAgeBand.from_2003_to_2006,
            EpcEfficiency.GOOD,
        ),
        (
            EpcRoofDescriptions.flat_insulated,
            EpcConstructionAgeBand.from_2023_onwards,
            EpcEfficiency.VERY_GOOD,
        ),
        # Pitched, insulated assumed (loft)
        (
            EpcRoofDescriptions.pitched_insulated_assumed,
            EpcConstructionAgeBand.from_1996_to_2002,
            EpcEfficiency.GOOD,
        ),
        (
            EpcRoofDescriptions.pitched_insulated_assumed,
            EpcConstructionAgeBand.from_2007_to_2011,
            EpcEfficiency.VERY_GOOD,
        ),
    ],
)
def test_roof_efficiency_age_band_only(description, age_band, expected):
    """With no insulation thickness supplied, the rating comes from description and age band."""
    actual = resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    )
    assert actual == expected
# ---------------------------------------------------------------------
# Roof efficiency — insulation thickness driven
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "thickness", "expected"),
    [
        # Loft insulation
        (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR),
        (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR),
        (EpcRoofDescriptions.loft_75mm_insulation, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD),
        # Flat insulated — thickness overrides age band
        (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR),
        (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD),
        # Sloping ceiling
        (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.sloping_pitched_insulated, 150, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD),
    ],
)
def test_roof_efficiency_thickness_based(description, thickness, expected):
    """When a thickness is supplied, the rating is expected to follow it rather than the age band."""
    # The oldest band is passed on purpose: it should be ignored when a thickness is given.
    rating = resolve_roof_efficiency(
        description=description,
        age_band=EpcConstructionAgeBand.before_1900,
        insulation_thickness=thickness,
    )
    assert rating == expected
# ---------------------------------------------------------------------
# Thatched roofs
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "age_band", "expected"),
    [
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD),
        (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_age_band(description, age_band, expected):
    """Plain thatched roofs with no thickness supplied: rating checked per age band."""
    rating = resolve_roof_efficiency(
        description=description,
        age_band=age_band,
        insulation_thickness=None,
    )
    assert rating == expected
@pytest.mark.parametrize(
    ("thickness", "expected"),
    [
        (12, EpcEfficiency.AVERAGE),
        (50, EpcEfficiency.GOOD),
        (150, EpcEfficiency.GOOD),
        (200, EpcEfficiency.VERY_GOOD),
    ],
)
def test_thatched_efficiency_thickness(thickness, expected):
    """Thatched roofs with additional insulation: rating checked per insulation thickness."""
    rating = resolve_roof_efficiency(
        description=EpcRoofDescriptions.thatched_with_additional_insulation,
        age_band=EpcConstructionAgeBand.before_1900,
        insulation_thickness=thickness,
    )
    assert rating == expected
# ---------------------------------------------------------------------
# Unknown / holding descriptions
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    "description",
    [
        EpcRoofDescriptions.flat_as_built_unknown,
        EpcRoofDescriptions.loft_as_built_unknown,
        EpcRoofDescriptions.thatched_as_built_unknown,
        EpcRoofDescriptions.sloping_pitched_as_built_unknown,
    ],
)
def test_unknown_roof_descriptions_return_na(description):
    """Each 'as built, unknown insulation' description is expected to resolve to N/A."""
    rating = resolve_roof_efficiency(
        description=description,
        age_band=None,
        insulation_thickness=None,
    )
    assert rating == EpcEfficiency.NA

View file

@ -0,0 +1,161 @@
import pytest
from datatypes.epc.construction_age_band import EpcConstructionAgeBand
from datatypes.epc.walls import EpcWallDescriptions
from datatypes.epc.efficiency import EpcEfficiency
from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency
from backend.onboarders.mappings.parity.as_built_wall_classifiers import (
map_cavity_wall_insulation,
map_solid_wall_insulation,
map_timber_frame_wall_insulation,
map_system_build_wall_insulation,
map_granite_wall_insulation,
map_sandstone_wall_insulation,
map_cob_wall_insulation,
)
# ---------------------------------------------------------------------
# As-built wall description classification
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.cavity_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed),
        (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed),
    ],
)
def test_map_cavity_wall_insulation(age_band, expected):
    """Cavity walls: as-built description expected for each sampled age band."""
    actual = map_cavity_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.solid_brick_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed),
    ],
)
def test_map_solid_wall_insulation(age_band, expected):
    """Solid brick walls: as-built description expected for each sampled age band."""
    actual = map_solid_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed),
    ],
)
def test_map_timber_frame_wall_insulation(age_band, expected):
    """Timber frame walls: as-built description expected for each sampled age band."""
    actual = map_timber_frame_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed),
    ],
)
def test_map_system_wall_insulation(age_band, expected):
    """System-built walls: as-built description expected for each sampled age band."""
    actual = map_system_build_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed),
        # The member name below is spelled "whinestone" in the enum definition.
        (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed),
    ],
)
def test_map_granite_wall_insulation(age_band, expected):
    """Granite/whinstone walls: as-built description expected for each sampled age band."""
    actual = map_granite_wall_insulation(age_band)
    assert actual == expected
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.sandstone_limestone_no_insulation_assumed),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed),
        (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed),
    ],
)
def test_map_sandstone_wall_insulation(age_band, expected):
    """Sandstone/limestone walls: as-built description expected for each sampled age band."""
    actual = map_sandstone_wall_insulation(age_band)
    assert actual == expected
# NOTE(review): in datatypes/epc/walls.py both cob_as_built_average and
# cob_as_built_good are assigned the value "Cob, as built". Enum members that
# share a value are aliases of one member, so the two expected values below are
# the same object and this test cannot detect a classifier that returns the
# "average" description for a later age band.
@pytest.mark.parametrize(
    ("age_band", "expected"),
    [
        (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average),
        (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good),
    ],
)
def test_map_cob_wall_insulation(age_band, expected):
    """Cob walls: as-built description expected for each sampled age band."""
    actual = map_cob_wall_insulation(age_band)
    assert actual == expected
# ---------------------------------------------------------------------
# Wall efficiency resolution
# ---------------------------------------------------------------------
@pytest.mark.parametrize(
    ("description", "age_band", "expected"),
    [
        # Fixed efficiencies (no age band supplied)
        (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR),
        (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE),
        (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD),
        # Function-based efficiencies (same description, different age bands)
        (
            EpcWallDescriptions.cavity_filled_cavity,
            EpcConstructionAgeBand.from_2023_onwards,
            EpcEfficiency.VERY_GOOD,
        ),
        (
            EpcWallDescriptions.cavity_filled_cavity,
            EpcConstructionAgeBand.from_1991_to_1995,
            EpcEfficiency.GOOD,
        ),
        (
            EpcWallDescriptions.solid_brick_internal_insulation,
            EpcConstructionAgeBand.from_2003_to_2006,
            EpcEfficiency.VERY_GOOD,
        ),
        (
            EpcWallDescriptions.solid_brick_internal_insulation,
            EpcConstructionAgeBand.from_1950_to_1966,
            EpcEfficiency.GOOD,
        ),
    ],
)
def test_resolve_wall_efficiency(description, age_band, expected):
    """resolve_wall_efficiency returns the expected rating for each description/age band pair."""
    rating = resolve_wall_efficiency(description, age_band)
    assert rating == expected
@pytest.mark.parametrize(
    "description",
    [
        EpcWallDescriptions.cavity_as_built_unknown,
        EpcWallDescriptions.solid_brick_as_built_unknown,
        EpcWallDescriptions.system_as_built_unknown,
        EpcWallDescriptions.timber_frame_as_built_unknown,
        EpcWallDescriptions.granite_as_built_unknown,
        EpcWallDescriptions.sandstone_as_built_unknown,
        EpcWallDescriptions.cob_as_built_unknown,
    ],
)
def test_unknown_wall_descriptions_return_na(description):
    """Each 'as built, unknown insulation' wall description is expected to resolve to N/A."""
    rating = resolve_wall_efficiency(description, None)
    assert rating == EpcEfficiency.NA

View file

@ -0,0 +1,9 @@
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root)
WORKDIR /var/task
# NOTE(review): this file has no COPY/ADD or dependency-install step, so the
# image built from it contains no application code. Confirm how the `main`
# module referenced by CMD is expected to reach /var/task.
# -----------------------------
# Lambda handler
# -----------------------------
CMD ["main.handler"]

View file

@ -1,10 +1,12 @@
import pandas as pd
import requests
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
from backend.address2UPRN.main import (
resolve_uprns_for_postcode_group,
get_epc_data_with_postcode,
)
from tqdm import tqdm
def sanitise_postcode(postcode: str) -> str | None:
"""
Normalise postcode for grouping.
@ -51,11 +53,7 @@ def main():
# --- validate AFTER grouping (save API calls) ---
# Get unique, non-null postcodes
unique_postcodes = (
df["postcode_clean"]
.dropna()
.unique()
)
unique_postcodes = df["postcode_clean"].dropna().unique()
# Validate each postcode once. TODO: add a progress bar
postcode_validity = {
@ -66,7 +64,6 @@ def main():
# Map validity back onto dataframe
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
results = []
for postcode, group_df in tqdm(
@ -98,17 +95,33 @@ def main():
results.append(tmp)
final_df = pd.concat(results, ignore_index=True)
a = final_df[[
"best_match_lexiscore","Address 1",
"best_match_address", "Postcode",
"UPRN", "best_match_uprn"
]] # add levi score to viewing
b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing
b = b[[
"best_match_lexiscore","Address 1",
"best_match_address", "Postcode",
"UPRN", "best_match_uprn"
]]
a = final_df[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
] # add levi score to viewing
b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
b = b[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
]
def handler(event, context):
    """
    Placeholder Lambda entry point.

    Prints a greeting and returns a fixed HTTP-style 200 response; the
    ``event`` and ``context`` arguments are not read.
    """
    print("hello Postcode splitter world")
    response = dict(statusCode=200, body="hello world")
    return response
if __name__ == "__main__":
main()

View file

View file

@ -0,0 +1,45 @@
import re
from enum import Enum
from typing import List
class EpcConstructionAgeBand(Enum):
    """Construction age bands; each value is the band's 'England and Wales: ...' label."""

    before_1900: str = 'England and Wales: before 1900'
    from_1900_to_1929: str = 'England and Wales: 1900-1929'
    from_1930_to_1949: str = 'England and Wales: 1930-1949'
    from_1950_to_1966: str = 'England and Wales: 1950-1966'
    from_1967_to_1975: str = 'England and Wales: 1967-1975'
    from_1976_to_1982: str = 'England and Wales: 1976-1982'
    from_1983_to_1990: str = 'England and Wales: 1983-1990'
    from_1991_to_1995: str = 'England and Wales: 1991-1995'
    from_1996_to_2002: str = 'England and Wales: 1996-2002'
    from_2003_to_2006: str = 'England and Wales: 2003-2006'
    from_2007_to_2011: str = 'England and Wales: 2007-2011'
    from_2012_onwards: str = 'England and Wales: 2012-onwards'
    from_2012_to_2022: str = 'England and Wales: 2012-2022'
    from_2023_onwards: str = 'England and Wales: 2023 onwards'

    def start_year(self) -> int:
        """
        Return the first year covered by this band.

        Any band whose label contains "before" yields 0; otherwise the first
        four-digit number in the label is used.

        Raises:
            ValueError: if the label contains neither "before" nor a 4-digit year.
        """
        label = self.value.lower()
        if 'before' in label:
            return 0

        found = re.search(r'(\d{4})', label)
        if found is None:
            raise ValueError(f"Cannot determine start year from '{self.value}'")
        return int(found.group(1))

    @classmethod
    def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]:
        """
        Collect, in definition order, every band whose start year is >= ``year``.
        """
        selected: List["EpcConstructionAgeBand"] = []
        for candidate in cls:
            if candidate.start_year() >= year:
                selected.append(candidate)
        return selected

View file

@ -0,0 +1,10 @@
from enum import Enum
class EpcEfficiency(Enum):
    """Efficiency rating labels, listed from worst to best, plus an N/A marker."""

    VERY_POOR: str = "Very Poor"
    POOR: str = "Poor"
    AVERAGE: str = "Average"
    GOOD: str = "Good"
    VERY_GOOD: str = "Very Good"

    # Used when no rating applies.
    NA: str = "N/A"

17
datatypes/epc/floor.py Normal file
View file

@ -0,0 +1,17 @@
from enum import Enum
class EpcFloorDescriptions(Enum):
    """Floor description strings, grouped by floor construction."""

    # --- Solid floor ---
    solid_insulated = "Solid, insulated"
    solid_insulated_assumed = "Solid, insulated (assumed)"
    solid_no_insulation_assumed = "Solid, no insulation (assumed)"
    solid_limited_insulation_assumed = "Solid, limited insulation (assumed)"

    # --- Suspended floor ---
    suspended_insulated = "Suspended, insulated"
    suspended_insulated_assumed = "Suspended, insulated (assumed)"
    suspended_no_insulation_assumed = "Suspended, no insulation (assumed)"
    suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)"

    # Nothing is resolved for an unknown floor, hence the None value.
    unknown = None

10
datatypes/epc/fuel.py Normal file
View file

@ -0,0 +1,10 @@
from enum import Enum
class EpcFuel(Enum):
    """Fuel description strings."""

    # Non-community supplies
    electricity_not_community = "electricity (not community)"
    lpg_not_community = "LPG (not community)"
    mains_gas_not_community = "mains gas (not community)"
    oil_not_community = "oil (not community)"

    # Solid fuels
    manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel"
    smokeless_coal = "smokeless coal"

View file

@ -0,0 +1,18 @@
from enum import Enum
class EpcHeatingControls(Enum):
    """Heating-control description strings; the comments group them by heating type."""

    programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs"
    programmers_trvs_bypass = "Programmer, TRVs and bypass"
    time_and_temperature_zone_control = "Time and temperature zone control"

    # --- Room heaters ---
    programmer_and_appliance_thermostats = "Programmer and appliance thermostats"
    appliance_thermostats = "Appliance thermostats"

    # --- Storage heaters ---
    automatic_charge_control = "Automatic charge control"
    manual_charge_control = "Manual charge control"

    # --- Warm air ---
    programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats"

View file

@ -0,0 +1,8 @@
from enum import Enum
class EpcHotWaterSystems(Enum):
    """Hot-water system description strings."""

    # Hot water supplied by the primary heating system
    from_main_system = "From main system"

    # Common for heater-based systems, e.g. room heaters or storage heaters
    electric_immersion_off_peak = "Electric immersion, off-peak"

View file

@ -0,0 +1,24 @@
from enum import Enum
class EpcHeatingSystems(Enum):
    """Main heating system description strings, grouped by system type."""

    # --- Boiler and radiators ---
    boiler_and_radiators_electric = "Boiler and radiators, electric"
    boiler_and_radiators_lpg = "Boiler and radiators, LPG"
    boiler_radiators_mains_gas = "Boiler and radiators, mains gas"
    boiler_radiators_oil = "Boiler and radiators, oil"

    # --- Underfloor ---
    electric_underfloor_heating = "Electric underfloor heating"

    # --- Air source heat pumps (ASHP) ---
    air_to_air_ashp = "Air source heat pump, warm air, electric"
    ashp_radiators_electric = "Air source heat pump, radiators, electric"

    # --- Room heaters ---
    room_heaters_electric = "Room heaters, electric"
    room_heaters_mains_gas = "Room heaters, mains gas"
    room_heaters_smokeless_fuel = "Room heaters, smokeless fuel"
    room_heaters_coal = "Room heaters, coal"

    # --- Storage heaters ---
    electric_storage_heaters = "Electric storage heaters"

    # --- Warm air ---
    warm_air_electricaire = "Warm air, Electricaire"
    warm_air_mains_gas = "Warm air, mains gas"

View file

@ -0,0 +1,17 @@
from enum import Enum
class PropertyType(Enum):
    """Property type labels."""

    flat = "Flat"
    maisonette = "Maisonette"
    bungalow = "Bungalow"
    house = "House"
class BuiltForm(Enum):
    """Built form labels."""

    mid_terrace = "Mid-Terrace"
    end_terrace = "End-Terrace"
    detached = "Detached"
    semi_detached = "Semi-Detached"

    # Enclosed variants
    enclosed_mid_terrace = "Enclosed Mid-Terrace"
    enclosed_end_terrace = "Enclosed End-Terrace"

86
datatypes/epc/roof.py Normal file
View file

@ -0,0 +1,86 @@
from enum import Enum
from typing import List
class EpcRoofDescriptions(Enum):
    """
    Roof description strings, grouped by roof type.

    The comments in each section record which efficiency rating goes with
    which insulation thickness or construction age band.
    """

    # ------------------------------------------------------------------
    # Loft
    # ------------------------------------------------------------------
    # Assumed options
    pitched_insulated_assumed: str = "Pitched, insulated (assumed)"
    pitched_no_insulation: str = "Pitched, no insulation"

    # Insulation thickness options
    loft_12mm_insulation: str = "Pitched, 12 mm loft insulation"
    loft_25mm_insulation: str = "Pitched, 25 mm loft insulation"
    loft_50mm_insulation: str = "Pitched, 50 mm loft insulation"
    loft_75mm_insulation: str = "Pitched, 75 mm loft insulation"
    loft_100mm_insulation: str = "Pitched, 100 mm loft insulation"
    loft_125mm_insulation: str = "Pitched, 125 mm loft insulation"
    loft_150mm_insulation: str = "Pitched, 150 mm loft insulation"
    loft_175mm_insulation: str = "Pitched, 175 mm loft insulation"
    loft_200mm_insulation: str = "Pitched, 200 mm loft insulation"
    loft_250mm_insulation: str = "Pitched, 250 mm loft insulation"
    loft_270mm_insulation: str = "Pitched, 270 mm loft insulation"
    loft_300mm_insulation: str = "Pitched, 300 mm loft insulation"
    loft_350mm_insulation: str = "Pitched, 350 mm loft insulation"
    loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation"

    # Insulated at rafters ("Pitched, insulated at rafters"). Ratings by thickness:
    #   400mm, 350mm  = very good
    #   200-300mm     = good
    #   125-175mm     = average
    #   50-100mm      = poor
    #   25mm or below = very poor
    loft_insulated_at_rafters: str = "Pitched, insulated at rafters"

    # Another dwelling above
    another_dwelling_above: str = "(another dwelling above)"

    # ------------------------------------------------------------------
    # Flat roof
    # ------------------------------------------------------------------
    # With observed insulation the description is just "Flat, insulated", but the
    # efficiency rating depends on the insulation thickness:
    #   12mm                         = very poor (limited insulation description)
    #   25, 50mm                     = poor (limited insulation description)
    #   75, 100, 125mm               = average (Flat, insulated)
    #   150, 175, 200, 225, 250mm    = good (Flat, insulated)
    #   270mm+                       = very good (Flat, insulated)
    # As built, by age band:
    #   2023 onwards                 = Flat, insulated, very good
    #   2003-2006 up to 2012-2022    = Flat, insulated, good
    #   1983-1990, 1996-2002         = Flat, insulated, average
    #   1976-1982                    = Flat, limited insulation, poor
    #   1967-1975                    = Flat, limited insulation, very poor
    #   1950-1966 and earlier bands  = Flat, no insulation, very poor
    flat_insulated: str = "Flat, insulated"
    flat_limited_insulation: str = "Flat, limited insulation"
    flat_no_insulation: str = "Flat, no insulation"

    # ------------------------------------------------------------------
    # Thatched roof
    # ------------------------------------------------------------------
    # With loft insulation at joists:
    #   Thatched + 12mm                 = thatched, with additional insulation, average
    #   Thatched + 25, 50, 100, 150mm   = thatched, with additional insulation, good
    #   Thatched + 175mm+               = thatched, with additional insulation, very good
    # With loft insulation at rafters: out of scope at the moment.
    # Unknown insulation, by age band:
    #   Pre 1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", average
    #   2003-2006, 2012-2022                                 = "Thatched", good
    #   2023 onwards                                         = "Thatched", very good
    thatched: str = "Thatched"  # We see this for no insulation, has average performance
    thatched_with_additional_insulation: str = "Thatched, with additional insulation"

    # ------------------------------------------------------------------
    # Sloping ceiling
    # ------------------------------------------------------------------
    # For sloping ceiling tags, we don't use any (assumed) tags so that it's
    # unambiguous that the roof is sloped.
    # NOTE(review): sloping_pitched_no_insulation has the same value as
    # pitched_no_insulation above ("Pitched, no insulation"), so the Enum treats
    # it as an alias of that member and the two cannot be told apart.
    sloping_pitched_no_insulation: str = "Pitched, no insulation"
    sloping_pitched_limited_insulation: str = "Pitched, limited insulation"
    sloping_pitched_insulated: str = "Pitched, insulated"

    # ------------------------------------------------------------------
    # Unknown descriptions which may get mapped later or handled via fallback
    # ------------------------------------------------------------------
    flat_as_built_unknown: str = "Flat, as built, unknown insulation"
    loft_as_built_unknown: str = "Loft, as built, unknown insulation"
    thatched_as_built_unknown: str = "Thatched, as built, unknown insulation"
    sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation"

    @property
    def unknown_descriptions(self) -> List["EpcRoofDescriptions"]:
        """
        The four "as built, unknown insulation" members, in a fixed order.

        This is an instance property, so it has to be read from a member
        (e.g. ``EpcRoofDescriptions.thatched.unknown_descriptions``); the
        result does not depend on which member is used.
        """
        unknown_names = (
            "flat_as_built_unknown",
            "loft_as_built_unknown",
            "thatched_as_built_unknown",
            "sloping_pitched_as_built_unknown",
        )
        return [EpcRoofDescriptions[member_name] for member_name in unknown_names]

74
datatypes/epc/walls.py Normal file
View file

@ -0,0 +1,74 @@
from enum import Enum
from typing import List
class EpcWallDescriptions(Enum):
    """
    Wall description strings, grouped by wall construction.

    Members that share a value are Enum aliases of the first member defined
    with that value; see the notes on the granite and cob sections.
    """

    # Cavity wall descriptions
    cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)"
    cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)"
    cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)"
    cavity_filled_cavity: str = "Cavity wall, filled cavity"
    cavity_internal_insulation: str = "Cavity wall, with internal insulation"
    cavity_external_insulation: str = "Cavity wall, with external insulation"
    cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation"
    cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation"

    # Solid wall descriptions
    solid_brick_internal_insulation: str = "Solid brick, with internal insulation"
    solid_brick_external_insulation: str = "Solid brick, with external insulation"
    solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)'
    solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)'
    solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)'

    # System
    system_external_insulation: str = "System built, with external insulation"
    system_internal_insulation: str = "System built, with internal insulation"
    system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)"
    system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)"
    system_insulated_assumed: str = "System built, as built, insulated (assumed)"

    # Timber
    timber_frame_internal_insulation: str = "Timber frame, with internal insulation"
    timber_frame_external_insulation: str = "Timber frame, with external insulation"
    timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)"
    timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)"
    timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)"

    # Granite/whinstone
    granite_whinstone_external_insulation: str = "Granite or whin, with external insulation"
    granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation"
    granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)"
    granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)"
    # The original ("whinestone") spelling stays first so it remains the canonical
    # member and its ``.name`` is unchanged for existing callers.
    granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)"
    # Correctly spelled alias, consistent with the other granite_whinstone_* names.
    # Same value, so this is the same member object as the line above.
    granite_whinstone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)"

    # Sandstone/limestone
    sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation"
    sandstone_limestone_external_insulation: str = "Sandstone, with external insulation"
    sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)"
    sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)"
    sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)"

    # Cob
    # NOTE(review): both names below have the value "Cob, as built", so
    # cob_as_built_good is an alias of cob_as_built_average (one member, two
    # names). Code that compares or keys on these members cannot distinguish
    # "average" from "good"; confirm whether that is intended.
    cob_as_built_average: str = "Cob, as built"
    cob_as_built_good: str = "Cob, as built"

    # unknown descriptions which may get mapped later or handled via fallback
    cavity_as_built_unknown: str = "Cavity wall, as built, unknown insulation"
    solid_brick_as_built_unknown: str = "Solid brick, as built, unknown insulation"
    system_as_built_unknown: str = "System built, as built, unknown insulation"
    timber_frame_as_built_unknown: str = "Timber frame, as built, unknown insulation"
    granite_as_built_unknown: str = "Granite or whin, as built, unknown insulation"
    sandstone_as_built_unknown: str = "Sandstone, as built, unknown insulation"
    cob_as_built_unknown: str = "Cob, as built, unknown insulation"

    @property
    def unknown_descriptions(self) -> List["EpcWallDescriptions"]:
        """
        The seven "as built, unknown insulation" members, in a fixed order.

        This is an instance property, so it has to be read from a member
        (e.g. ``EpcWallDescriptions.cavity_filled_cavity.unknown_descriptions``);
        the result does not depend on which member is used.
        """
        return [
            EpcWallDescriptions.cavity_as_built_unknown,
            EpcWallDescriptions.solid_brick_as_built_unknown,
            EpcWallDescriptions.system_as_built_unknown,
            EpcWallDescriptions.timber_frame_as_built_unknown,
            EpcWallDescriptions.granite_as_built_unknown,
            EpcWallDescriptions.sandstone_as_built_unknown,
            EpcWallDescriptions.cob_as_built_unknown,
        ]

View file

@ -196,6 +196,10 @@ class KwhData:
if save and self.bucket is None:
raise Exception("bucket not set, cannot save data")
if data.empty:
# If we have no data
return data
# TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features
# in anticipation of the new model

View file

@ -0,0 +1,43 @@
# Database credentials for the assessment model, read from Secrets Manager per stage.
data "aws_secretsmanager_secret_version" "db_credentials" {
  secret_id = "${var.stage}/assessment_model/db_credentials"
}

# State of the shared stack; its outputs supply the S3 read policy ARN attached below.
data "terraform_remote_state" "shared" {
  backend = "s3"

  config = {
    bucket = "assessment-model-terraform-state"
    key    = "env:/${var.stage}/terraform.tfstate" # TODO: don't hardcode this
    region = "eu-west-2"
  }
}

locals {
  # The secret string is JSON; the username/password keys are read from it below.
  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
}

module "lambda" {
  source = "../modules/lambda_with_sqs"

  name      = "condition-etl"
  stage     = var.stage
  image_uri = local.image_uri
  timeout   = 180

  # NOTE(review): the DB password is handed to the module in plain text,
  # presumably ending up as a Lambda environment variable. Confirm that is
  # acceptable, or have the function read the secret at runtime instead.
  environment = {
    STAGE       = var.stage
    LOG_LEVEL   = "info"
    DB_USERNAME = local.db_credentials.db_assessment_model_username
    DB_PASSWORD = local.db_credentials.db_assessment_model_password
  }
}

# Grant the Lambda's role the read policy exported by the shared stack.
resource "aws_iam_role_policy_attachment" "attach_condition_etl_s3_read" {
  role       = module.lambda.role_name
  policy_arn = data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
}

View file

@ -0,0 +1,16 @@
terraform {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.16"
    }
  }

  # Remote state for the condition-etl stack.
  backend "s3" {
    bucket = "condition-etl-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,27 @@
variable "lambda_name" {
  type        = string
  description = "Logical name of the lambda (e.g. address2uprn)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "ecr_repo_url" {
  type        = string
  description = "ECR repository URL (no tag, no digest)"
}

variable "image_digest" {
  type        = string
  description = "Image digest (sha256:...)"
}

locals {
  # The image is referenced by digest (repo@sha256:...) rather than by tag.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -6,6 +6,10 @@ module "role" {
name = "${var.name}-lambda-${var.stage}"
}
# Exposes the role module's role_name so callers can attach extra policies.
output "role_name" {
  value = module.role.role_name
}
############################################
# SQS queue + DLQ
############################################

View file

@ -0,0 +1,14 @@
# Postcode splitter Lambda, built on the shared lambda_with_sqs module.
module "lambda" {
  source = "../modules/lambda_with_sqs"

  name      = "postcode-splitter"
  stage     = var.stage
  image_uri = local.image_uri

  environment = {
    STAGE     = var.stage
    LOG_LEVEL = "info"
  }
}

View file

@ -0,0 +1,16 @@
terraform {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.16"
    }
  }

  # Remote state for the postcode-splitter stack.
  backend "s3" {
    bucket = "postcode-splitter-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,26 @@
variable "lambda_name" {
  type        = string
  description = "Logical name of the lambda (e.g. address2uprn)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "ecr_repo_url" {
  type        = string
  description = "ECR repository URL (no tag, no digest)"
}

variable "image_digest" {
  type        = string
  description = "Image digest (sha256:...)"
}

locals {
  # The image is referenced by digest (repo@sha256:...) rather than by tag.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -84,7 +84,7 @@ resource "aws_db_instance" "default" {
# Temporary, to enforce immediate change
apply_immediately = true
# Set up storage type to gp3 for better performance
storage_type = "gp3"
storage_type = "gp3"
}
# Set up the bucket that receives the CSV uploads of EPCs to be retrofitted
@ -298,10 +298,6 @@ module "address2uprn_state_bucket" {
}
output "address2uprn_state_bucket_name" {
value = module.address2uprn_state_bucket.bucket_name
}
module "address2uprn_registry" {
source = "../modules/container_registry"
name = "address2uprn"
@ -309,6 +305,62 @@ module "address2uprn_registry" {
}
output "address2uprn_repository_url" {
value = module.address2uprn_registry.repository_url
################################################
# Condition ETL Lambda ECR
################################################
# Bucket holding the Terraform state of the condition-etl stack.
module "condition_etl_state_bucket" {
  source      = "../modules/tf_state_bucket"
  bucket_name = "condition-etl-terraform-state"
}

# Container registry for the condition-etl Lambda image.
module "condition_etl_registry" {
  source = "../modules/container_registry"

  name  = "condition-etl"
  stage = var.stage
}
################################################
# Postcode Splitter Lambda ECR
################################################
# Bucket holding the Terraform state of the postcode-splitter stack.
module "postcode_splitter_state_bucket" {
  source      = "../modules/tf_state_bucket"
  bucket_name = "postcode-splitter-terraform-state"
}

# Container registry for the postcode-splitter Lambda image.
# NOTE(review): this registry name uses an underscore ("postcode_splitter"),
# while the state bucket above and the condition-etl registry use hyphens.
# Confirm the build pipeline expects this exact name.
module "postcode_splitter_registry" {
  source = "../modules/container_registry"

  name  = "postcode_splitter"
  stage = var.stage
}
################################################
# Condition data S3 bucket
################################################
# Per-stage bucket for condition data; the read policy for it is defined separately.
module "condition_data_bucket" {
  source = "../modules/s3"

  bucketname      = "condition-data-${var.stage}"
  allowed_origins = var.allowed_origins
}
# Read-only access to objects in the per-stage condition-data bucket.
# NOTE(review): the policy name has no stage suffix although the bucket it
# targets does. If more than one stage is deployed into the same AWS account
# the names would collide; confirm whether it should include var.stage.
resource "aws_iam_policy" "condition_etl_s3_read" {
  name        = "ConditionETLReadS3"
  description = "Allow Lambda to read objects from condition-data-${var.stage}"

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Action   = ["s3:GetObject"]
        Resource = "arn:aws:s3:::condition-data-${var.stage}/*"
      }
    ]
  })
}
output "condition_etl_s3_read_arn" {
value = aws_iam_policy.condition_etl_s3_read.arn
}

View file

@ -1,4 +1,4 @@
[pytest]
pythonpath = .
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests

View file

@ -1090,6 +1090,7 @@ class Recommendations:
ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY
#
kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][
kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id)
].merge(

View file

@ -331,18 +331,18 @@ class RoofRecommendations:
"""
# Can a non-primary part satisfy loft insulation?
primary_needs_loft = component_needs[1]["needs_loft_insulation"]
primary_needs_loft = component_needs[0]["needs_loft_insulation"]
secondary_needs_loft = any(
p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 1
p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 0
)
if primary_needs_loft and not secondary_needs_loft:
# Only option is loft
return "loft"
primary_needs_sloping = component_needs[1]["needs_sloping_ceiling"]
primary_needs_sloping = component_needs[0]["needs_sloping_ceiling"]
secondary_needs_sloping = any(
p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 1
p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 0
)
if primary_needs_sloping and not secondary_needs_sloping:
@ -418,11 +418,13 @@ class RoofRecommendations:
return needs_sloping, not needs_loft # Indicates that the property needs sloping ceiling as we only run
# this in that case
roof_components = [x for x in find_my_epc_components if x["component_name"] == "Roof"]
extracted_roof_descriptions = {
idx: {
"description": component["description"],
**RoofAttributes(component["description"]).process()
} for idx, component in enumerate(find_my_epc_components) if component["component_name"] == "Roof"
} for idx, component in enumerate(roof_components)
}
component_needs = {}

View file

@ -2,6 +2,10 @@
This script prepares the data for the financial model
"""
from dotenv import load_dotenv
load_dotenv(".env.local")
import pandas as pd
import numpy as np
from backend.app.utils import sap_to_epc
@ -24,12 +28,12 @@ from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 502 # Peabody
PORTFOLIO_ID = 524
SCENARIOS = [
986,
1009,
]
scenario_names = {
986: "EPC C",
1009: "EPC C; Most Economic",
}

View file

@ -264,6 +264,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
def read_csv_from_s3(bucket_name, filepath):
logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'")
s3 = boto3.client('s3')
# Get the object from s3