Merge remote-tracking branch 'origin/main' into feature/bill-derivation

# Conflicts:
#	repositories/property/property_postgres_repository.py
#	tests/orchestration/fakes.py
This commit is contained in:
Khalim Conn-Kowlessar 2026-06-05 11:09:00 +00:00
commit 2c36a8e1d6
82 changed files with 3372 additions and 325 deletions

View file

@ -76,6 +76,8 @@ on:
required: false
TF_VAR_social_housing_wave_3_sharepoint_id:
required: false
TF_VAR_eco_sharepoint_id:
required: false
TF_VAR_pashub_email:
required: false
TF_VAR_pashub_password:
@ -159,6 +161,7 @@ jobs:
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }}
TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }}
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
@ -210,6 +213,7 @@ jobs:
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }}
TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }}
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}

View file

@ -284,6 +284,46 @@ jobs:
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Build Bulk Upload Finaliser image and Push
# ============================================================
bulkUploadFinaliser_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: bulk_upload_finaliser-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: applications/bulk_upload_finaliser/Dockerfile
build_context: .
build_args: |
DEV_DB_HOST=$DEV_DB_HOST
DEV_DB_PORT=$DEV_DB_PORT
DEV_DB_NAME=$DEV_DB_NAME
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
# ============================================================
# Deploy Bulk Upload Finaliser Lambda
# ============================================================
bulkUploadFinaliser_lambda:
needs: [bulkUploadFinaliser_image, determine_stage, shared_terraform]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: bulkUploadFinaliser
lambda_path: deployment/terraform/lambda/bulkUploadFinaliser
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: bulk_upload_finaliser-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.bulkUploadFinaliser_image.outputs.image_digest }}
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# Condition ETL image and Push
# ============================================================
@ -448,6 +488,7 @@ jobs:
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.OSMOSIS_ACD_SHAREPOINT_ID }}
TF_VAR_private_pay_sharepoint_id: ${{ secrets.PRIVATE_PAY_SHAREPOINT_ID }}
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }}
TF_VAR_eco_sharepoint_id: ${{ secrets.ECO_SHAREPOINT_ID }}
TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }}
TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }}
TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }}
@ -458,7 +499,7 @@ jobs:
# Deploy FastAPI Lambda
# ============================================================
fast_api_lambda:
needs: [determine_stage, ara_engine_lambda, categorisation_lambda, postcodeSplitter_lambda, bulk_address2uprn_combiner_lambda]
needs: [determine_stage, ara_engine_lambda, categorisation_lambda, postcodeSplitter_lambda, bulk_address2uprn_combiner_lambda, bulkUploadFinaliser_lambda]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: ara_fast_api

View file

@ -63,6 +63,16 @@ jobs:
build_context: .
service_name: bulk-address2uprn-combiner
# ============================================================
# Bulk Upload Finaliser
# ============================================================
bulk_upload_finaliser_smoke_test:
uses: ./.github/workflows/_smoke_test_lambda.yml
with:
dockerfile_path: applications/bulk_upload_finaliser/Dockerfile
build_context: .
service_name: bulk-upload-finaliser
# ============================================================
# Condition ETL
# ============================================================

View file

@ -0,0 +1,38 @@
FROM public.ecr.aws/lambda/python:3.11
# Postgres host/port/database are baked into the image at build time from the
# deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
# mirroring the landlord_description_overrides Dockerfile. They map onto the
# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT baked
# in -- Terraform injects those as Lambda env vars from Secrets Manager.
ARG DEV_DB_HOST
ARG DEV_DB_PORT
ARG DEV_DB_NAME
ENV POSTGRES_HOST=${DEV_DB_HOST}
ENV POSTGRES_PORT=${DEV_DB_PORT}
ENV POSTGRES_DATABASE=${DEV_DB_NAME}
WORKDIR /var/task
COPY applications/bulk_upload_finaliser/requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# DDD-shaped packages only -- no pandas, no legacy backend/. The finaliser writes
# both `property` and the terminal `bulk_address_uploads` status through DDD repos
# on its own PostgresConfig session (ADR-0013). `datatypes/` comes in via the
# Property aggregate's import closure (domain/property/property.py -> site_notes ->
# datatypes/epc/...), enforced by tests/test_lambda_packaging.py.
COPY datatypes/ datatypes/
COPY domain/ domain/
COPY infrastructure/ infrastructure/
COPY orchestration/ orchestration/
COPY repositories/ repositories/
COPY utilities/ utilities/
COPY applications/ applications/
# Place the handler at the Lambda task root so the runtime resolves
# ``main.handler`` without an extra package prefix.
COPY applications/bulk_upload_finaliser/handler.py /var/task/main.py
CMD ["main.handler"]

View file

@ -0,0 +1,22 @@
from uuid import UUID
from pydantic import BaseModel, ConfigDict
class BulkUploadFinaliserTriggerBody(BaseModel):
"""Trigger body for the bulk_upload_finaliser Lambda (ADR-0013).
Dispatched by the Next.js Finalise action via
``POST /v1/bulk-uploads/trigger-finaliser``. ``s3_uri`` is the combiner output
(``combined_output_s3_uri``) the same address/UPRN CSV the old synchronous
``/finalize`` route read.
"""
model_config = ConfigDict(extra="allow")
task_id: UUID
sub_task_id: UUID
s3_uri: str
# bigint in the FE schema; Python int is unbounded so Pydantic stays simple.
portfolio_id: int
bulk_upload_id: UUID

View file

@ -0,0 +1,104 @@
"""bulk_upload_finaliser Lambda (ADR-0013).
Replaces the synchronous Next.js ``/finalize`` property insert. Thin wiring: parse
the trigger, read the combiner output CSV from S3, hand the rows to the
``BulkUploadFinaliserOrchestrator`` (which owns the resolution + persist), then
write the terminal BulkUpload status directly (ADR-0005 hands terminal ownership to
the backend). ``complete`` is written in the *same* transaction as the property
insert (atomic finalise); ``failed`` is written on a fresh session on error.
PostgresConfig-only, like the landlord classifier Lambda no legacy ``backend/``
connection so a single DB config (POSTGRES_*) drives the whole run.
"""
import logging
import os
from typing import Any
from uuid import UUID
import boto3
from sqlalchemy.engine import Engine
from applications.bulk_upload_finaliser.bulk_upload_finaliser_trigger_body import (
BulkUploadFinaliserTriggerBody,
)
from infrastructure.postgres.config import PostgresConfig
from infrastructure.postgres.engine import commit_scope, make_engine, make_session
from infrastructure.s3.csv_s3_client import CsvS3Client
from infrastructure.s3.s3_uri import parse_s3_uri
from orchestration.bulk_upload_finaliser_orchestrator import (
BulkUploadFinaliserOrchestrator,
)
from orchestration.task_orchestrator import TaskOrchestrator
from repositories.bulk_upload.bulk_upload_status_writer_postgres import (
BulkUploadStatusWriterPostgresRepository,
)
from repositories.property.property_postgres_repository import (
PropertyPostgresRepository,
)
from utilities.aws_lambda.subtask_handler import subtask_handler
logger = logging.getLogger(__name__)
def _run(engine: Engine, trigger: BulkUploadFinaliserTriggerBody) -> int:
bucket, _key = parse_s3_uri(trigger.s3_uri)
boto3_client: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
boto_s3: Any = boto3_client("s3")
rows = CsvS3Client(boto_s3, bucket).read_rows(trigger.s3_uri)
session = make_session(engine)
try:
orchestrator = BulkUploadFinaliserOrchestrator(
# Write-only path: no EpcRepository needed for inserts.
property_repo=PropertyPostgresRepository(session),
status_writer=BulkUploadStatusWriterPostgresRepository(session),
)
# Atomic finalise: the orchestrator inserts properties and marks `complete`
# via its injected writers; the transaction here makes them land together —
# a failure in either rolls back both, leaving the row for the failure path.
with commit_scope(session):
inserted = orchestrator.finalise(
rows, trigger.portfolio_id, trigger.task_id
)
finally:
session.close()
logger.info(
"Finalised bulk upload %s: %d rows read, %d properties inserted.",
trigger.bulk_upload_id,
len(rows),
inserted,
)
return inserted
def _mark_failed(engine: Engine, task_id: UUID) -> None:
session = make_session(engine)
try:
with commit_scope(session):
BulkUploadStatusWriterPostgresRepository(session).set_status(
task_id, "failed"
)
finally:
session.close()
@subtask_handler()
def handler(
body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
) -> dict[str, int]:
trigger = BulkUploadFinaliserTriggerBody.model_validate(body)
engine = make_engine(PostgresConfig.from_env(os.environ))
try:
inserted = _run(engine, trigger)
except Exception:
# Hand the BulkUpload to the terminal `failed` state so the UI leaves
# `finalising`; the @subtask_handler also marks the SubTask FAILED on the
# re-raise below.
_mark_failed(engine, trigger.task_id)
raise
return {"inserted": inserted}

View file

@ -0,0 +1,4 @@
boto3
pydantic
sqlmodel
psycopg2-binary

View file

@ -12,6 +12,7 @@ from backend.app.bulk_uploads.schema import (
CombinedResultRow,
CombinedResultsResponse,
CombinerTriggerRequest,
FinaliserTriggerRequest,
FlagsSummary,
LandlordOverridesTriggerRequest,
PostcodeSplitterTriggerRequest,
@ -113,6 +114,26 @@ async def trigger_landlord_overrides(req: LandlordOverridesTriggerRequest):
}
@router.post("/trigger-finaliser", status_code=202)
async def trigger_finaliser(req: FinaliserTriggerRequest):
settings = get_settings()
try:
sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION)
response = sqs.send_message(
QueueUrl=settings.FINALISER_SQS_URL,
MessageBody=req.model_dump_json(),
)
except Exception as e:
raise HTTPException(status_code=500, detail=f"SQS error: {e}")
return {
"task_id": req.task_id,
"sub_task_id": req.sub_task_id,
"sqs_message_id": response.get("MessageId"),
}
@router.get("/{task_id}/combined-results", response_model=CombinedResultsResponse)
async def get_combined_results(
task_id: UUID,

View file

@ -23,6 +23,14 @@ class LandlordOverridesTriggerRequest(BaseModel):
column_mapping: dict[str, str]
class FinaliserTriggerRequest(BaseModel):
task_id: str
sub_task_id: str
s3_uri: str # combiner output (combined_output_s3_uri)
portfolio_id: int
bulk_upload_id: str
class FlagsSummary(BaseModel):
duplicates: int
missing: int

View file

@ -43,6 +43,7 @@ class Settings(BaseSettings):
POSTCODE_SPLITTER_SQS_URL: str = "changeme"
COMBINER_SQS_URL: str = "changeme"
LANDLORD_OVERRIDES_SQS_URL: str = "changeme"
FINALISER_SQS_URL: str = "changeme"
# Third parties
EPC_AUTH_TOKEN: str = "changeme"
@ -82,6 +83,7 @@ class Settings(BaseSettings):
OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None
PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None
ECO_SHAREPOINT_ID: Optional[str] = None
OPENAI_API_KEY: Optional[str] = None
# Pas Hub

View file

@ -21,6 +21,8 @@ class FileTypeEnum(enum.Enum):
IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation"
MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan"
RETROFIT_DESIGN_DOC = "retrofit_design_doc"
MCS_COMPLIANCE_CERTIFICATE = "mcs_compliance_certificate"
OTHER = "other"
class FileSourceEnum(enum.Enum):

View file

@ -269,8 +269,8 @@ class ElmhurstSiteNotesExtractor:
)
def _wall_details_from_lines(self, lines: List[str]) -> WallDetails:
thickness_raw = self._local_val(lines, "Wall Thickness")
thickness_mm = (
thickness_raw: Optional[str] = self._local_val(lines, "Wall Thickness")
thickness_mm: Optional[int] = (
int(thickness_raw.split()[0]) if thickness_raw else None
)
# Composite / retrofit insulation thickness — Summary §7.0
@ -280,8 +280,8 @@ class ElmhurstSiteNotesExtractor:
# is local-scoped inside the §7 block so it does not collide
# with the §8 Roofs / §9 Floors blocks. None when the PDF
# omits the line (no retrofit lodged).
ins_thickness_raw = self._local_val(lines, "Insulation Thickness")
insulation_thickness_mm = self._parse_thickness_mm(ins_thickness_raw)
ins_thickness_raw: Optional[str] = self._local_val(lines, "Insulation Thickness")
insulation_thickness_mm: Optional[int] = self._parse_thickness_mm(ins_thickness_raw)
return WallDetails(
wall_type=self._local_str(lines, "Type"),
insulation=self._local_str(lines, "Insulation"),
@ -318,8 +318,10 @@ class ElmhurstSiteNotesExtractor:
continue
if area <= 0:
continue
thickness_raw = self._local_val(lines, f"Alternative Wall {n} Thickness")
thickness_mm = self._parse_thickness_mm(thickness_raw)
thickness_raw: Optional[str] = self._local_val(
lines, f"Alternative Wall {n} Thickness"
)
thickness_mm: Optional[int] = self._parse_thickness_mm(thickness_raw)
result.append(AlternativeWall(
area_m2=area,
wall_type=self._local_str(lines, f"Alternative Wall {n} Type"),
@ -365,8 +367,8 @@ class ElmhurstSiteNotesExtractor:
return int(match.group()) if match else None
def _roof_details_from_lines(self, lines: List[str]) -> RoofDetails:
thickness_raw = self._local_val(lines, "Insulation Thickness")
thickness_mm = self._parse_thickness_mm(thickness_raw)
thickness_raw: Optional[str] = self._local_val(lines, "Insulation Thickness")
thickness_mm: Optional[int] = self._parse_thickness_mm(thickness_raw)
insulation = self._local_str(lines, "Insulation")
# The Summary PDF omits the "Insulation Thickness" line entirely
# when no retrofit insulation is lodged (e.g. "Insulation: N None"
@ -390,14 +392,14 @@ class ElmhurstSiteNotesExtractor:
return self._roof_details_from_lines(lines)
def _floor_details_from_lines(self, lines: List[str]) -> FloorDetails:
u_val_raw = self._local_val(lines, "Default U-value")
default_u = float(u_val_raw) if u_val_raw else None
u_val_raw: Optional[str] = self._local_val(lines, "Default U-value")
default_u: Optional[float] = float(u_val_raw) if u_val_raw else None
# RdSAP 10 §5.13 Table 20 — retro-fitted upper floors lodge an
# "Insulation Thickness: NNN mm" cell so the cascade can route
# via the per-thickness column. Mirror of the §8 roof extractor
# at `_roof_details_from_lines`.
thickness_raw = self._local_val(lines, "Insulation Thickness")
thickness_mm = self._parse_thickness_mm(thickness_raw)
thickness_raw: Optional[str] = self._local_val(lines, "Insulation Thickness")
thickness_mm: Optional[int] = self._parse_thickness_mm(thickness_raw)
return FloorDetails(
location=self._local_str(lines, "Location"),
floor_type=self._local_str(lines, "Type"),
@ -670,12 +672,24 @@ class ElmhurstSiteNotesExtractor:
# even when the main wall fields are inherited; merge
# them into the inherited WallDetails so the bp carries
# them through to its SapBuildingPart.
#
# "As Main Wall: Yes" inherits the EXTERNAL wall
# construction only — the PARTY WALL TYPE is lodged
# separately in the extension's §7 block and may differ
# (cert 001431: Main "CU Cavity masonry unfilled" U=0.5,
# 1st Extension "U Unable to determine" → RdSAP default
# U=0.25). Read the extension's own party wall type when
# present; fall back to the main's only when absent.
ext_party_wall_type = (
self._local_str(wall_lines, "Party Wall Type")
or main_walls.party_wall_type
)
walls = WallDetails(
wall_type=main_walls.wall_type,
insulation=main_walls.insulation,
thickness_unknown=main_walls.thickness_unknown,
u_value_known=main_walls.u_value_known,
party_wall_type=main_walls.party_wall_type,
party_wall_type=ext_party_wall_type,
thickness_mm=main_walls.thickness_mm,
insulation_thickness_mm=main_walls.insulation_thickness_mm,
alternative_walls=self._alternative_walls_from_lines(wall_lines),
@ -1538,6 +1552,7 @@ class ElmhurstSiteNotesExtractor:
wind_turbines_terrain_type=terrain,
hydro_electricity_generated_kwh=hydro,
pv_arrays=self._extract_pv_arrays(),
pv_diverter_present=self._bool_val("Diverter present"),
pv_percent_roof_area=pv_pct if pv_pct > 0 else None,
solar_hw_collector_orientation=solar_orientation,
solar_hw_collector_pitch_deg=solar_pitch,

View file

@ -42,6 +42,7 @@ from datatypes.epc.domain.mapper import (
EpcPropertyDataMapper,
UnmappedApiCode,
UnmappedElmhurstLabel,
_elmhurst_glazing_type_code, # pyright: ignore[reportPrivateUsage]
)
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
@ -1471,6 +1472,68 @@ def test_summary_000474_double_glazed_windows_route_to_code_3() -> None:
)
def test_elmhurst_glazing_label_full_coverage_per_sap10_table_6b() -> None:
# Arrange — the double_glazing recommendation fixture (Summary_001431)
# exercises every RdSAP-21 §11 glazing-type lodging in one cert. Each
# label must resolve to the SAP 10.2 Table 6b cascade code whose
# `_G_LIGHT_BY_GLAZING_CODE` daylight factor g_L is correct for the
# glazing family: single 0.90, double / secondary 0.80, triple 0.70
# (the lodged manufacturer U/g drive §3/§6; the code only sets g_L).
expected: dict[str, int] = {
"Single glazing": 1,
"Single glazing, known data": 15,
"Double pre 2002": 2,
"Double between 2002 and 2021": 3,
"Double with unknown install date": 3,
"Double glazing, known data": 3,
"Double post or during 2022": 5,
"Secondary glazing": 7,
"Secondary glazing - Normal emissivity": 11,
"Secondary glazing - Low emissivity": 12,
"Triple pre 2002": 10,
"Triple between 2002 and 2021": 9,
"Triple post or during 2022": 6,
"Triple with unknown install date": 6,
}
# Act / Assert
for label, code in expected.items():
assert _elmhurst_glazing_type_code(label) == code, (
f"{label!r} should map to SAP 10.2 Table 6b code {code}"
)
def test_extension_party_wall_type_read_independently_of_as_main_wall() -> None:
# Arrange — RdSAP 10 §3.3: "As Main Wall: Yes" inherits only the
# external wall CONSTRUCTION; the party wall type is lodged
# separately per building part and may differ. The double_glazing
# fixture (Summary_001431) lodges Main party "CU Cavity masonry
# unfilled" (SAP10 wall_construction 4 → u_party_wall 0.5) but the
# 1st Extension party "U Unable to determine" (→ wall_construction 0
# → RdSAP default u_party_wall 0.25), even though the extension is
# "As Main Wall: Yes". Pre-fix the extension inherited the Main's
# party type (both 0.5), inflating worksheet (32) party heat loss.
pages = _summary_pdf_to_textract_style_pages(
_FIXTURES / "Summary_001431_double_glazing.pdf"
)
site_notes = ElmhurstSiteNotesExtractor(pages).extract()
# Act
epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
# Assert — Main BP keeps cavity-unfilled (4); the extension BP gets
# the "Unable to determine" sentinel (0), a distinct party wall U.
party_codes = [
bp.party_wall_construction for bp in epc.sap_building_parts
]
assert party_codes == [4, 0], (
f"expected Main=4 (CU, U=0.5) + Ext=0 (Unable, U=0.25), got {party_codes}"
)
# The two map to different SAP party-wall U-values.
assert abs(u_party_wall(4) - 0.5) <= 1e-9
assert abs(u_party_wall(0) - 0.25) <= 1e-9
def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None:
# Arrange — same strict-coverage gate as the cylinder-size helper
# (Slice S0380.15 + S0380.16): silently routing an unknown glazing
@ -4361,6 +4424,32 @@ def test_elmhurst_foam_cylinder_insulation_still_maps_to_factory_code_1() -> Non
assert code == 1
def test_elmhurst_roof_construction_int_matches_api_codes() -> None:
# Arrange — cross-mapper structural parity: the gov-EPC API mapper
# populates BOTH roof_construction (int) and roof_construction_type
# (str derived via `_API_ROOF_CONSTRUCTION_TO_STR`), but the Elmhurst
# mapper set only the string, leaving the int None. The SAP cascade
# reads the string (so SAP parity held), but consumers of the int
# (e.g. domain/sap10_ml ML aggregates) saw None on every site-notes
# cert. `_elmhurst_roof_construction_int` closes the gap, mapping the
# Elmhurst roof code to the same SAP10 int the API lodges. Unmapped
# codes return None (not a raise) — the int is not cascade-load-
# bearing, so an unknown roof must not block the cert.
from datatypes.epc.domain.mapper import _elmhurst_roof_construction_int # pyright: ignore[reportPrivateUsage]
# Act / Assert — each Elmhurst roof code → the gov-EPC API int.
assert _elmhurst_roof_construction_int("F Flat") == 1
assert _elmhurst_roof_construction_int("PN Pitched (slates/tiles), no access") == 3
assert _elmhurst_roof_construction_int("PA Pitched (slates/tiles), access to loft") == 4
assert _elmhurst_roof_construction_int("PS Pitched, sloping ceiling") == 8
assert _elmhurst_roof_construction_int("S Same dwelling above") == 7
assert _elmhurst_roof_construction_int("A Another dwelling above") == 7
# Absent / unmapped → None (no raise; not cascade-load-bearing).
assert _elmhurst_roof_construction_int(None) is None
assert _elmhurst_roof_construction_int("") is None
assert _elmhurst_roof_construction_int("NR Non-residential space above") is None
def test_elmhurst_wall_is_basement_disambiguates_system_built_from_basement() -> None:
# Arrange — "SY System build" and "B Basement wall" both map to SAP10
# wall_construction=6 (canonical WALL_SYSTEM_BUILT). The explicit

View file

@ -1,4 +1,5 @@
import urllib.parse
from typing import Any
from pydantic import ValidationError
import requests
import pandas as pd
@ -7,13 +8,13 @@ from utils.logger import setup_logger
logger = setup_logger()
def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
def os_places_results_to_dataframe(data: dict[str, Any]) -> pd.DataFrame:
"""
Flatten the OS Places API response results into a DataFrame.
Each result contains either a DPA or LPI record.
"""
results = data.get("results", [])
rows = []
results: list[dict[str, Any]] = data.get("results", [])
rows: list[dict[str, Any]] = []
for r in results:
if "DPA" in r:
rows.append(r["DPA"])
@ -22,7 +23,7 @@ def os_places_results_to_dataframe(data: dict) -> pd.DataFrame:
return pd.DataFrame(rows)
def lookup_os_places(postcode: str, api_key: str) -> dict:
def lookup_os_places(postcode: str, api_key: str) -> dict[str, Any]:
"""
Lookup a postcode using the OS Places API.
Returns the full API response data or an error dict.

View file

@ -0,0 +1,21 @@
# Runtime env for the Ordnance Survey handler when run locally via docker compose.
# Variable names must match backend/app/config.py Settings (DB_*, ORDNANCE_SURVEY_API_KEY)
# plus the AWS creds boto3 needs for S3 access.
ENVIRONMENT=local
# Database (OS Places postcode cache)
DB_HOST=
DB_PORT=5432
DB_NAME=
DB_USERNAME=
DB_PASSWORD=
# Ordnance Survey Places API
ORDNANCE_SURVEY_API_KEY=
# AWS — read input CSV from / write results to S3
AWS_ACCESS_KEY_ID=
AWS_SECRET_ACCESS_KEY=
AWS_DEFAULT_REGION=eu-west-2
S3_BUCKET_NAME=retrofit-data-dev

View file

@ -12,9 +12,11 @@ payload = {
{
"body": json.dumps(
{
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
"sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/missinguprn.csv",
"task_id": "ea1a20b2-a21f-464c-9999-25f684405b69",
"sub_task_id": "747c99df-a1ff-41a0-98c2-5265d7797f90",
"s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/hyde/output1(Sheet2).csv",
"lexiscore_column": "address2uprn_lexiscore",
"lexiscore_threshold": 0.2,
}
)
}

View file

@ -0,0 +1,12 @@
#!/usr/bin/env bash
set -euo pipefail
cd "$(dirname "$0")"
if [ ! -f .env.local ]; then
cp .env.local.example .env.local
echo "Created .env.local from the template — fill it in, then re-run." >&2
exit 1
fi
docker compose build --no-cache
docker compose up --force-recreate

View file

@ -22,6 +22,7 @@ import uuid
import os
import pandas as pd
from tqdm import tqdm
logger: logging.Logger = setup_logger()
@ -152,8 +153,10 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None:
df["ordnance_survey_lexiscore"] = None
# Process each postcode group at a time
for postcode, group in grouped:
print(f"Processing postcode: {postcode} ({len(group)} rows)")
for postcode, group in tqdm(
grouped, total=grouped.ngroups, desc="OS postcodes", unit="postcode"
):
tqdm.write(f"Processing postcode: {postcode} ({len(group)} rows)")
valid_group = AddressMatch.is_valid_postcode(postcode)
if not valid_group:
logger.warning(f"Postcode {postcode} is invalid, skipping")

View file

@ -0,0 +1,2 @@
User Input,Postcode,Expected UPRN
"115 NORTHWALK, CROYDON, SURREY",CR0 9ES,
1 User Input Postcode Expected UPRN
2 115 NORTHWALK, CROYDON, SURREY CR0 9ES

View file

@ -0,0 +1,153 @@
# backend/ordnanceSurvey/tests/test_os_match.py
"""
Debug harness for Ordnance Survey address matching.
Mirrors backend/address2UPRN/tests/test_csv.py, but for the OS Places flow:
for each (User Input, Postcode) case it hits the live OS Places API, scores every
candidate address with AddressMatch.score (exactly as backend/ordnanceSurvey/main.py
does), and prints the full ranked breakdown so you can see *why* a match was or
wasn't found.
Run with -s to see the ranking:
pytest backend/ordnanceSurvey/tests/test_os_match.py -s
Requires ORDNANCE_SURVEY_API_KEY to be set (config Settings / env); skipped otherwise.
"""
import csv
import os
import time
from pathlib import Path
from typing import Any, Optional
import pytest
from backend.ordnanceSurvey.helpers import (
lookup_os_places,
os_places_results_to_dataframe,
)
from backend.utils.addressMatch import AddressMatch
FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
# Be polite to the live OS Places API between cases.
OS_THROTTLE_SECONDS = 1.0
# Handler treats best_score <= 0 as "no match" (see ordnanceSurvey/main.py).
MATCH_THRESHOLD = 0.0
@pytest.fixture(autouse=True)
def _throttle_os_requests():
yield
time.sleep(OS_THROTTLE_SECONDS)
def _api_key() -> Optional[str]:
# Read straight from the environment so the debug harness doesn't depend on
# the full Settings model loading cleanly. Falls back to Settings if unset.
key = os.getenv("ORDNANCE_SURVEY_API_KEY")
if not key:
try:
from backend.app.config import get_settings
key = get_settings().ORDNANCE_SURVEY_API_KEY
except Exception:
key = None
if not key or key == "changeme":
return None
return key
def load_test_cases():
with open(FIXTURE_PATH, newline="", encoding="utf-8") as f:
reader = csv.DictReader(f)
return [
pytest.param(
row["User Input"],
row["Postcode"],
(row.get("Expected UPRN") or "").strip(),
id=f'{row["User Input"]} [{row["Postcode"]}]',
)
for row in reader
]
def _scored_candidates(
user_input: str, postcode: str, api_key: str
) -> list[dict[str, Any]]:
"""
Fetch OS Places candidates for a postcode (bypassing the DB cache) and score
every candidate ADDRESS exactly as ordnanceSurvey/main.py does. Returned
ranked best-first.
"""
response: dict[str, Any] = lookup_os_places(postcode, api_key)
assert response.get("status") == 200, f"OS Places API failed: {response}"
assert "data" in response, f"No data in OS Places response: {response}"
candidates = os_places_results_to_dataframe(response["data"])
records: list[dict[str, Any]] = candidates.to_dict("records") # type: ignore[assignment]
scored: list[dict[str, Any]] = []
for rec in records:
address = str(rec.get("ADDRESS", ""))
scored.append(
{
"uprn": rec.get("UPRN", "?"),
"address": address,
"normalised": AddressMatch.normalise_address(address),
"score": AddressMatch.score(user_input, address),
}
)
scored.sort(key=lambda r: r["score"], reverse=True)
return scored
def _print_debug(
user_input: str, postcode: str, scored: list[dict[str, Any]]
) -> None:
print(f"\n{'=' * 80}")
print(f"User input : {user_input!r}")
print(f"Normalised : {AddressMatch.normalise_address(user_input)!r}")
print(f"Postcode : {postcode!r}")
print(f"Candidates : {len(scored)}")
print(f"{'-' * 80}")
if not scored:
print("(no OS Places candidates returned for this postcode)")
return
for row in scored[:15]:
print(f" score={row['score']:.4f} uprn={row['uprn']}")
print(f" ADDRESS : {row['address']}")
print(f" normalised : {row['normalised']}")
print(f"{'=' * 80}")
@pytest.mark.integration
@pytest.mark.parametrize("user_input,postcode,expected_uprn", load_test_cases())
def test_os_match_finds_candidate(
user_input: str,
postcode: str,
expected_uprn: str,
):
api_key = _api_key()
if api_key is None:
pytest.skip("ORDNANCE_SURVEY_API_KEY not set")
scored = _scored_candidates(user_input, postcode, api_key)
_print_debug(user_input, postcode, scored)
best = scored[0] if scored else None
best_score = float(best["score"]) if best is not None else 0.0
# The handler records a match only when best_score > 0. This assertion is
# the debug signal: when it fails, the printed ranking above shows why.
assert best is not None and best_score > MATCH_THRESHOLD, (
f"No OS match for {user_input!r} @ {postcode!r} "
f"(best_score={best_score:.4f}). See ranking above."
)
if expected_uprn:
assert str(best["uprn"]) == expected_uprn, (
f"Best match UPRN {best['uprn']!r} != expected {expected_uprn!r}"
)

View file

@ -17,6 +17,7 @@ class CoreFiles(Enum):
IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation"
MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan"
RETROFIT_DESIGN_DOC = "Retrofit Design Doc"
MCS_COMPLIANCE_CERTIFICATE = "MCS Compliance Certificate"
_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
@ -32,14 +33,21 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value,
CoreFiles.MCS_COMPLIANCE_CERTIFICATE: FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value,
}
def get_core_file_type(
filename: str, evidence_category: Optional[str] = None
) -> Optional[CoreFiles]:
# Identify retrofit design doc using evidence category as the name is possibly unreliable.
# Identify MCS certificate and design doc using evidence category as the names are possibly unreliable.
# We might change to always use evidence category, but needs more investigation
if (
evidence_category is not None
and evidence_category.lower() == "mcs compliance certificate"
):
return CoreFiles.MCS_COMPLIANCE_CERTIFICATE
if evidence_category is not None and evidence_category.lower() == "retrofit design":
return CoreFiles.RETROFIT_DESIGN_DOC
@ -56,6 +64,7 @@ def get_core_file_type(
CoreFiles.RETROFIT_DESIGN_DOC,
CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
CoreFiles.MCS_COMPLIANCE_CERTIFICATE,
}
for core_file in CoreFiles:
@ -68,8 +77,10 @@ def get_core_file_type(
return None
def get_file_type_string(filename: str) -> Optional[str]:
core_file: Optional[CoreFiles] = get_core_file_type(filename)
def get_file_type_string(
filename: str, evidence_category: Optional[str] = None
) -> Optional[str]:
core_file: Optional[CoreFiles] = get_core_file_type(filename, evidence_category)
if core_file is None:
return None

View file

@ -0,0 +1,63 @@
EVIDENCE_CATEGORIES = [
"Advice report",
"Air Tests - BGV",
"Air Tightness Strategy",
"Assessment report",
"Blue Site Notes (PAS Assessment)",
"Building Assessment report",
"Building Condition report",
"Building Regulations Sign-off",
"Claim of compliance PAS2030",
"Claim of compliance PAS2035",
"Commissioning checklist",
"Condition report",
"Contract / Invoice",
"Electrical Certificate",
"Energy report",
"Evidence of submission to CPS",
"Floor Plan",
"Full Property Assessment",
"Gas Appliance Benchmarking Certificate",
"Gas Appliance Commissioning Checklist",
"Gas Inspection Certificate",
"Handover and Commissioning Documents",
"Handover Documents",
"Handover documents for client",
"Heat Demand Calculations",
"Heritage Impact Assessment",
"Improvement option evaluation",
"Installation Guides",
"Insurance guarantee",
"Intended outcomes",
"MCS Compliance Certificate",
"Medium term improvement plan",
"Medium term low carbon plan",
"Mid Photo",
"Mid-Install Inspection",
"Minor Works Electrical Certificate",
"Monitoring and evaluation outcomes",
"Occupancy assessment",
"Other",
"Other commissioning certificates",
"Photo",
"Post Energy Performance Report (EPR)",
"Post installation RdSAP",
"Post Photo",
"Pre Energy Performance Report (EPR)",
"Pre installation RdSAP",
"Pre Photo",
"Pre-Design Building Survey",
"Pre-Installation Building Inspection",
"Product Data sheets",
"Product warranty",
"Property Assessment",
"Qualifications",
"Retrofit design",
"Risk assessment",
"Significance survey",
"Site Note (Green /Blue) and Certificate(s)",
"Ventilation Assessment",
"Ventilation Assessment Checklist",
"Ventilation Report",
"Welsh - Checklist",
]

View file

@ -40,10 +40,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
if (not pashub_email) or (not pashub_password):
raise ValueError("Pas Hub credentials not provided")
sharepoint_client = DomnaSharepointClient(
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
)
if coordination_hub_email and coordination_hub_password:
_coord_email, _coord_password = (
coordination_hub_email,
@ -57,6 +53,16 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
payload = PashubToAraTriggerRequest.model_validate(body)
logger.debug("Successfully validated request body")
sharepoint_client: Optional[DomnaSharepointClient] = None
if payload.sharepoint_site is not None:
try:
resolved_site = DomnaSites[payload.sharepoint_site]
sharepoint_client = DomnaSharepointClient(sharepoint_location=resolved_site)
except KeyError:
logger.warning(
f"Unrecognised sharepoint_site '{payload.sharepoint_site}'; skipping SharePoint upload"
)
service = PashubService(
pashub_client=get_pashub_client(pashub_email, pashub_password),
sharepoint_client=sharepoint_client,

View file

@ -1,6 +1,6 @@
from collections import defaultdict
import os
from typing import Dict, List, Optional
from typing import Dict, List, NamedTuple, Optional
from datetime import datetime
import requests
@ -13,6 +13,22 @@ from utils.logger import setup_logger
logger = setup_logger()
class DownloadedFile(NamedTuple):
file_path: str
evidence_category: Optional[str]
created_utc: datetime
class _EvidenceFileGroups(NamedTuple):
core: Dict[CoreFiles, EvidenceFileData]
other: List[EvidenceFileData]
class DownloadedFiles(NamedTuple):
core: List[DownloadedFile]
other: List[DownloadedFile]
class UnauthorizedError(Exception):
pass
@ -33,42 +49,60 @@ class PashubClient:
)
logger.info("Finished initialising CotalityClient")
def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]:
logger.info(f"Getting Core Evidence Files for job ID {job_id}")
def get_evidence_files_by_job_id(
self, job_id: str, include_other: bool = False
) -> DownloadedFiles:
logger.info(f"Getting evidence files for job ID {job_id}")
evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id)
logger.info(f"Found {len(evidence_list)} Evidence files to get")
logger.info(f"Found {len(evidence_list)} evidence files")
if not evidence_list:
return []
return DownloadedFiles(core=[], other=[])
saved_files: List[str] = []
core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files(
grouped: _EvidenceFileGroups = self._group_into_core_and_other_files(
evidence_list
)
logger.info(f"Number of core files to download is {len(core_files)}")
for _, evidence in core_files.items():
evidence_id = evidence.file_id
if not evidence_id:
core_files: List[DownloadedFile] = []
for _, evidence in grouped.core.items():
if not evidence.file_id:
continue
logger.info(f"Getting metadata for file {evidence.file_name}")
metadata: EvidenceMetadata = self._get_evidence_metadata(
job_id, evidence_id
job_id, evidence.file_id
)
download_url: str = self._build_download_url(metadata, evidence.file_id)
output_dir: str = "/tmp"
file_name: str = evidence.file_name
file_path: str = os.path.join(output_dir, file_name)
file_path: str = os.path.join("/tmp", evidence.file_name)
self._download_file(download_url, file_path)
logger.info("Successfully downloaded file")
saved_files.append(file_path)
core_files.append(
DownloadedFile(
file_path=file_path,
evidence_category=evidence.evidence_category,
created_utc=datetime.fromisoformat(evidence.created_utc),
)
)
return saved_files
other_files: List[DownloadedFile] = []
if include_other:
for evidence in grouped.other:
if not evidence.file_id:
continue
metadata = self._get_evidence_metadata(job_id, evidence.file_id)
download_url = self._build_download_url(metadata, evidence.file_id)
file_path = os.path.join("/tmp", evidence.file_name)
self._download_file(download_url, file_path)
logger.info("Successfully downloaded other file")
other_files.append(
DownloadedFile(
file_path=file_path,
evidence_category=evidence.evidence_category,
created_utc=datetime.fromisoformat(evidence.created_utc),
)
)
return DownloadedFiles(core=core_files, other=other_files)
def get_uprn_by_job_id(self, job_id: str) -> Optional[str]:
logger.info(f"Getting UPRN for job ID {job_id}")
@ -92,30 +126,32 @@ class PashubClient:
)
return None
def _select_latest_core_files(
def _group_into_core_and_other_files(
self,
files: List[EvidenceFileData],
) -> Dict[CoreFiles, EvidenceFileData]:
) -> _EvidenceFileGroups:
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
other: List[EvidenceFileData] = []
for file in files:
core_type: Optional[CoreFiles] = get_core_file_type(
file.file_name, file.evidence_category
)
if not core_type:
other.append(file)
continue
grouped[core_type].append(file)
latest_files: Dict[CoreFiles, EvidenceFileData] = {}
latest_core_files: Dict[CoreFiles, EvidenceFileData] = {}
for core_type, group in grouped.items():
if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1:
osm_candidates = [f for f in group if "-OSM-" in f.file_name]
group = osm_candidates if osm_candidates else group
latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
latest_files[core_type] = latest
latest_core_files[core_type] = latest
return latest_files
return _EvidenceFileGroups(core=latest_core_files, other=other)
def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]:
url = f"{self.base}/jobs/{job_id}/evidence"

View file

@ -11,11 +11,15 @@ from backend.app.db.models.uploaded_file import (
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import get_file_type_string
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
UnauthorizedError,
)
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
@ -34,7 +38,7 @@ class PashubService:
def __init__(
self,
pashub_client: PashubClient,
sharepoint_client: DomnaSharepointClient,
sharepoint_client: Optional[DomnaSharepointClient],
s3_bucket: str,
coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> None:
@ -75,14 +79,16 @@ class PashubService:
logger.info(f"No UPRN found for job {job_id}")
try:
job_files: List[str] = active_client.get_core_evidence_files_by_job_id(
job_id
downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id(
job_id, include_other=request.get_other_files
)
except UnauthorizedError:
if active_client is not self._pashub_client:
raise
active_client = self._get_coordination_client()
job_files = active_client.get_core_evidence_files_by_job_id(job_id)
downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id(
job_id, include_other=request.get_other_files
)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
@ -92,29 +98,47 @@ class PashubService:
else FileSourceEnum.COORDINATION_HUB
)
upload_records = self._upload_to_s3_and_update_db(
job_files, uprn, hubspot_deal_id, file_source
downloaded.core, uprn, hubspot_deal_id, file_source
)
self._save_site_notes(upload_records)
# SharePoint upload disabled: pashub sharepoint_link is inconsistent
# (points to property or project unpredictably)
# if request.sharepoint_link:
# self._upload_to_sharepoint(request.sharepoint_link, job_files)
if downloaded.other:
self._upload_to_s3_and_update_db(
downloaded.other,
uprn,
hubspot_deal_id,
file_source,
default_file_type=FileTypeEnum.OTHER.value,
)
for file_path in job_files:
if self._sharepoint_client and request.sharepoint_link and request.address:
folder_name = request.address.split("|")[0].strip()
folders = self._sharepoint_client.get_folders_in_path(request.sharepoint_link)
match = next(
(f["name"] for f in folders.get("value", []) if f["name"].lower() == folder_name.lower()),
None,
)
if match is None:
logger.warning(f"SharePoint folder not found for '{folder_name}' in {request.sharepoint_link}")
else:
property_folder_path = f"{request.sharepoint_link}/{match}"
self._upload_to_sharepoint(property_folder_path, downloaded.core + downloaded.other)
for df in downloaded.core + downloaded.other:
try:
os.remove(file_path)
os.remove(df.file_path)
except OSError:
logger.warning(f"Failed to delete temp file {file_path}")
logger.warning(f"Failed to delete temp file {df.file_path}")
return job_files
return [df.file_path for df in downloaded.core + downloaded.other]
def _upload_to_s3_and_update_db(
self,
job_files: List[str],
job_files: List[DownloadedFile],
uprn: Optional[str],
hubspot_deal_id: Optional[str],
file_source: FileSourceEnum,
default_file_type: Optional[str] = None,
) -> List[_FileUploadRecord]:
if not uprn and not hubspot_deal_id:
return []
@ -128,11 +152,11 @@ class PashubService:
file_paths: List[str] = []
uploaded_files: List[UploadedFile] = []
for file_path in job_files:
filename = os.path.basename(file_path)
for df in job_files:
filename = os.path.basename(df.file_path)
file_key = f"{base_path}/{filename}"
upload_file_to_s3(file_path, self._s3_bucket, file_key)
upload_file_to_s3(df.file_path, self._s3_bucket, file_key)
uploaded_file = UploadedFile(
s3_file_bucket=self._s3_bucket,
@ -141,9 +165,9 @@ class PashubService:
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=file_source.value,
file_type=get_file_type_string(filename),
file_type=get_file_type_string(filename, df.evidence_category) or default_file_type,
)
file_paths.append(file_path)
file_paths.append(df.file_path)
uploaded_files.append(uploaded_file)
with db_session() as session:
@ -180,11 +204,13 @@ class PashubService:
def _upload_to_sharepoint(
self,
sharepoint_link: str,
job_files: List[str],
property_folder_path: str,
files: List[DownloadedFile],
) -> None:
assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"
for file_path in job_files:
filename = file_path.split("/")[-1]
self._sharepoint_client.upload_file(file_path, assessment_path, filename)
assert self._sharepoint_client is not None
for df in files:
filename = os.path.basename(df.file_path)
try:
self._sharepoint_client.upload_file(df.file_path, property_folder_path, filename)
except Exception:
logger.warning(f"Failed to upload {filename} to SharePoint", exc_info=True)

View file

@ -14,6 +14,10 @@ class PashubToAraTriggerRequest(BaseModel):
hubspot_listing_id: Optional[int] = None
hubspot_deal_id: Optional[str] = None
sharepoint_site: Optional[str] = None
get_other_files: bool = False
@property
def pashub_job_id(self) -> str:
match = re.search(r"/jobs/([^/]+)", self.pashub_link)

View file

@ -183,3 +183,44 @@ def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present
# Assert
assert result is None
def test_core_file_for_mcs_compliance_certificate_returns_mcs_compliance_certificate() -> None:
# Arrange
filename = "MCS_cert_job123.pdf"
# Act
result = get_core_file_type(
filename, evidence_category="mcs compliance certificate"
)
# Assert
assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE
def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None:
# Arrange
filename = "some_cert.pdf"
# Act
result = get_core_file_type(
filename, evidence_category="MCS Compliance Certificate"
)
# Assert
assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE
def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_compliance_certificate() -> (
None
):
# Arrange
filename = "some_cert.pdf"
# Act
result = get_file_type_string(
filename, evidence_category="MCS Compliance Certificate"
)
# Assert
assert result == "mcs_compliance_certificate"

View file

@ -1,9 +1,22 @@
# pyright: reportPrivateUsage=false
from typing import Optional
from unittest.mock import patch
from backend.pashub_fetcher.core_files import CoreFiles
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
)
def make_metadata() -> EvidenceMetadata:
return EvidenceMetadata(
container_name="my-container",
blob_uri="https://storage.example.com/blob?sas=token",
)
def make_client() -> PashubClient:
@ -26,11 +39,27 @@ def make_file(
# ---------------------------------------------------------------------------
# _select_latest_core_files
# _group_into_core_and_other_files
# ---------------------------------------------------------------------------
def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None:
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="some_unknown_document.pdf"),
]
# Act
result = client._group_into_core_and_other_files(files)
# Assert
assert CoreFiles.SITENOTE in result.core
assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"]
def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None:
# Arrange
client = make_client()
files = [
@ -42,13 +71,16 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name
== "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
)
def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None:
# Arrange - the non-OSM file is newer but should lose to the OSM file
client = make_client()
files = [
@ -65,13 +97,18 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name
== "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
)
def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> (
None
):
# Arrange
client = make_client()
files = [
@ -88,13 +125,62 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name
== "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
)
def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="MCS_cert_job123.pdf",
evidence_category="MCS Compliance Certificate",
),
]
# Act
result = client._group_into_core_and_other_files(files)
# Assert
assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in result.core
assert result.other == []
def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="mcs_cert_old.pdf",
evidence_category="MCS Compliance Certificate",
created_utc="2024-01-01T00:00:00",
),
make_file(
file_name="mcs_cert_new.pdf",
evidence_category="MCS Compliance Certificate",
created_utc="2024-06-01T00:00:00",
),
]
# Act
result = client._group_into_core_and_other_files(files)
# Assert
assert (
result.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE].file_name
== "mcs_cert_new.pdf"
)
def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> (
None
):
# Arrange
client = make_client()
files = [
@ -111,7 +197,84 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates()
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
)
# ---------------------------------------------------------------------------
# get_evidence_files_by_job_id
# ---------------------------------------------------------------------------
def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> (
None
):
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="unknown_doc.pdf"),
]
# Act
with (
patch.object(client, "_get_evidence_list", return_value=files),
patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
patch.object(client, "_download_file"),
):
result = client.get_evidence_files_by_job_id("job-1", include_other=False)
# Assert
assert isinstance(result, DownloadedFiles)
assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"]
assert result.other == []
def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="MCS_cert.pdf",
evidence_category="MCS Compliance Certificate",
),
]
# Act
with (
patch.object(client, "_get_evidence_list", return_value=files),
patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
patch.object(client, "_download_file"),
):
result = client.get_evidence_files_by_job_id("job-1", include_other=False)
# Assert
assert len(result.core) == 1
assert result.core[0].evidence_category == "MCS Compliance Certificate"
def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> (
None
):
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="unknown_doc.pdf"),
]
# Act
with (
patch.object(client, "_get_evidence_list", return_value=files),
patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
patch.object(client, "_download_file"),
):
result = client.get_evidence_files_by_job_id("job-1", include_other=True)
# Assert
assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"]
assert [df.file_path for df in result.other] == ["/tmp/unknown_doc.pdf"]

View file

@ -1,17 +1,22 @@
import pytest
from datetime import datetime
from typing import Any, Callable, Optional
from unittest.mock import MagicMock, call, patch
from backend.app.db.models.uploaded_file import FileSourceEnum
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
UnauthorizedError,
)
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details"
@ -20,12 +25,16 @@ def make_request(
uprn: Optional[str] = None,
hubspot_deal_id: Optional[str] = None,
sharepoint_link: Optional[str] = None,
get_other_files: bool = False,
address: Optional[str] = None,
) -> PashubToAraTriggerRequest:
return PashubToAraTriggerRequest(
pashub_link=pashub_link,
uprn=uprn,
hubspot_deal_id=hubspot_deal_id,
sharepoint_link=sharepoint_link,
get_other_files=get_other_files,
address=address,
)
@ -43,6 +52,16 @@ def make_service(
)
_DEFAULT_UTC = datetime(2024, 1, 1)
def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles:
return DownloadedFiles(
core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core],
other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other],
)
# ---------------------------------------------------------------------------
# run(): returns file paths
# ---------------------------------------------------------------------------
@ -51,10 +70,9 @@ def make_service(
def test_run_returns_file_paths() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/a.pdf",
"/tmp/b.pdf",
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf", "/tmp/b.pdf"]
)
service = make_service(pashub_client=mock_client)
@ -64,6 +82,30 @@ def test_run_returns_file_paths() -> None:
assert result == ["/tmp/a.pdf", "/tmp/b.pdf"]
# ---------------------------------------------------------------------------
# run(): returns core + other file paths when get_other_files=True
# ---------------------------------------------------------------------------
def test_run_returns_core_and_other_file_paths() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"],
other=["/tmp/other.pdf"],
)
service = make_service(pashub_client=mock_client)
# Act
with patch("backend.pashub_fetcher.pashub_service.os.remove"):
result = service.run(make_request(get_other_files=True))
# Assert
assert result == ["/tmp/core.pdf", "/tmp/other.pdf"]
# ---------------------------------------------------------------------------
# run(): skips upload when neither uprn nor hubspot_deal_id
# ---------------------------------------------------------------------------
@ -72,7 +114,9 @@ def test_run_returns_file_paths() -> None:
def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
service = make_service(pashub_client=mock_client)
@ -93,10 +137,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
def test_run_uploads_files_to_s3_using_uprn_path() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf",
"/tmp/Photopack_002.pdf",
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"]
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
@ -132,9 +175,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None:
def test_run_persists_uploaded_file_records_to_db() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"]
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
@ -163,9 +206,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None:
def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"]
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
@ -191,9 +234,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/RdSAP_SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/RdSAP_SiteNote_001.pdf"]
)
fake_epc_data = MagicMock()
fake_session = MagicMock()
@ -241,11 +284,15 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -256,20 +303,24 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
assert result == ["/tmp/a.pdf"]
coord_client.get_uprn_by_job_id.assert_called_once()
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
coord_client.get_evidence_files_by_job_id.assert_called_once()
assert factory.call_count == 1
def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -279,7 +330,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
result = service.run(make_request(uprn="12345"))
assert result == ["/tmp/a.pdf"]
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
coord_client.get_evidence_files_by_job_id.assert_called_once()
pas_client.get_uprn_by_job_id.assert_not_called()
@ -302,24 +353,32 @@ def test_run_raises_unauthorized_when_both_clients_401() -> None:
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with pytest.raises(UnauthorizedError):
service.run(make_request())
def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None:
def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> (
None
):
pas_client = MagicMock(spec=PashubClient)
pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
fake_session = MagicMock()
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -334,17 +393,23 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup()
assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value
def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None:
def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> (
None
):
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
fake_session = MagicMock()
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -359,12 +424,231 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing(
assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value
# ---------------------------------------------------------------------------
# run(): get_other_files=True → other temp files deleted after run
# ---------------------------------------------------------------------------
def test_run_deletes_other_temp_files_when_get_other_files_true() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"],
other=["/tmp/other.pdf"],
)
service = make_service(pashub_client=mock_client)
# Act
with patch("backend.pashub_fetcher.pashub_service.os.remove") as mock_remove:
service.run(make_request(get_other_files=True))
# Assert
mock_remove.assert_any_call("/tmp/core.pdf")
mock_remove.assert_any_call("/tmp/other.pdf")
# ---------------------------------------------------------------------------
# run(): get_other_files=True → other files uploaded to S3
# ---------------------------------------------------------------------------
def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"],
other=["/tmp/unknown_file.pdf"],
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
# Act
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3,
patch("backend.pashub_fetcher.pashub_service.db_session"),
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
service.run(make_request(uprn="12345", get_other_files=True))
# Assert
mock_s3.assert_any_call(
"/tmp/unknown_file.pdf",
"my-bucket",
"documents/uprn/12345/unknown_file.pdf",
)
# ---------------------------------------------------------------------------
# run(): get_other_files=True → other files persisted with file_type OTHER
# ---------------------------------------------------------------------------
def test_run_persists_other_files_with_other_file_type() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=[],
other=["/tmp/unknown_file.pdf"],
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
# Act
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
mock_db.return_value.__enter__.return_value = fake_session
service.run(make_request(uprn="12345", get_other_files=True))
# Assert
all_added = [item for c in fake_session.add_all.call_args_list for item in c[0][0]]
assert len(all_added) == 1
assert all_added[0].file_type == FileTypeEnum.OTHER.value
def test_run_persists_mcs_cert_with_mcs_compliance_certificate_file_type() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles(
core=[
DownloadedFile(
"/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1)
)
],
other=[],
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
# Act
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
mock_db.return_value.__enter__.return_value = fake_session
service.run(make_request(uprn="12345"))
# Assert
fake_session.add_all.assert_called_once()
added: list[Any] = fake_session.add_all.call_args[0][0]
assert added[0].file_type == FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value
# ---------------------------------------------------------------------------
# run(): SharePoint upload
# ---------------------------------------------------------------------------
def test_sharepoint_uploads_all_files_to_property_folder() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"],
other=["/tmp/other.pdf"],
)
mock_sharepoint = MagicMock(spec=DomnaSharepointClient)
mock_sharepoint.get_folders_in_path.return_value = {
"value": [{"name": "123 Main St"}]
}
service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint)
# Act
with patch("backend.pashub_fetcher.pashub_service.os.remove"):
service.run(
make_request(
sharepoint_link="Retrofit/Properties",
get_other_files=True,
address="123 Main St | some deal",
)
)
# Assert
mock_sharepoint.upload_file.assert_any_call(
"/tmp/core.pdf", "Retrofit/Properties/123 Main St", "core.pdf"
)
mock_sharepoint.upload_file.assert_any_call(
"/tmp/other.pdf", "Retrofit/Properties/123 Main St", "other.pdf"
)
def test_sharepoint_skips_upload_when_folder_not_found() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"]
)
mock_sharepoint = MagicMock(spec=DomnaSharepointClient)
mock_sharepoint.get_folders_in_path.return_value = {
"value": [{"name": "Different Property"}]
}
service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint)
# Act
with (
patch("backend.pashub_fetcher.pashub_service.os.remove"),
patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger,
):
service.run(
make_request(
sharepoint_link="Retrofit/Properties",
address="No Such Property | deal",
)
)
# Assert
mock_sharepoint.upload_file.assert_not_called()
mock_logger.warning.assert_called()
def test_sharepoint_skips_upload_when_sharepoint_client_is_none() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"]
)
service = PashubService(
pashub_client=mock_client,
sharepoint_client=None,
s3_bucket="test-bucket",
)
# Act — should not raise AttributeError on None._sharepoint_client
with patch("backend.pashub_fetcher.pashub_service.os.remove"):
result = service.run(
make_request(
sharepoint_link="Retrofit/Properties",
address="123 Main St | deal",
)
)
# Assert
assert result == ["/tmp/core.pdf"]
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/RdSAP_SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/RdSAP_SiteNote_001.pdf"]
)
service = make_service(pashub_client=mock_client)

View file

@ -0,0 +1,50 @@
from unittest.mock import MagicMock, patch
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
def _configure_playwright_mock(mock_sync_playwright: MagicMock) -> None:
mock_page = MagicMock()
mock_page.url = "https://pashub.net/dashboard"
mock_page.evaluate.return_value = "fake-token"
mock_context = MagicMock()
mock_context.new_page.return_value = mock_page
mock_browser = MagicMock()
mock_browser.new_context.return_value = mock_context
mock_p = MagicMock()
mock_p.chromium.launch.return_value = mock_browser
mock_sync_playwright.return_value.__enter__.return_value = mock_p
@patch("backend.pashub_fetcher.token_getter.shutil.rmtree")
@patch("backend.pashub_fetcher.token_getter.glob.glob")
@patch("backend.pashub_fetcher.token_getter.sync_playwright")
def test_playwright_tmp_dirs_are_cleaned_up_after_browser_close(
mock_sync_playwright: MagicMock,
mock_glob: MagicMock,
mock_rmtree: MagicMock,
) -> None:
# Arrange
fake_artifacts = ["/tmp/playwright-artifacts-abc12"]
fake_profiles = ["/tmp/playwright_chromiumdev_profile-xyz99"]
def glob_side_effect(pattern: str) -> list[str]:
if "playwright-artifacts-*" in pattern:
return fake_artifacts
if "playwright_chromiumdev_profile-*" in pattern:
return fake_profiles
return []
mock_glob.side_effect = glob_side_effect
_configure_playwright_mock(mock_sync_playwright)
# Act
get_token_from_local_storage("user@example.com", "secret")
# Assert
mock_rmtree.assert_any_call("/tmp/playwright-artifacts-abc12", ignore_errors=True)
mock_rmtree.assert_any_call("/tmp/playwright_chromiumdev_profile-xyz99", ignore_errors=True)

View file

@ -1,4 +1,6 @@
import glob
import os
import shutil
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
@ -90,5 +92,12 @@ def get_token_from_local_storage(
context.close()
browser.close()
for pattern in (
"/tmp/playwright-artifacts-*",
"/tmp/playwright_chromiumdev_profile-*",
):
for path in glob.glob(pattern):
shutil.rmtree(path, ignore_errors=True)
if record_video and video_dir:
logger.info(f"Video(s) saved in: {video_dir}")

View file

@ -10,19 +10,19 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
)
from backend.pashub_fetcher.handler.handler import handler
if __name__ == "__main__":
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
filepath: str = os.path.join(
BASE_DIR,
"pashub_fetcher",
"The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx",
"local_run_02-06-2026",
"ECO_Approach_Coordination_Design_KN.xlsx",
)
wb = load_workbook(filepath, data_only=True)
ws = wb["filtered_2"]
ws = wb["filtered"]
HEADER_ROW = 3
HEADER_ROW = 1
headers: Dict[str, int] = {}
for col in range(1, ws.max_column + 1):
@ -31,7 +31,7 @@ if __name__ == "__main__":
headers[value.strip()] = col
name_col = headers["Name"]
link_col = headers["PasHub Link"]
link_col = headers["PasHub ID"]
hubspot_deal_id_col = headers["HubSpot ID"]
trigger_requests: List[PashubToAraTriggerRequest] = []
@ -50,7 +50,10 @@ if __name__ == "__main__":
trigger_requests.append(
PashubToAraTriggerRequest(
pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id)
pashub_link=str(link),
hubspot_deal_id=str(hubspot_deal_id),
address=str(name),
get_other_files=True,
)
)

View file

@ -16,40 +16,48 @@ logger: logging.Logger = logging.getLogger(__name__)
DRY_RUN: bool = False
DEAL_ID_FILTER: frozenset[str] = frozenset(
{
"379452094688",
"379466504437",
"379660170452",
"380016925932",
"379848065216",
"379466504434",
"379452094690",
"379965924567",
"380016925923",
"379792072898",
"379654754502",
"379560262861",
"379969670369",
"379248717001",
"379971468493",
"379999888607",
"379606372580",
"379969603797",
"379967743213",
"379263155434",
"379855267025",
"379889899719",
"379071064307",
"379867925741",
}
)
# DEAL_ID_FILTER: frozenset[str] = frozenset(
# {
# "379452094688",
# "379466504437",
# "379660170452",
# "380016925932",
# "379848065216",
# "379466504434",
# "379452094690",
# "379965924567",
# "380016925923",
# "379792072898",
# "379654754502",
# "379560262861",
# "379969670369",
# "379248717001",
# "379971468493",
# "379999888607",
# "379606372580",
# "379969603797",
# "379967743213",
# "379263155434",
# "379855267025",
# "379889899719",
# "379071064307",
# "379867925741",
# }
# )
DEAL_ID_FILTER = None
EXCEL_PATH: str = os.path.join(
os.path.dirname(__file__),
"united-infrastructure-exports-all-deals-2026-05-14.xlsx",
"local_run_02-06-2026/ECO_Approach_Coordination_Design_KN.xlsx",
)
SHAREPOINT_PROPERTIES_FOLDER: str = (
"Housing Associations/- Client Shared Folders/Abri/Abri Property Folders (Full PAS Info)"
)
SHAREPOINT_SITE: str = "ECO"
def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
wb = load_workbook(excel_path, data_only=True)
@ -61,10 +69,10 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
if header_val is not None:
headers[str(header_val).strip()] = col
pashub_col: int = headers["PasHub link"]
record_id_col: int = headers["Record ID"]
deal_name_col: int = headers["Deal Name"]
deal_stage_col: int = headers["Deal Stage"]
pashub_col: int = headers["PasHub ID"]
record_id_col: int = headers["HubSpot ID"]
deal_name_col: int = headers["Name"]
deal_stage_col: Optional[int] = headers.get("Deal Stage", None)
requests: list[PashubToAraTriggerRequest] = []
@ -77,7 +85,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
record_id_raw = ws.cell(row=row, column=record_id_col).value
deal_name_raw = ws.cell(row=row, column=deal_name_col).value
deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value
deal_stage_raw = (
ws.cell(row=row, column=deal_stage_col).value if deal_stage_col else None
)
hubspot_deal_id: Optional[str] = (
str(record_id_raw) if record_id_raw is not None else None
@ -95,6 +105,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
hubspot_deal_id=hubspot_deal_id,
address=address,
deal_stage=deal_stage,
sharepoint_link=SHAREPOINT_PROPERTIES_FOLDER or None,
sharepoint_site=SHAREPOINT_SITE,
get_other_files=True,
)
)
@ -116,7 +129,7 @@ def main() -> None:
for request in trigger_requests:
action: str = "DRY RUN" if DRY_RUN else "SENDING"
logger.info(
f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}"
f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link} sharepoint_link={request.sharepoint_link}"
)
if not DRY_RUN:

View file

@ -324,6 +324,11 @@ class SapEnergySource:
photovoltaic_arrays: Optional[List[PhotovoltaicArray]] = None
wind_turbine_details: Optional[WindTurbineDetails] = None
pv_batteries: Optional[PvBatteries] = None
# SAP 10.2 Appendix G4 — a PV diverter present on the dwelling routes
# surplus PV to a hot-water cylinder immersion. Drives worksheet
# (63b)m. Set from the API `sap_energy_source.pv_diverter` flag or the
# Elmhurst Summary §19 "Diverter present" row.
pv_diverter_present: bool = False
@dataclass
@ -754,3 +759,25 @@ class EpcPropertyData:
solar_hw_collector_orientation: Optional[str] = None
solar_hw_collector_pitch_deg: Optional[int] = None
solar_hw_overshading: Optional[str] = None
@property
def system_build(self) -> Optional[bool]:
"""Whether the dwelling's MAIN wall is system-built.
System-built is a WALL TYPE: RdSAP10 `WALL_SYSTEM_BUILT == 6` on
the main wall (the U-value cascade table is keyed on that code).
It happens to share the integer with basement walls so a code-6
main wall is system-built only when it is NOT flagged as a
basement (`main_wall_is_basement`, the dedicated basement signal
the mapper sets from the distinct "SY"/"B" labels or the cert
addendum). Reading the wall type keeps the two concerns separate:
`wall_construction` carries the construction, the basement flag
carries the below-grade attribute. Returns None when there is no
MAIN building part (unknown)."""
for part in self.sap_building_parts:
if part.identifier is BuildingPartIdentifier.MAIN:
return (
part.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE
and not part.main_wall_is_basement
)
return None

View file

@ -1,10 +1,12 @@
import re
from dataclasses import replace
from datetime import date
from decimal import ROUND_HALF_UP, Decimal
from typing import Any, Dict, Final, List, Optional, Sequence, Union, cast
from datatypes.epc.schema.helpers import from_dict
from datatypes.epc.domain.epc_property_data import (
BASEMENT_WALL_CONSTRUCTION_CODE,
Addendum,
BuildingPartIdentifier,
EnergyElement,
@ -341,6 +343,7 @@ class EpcPropertyDataMapper:
wind_turbines_terrain_type=survey.renewables.wind_turbines_terrain_type,
electricity_smart_meter_present=survey.meters.electricity_smart_meter,
photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables),
pv_diverter_present=survey.renewables.pv_diverter_present,
# RdSAP 10 §11.1 b): when the cert lodges only a "% of
# roof area" PV figure (no detailed kWp / orientation),
# surface it through `photovoltaic_supply` so the
@ -1391,6 +1394,7 @@ class EpcPropertyDataMapper:
else None
),
pv_batteries=_first_pv_battery(es.pv_batteries),
pv_diverter_present=es.pv_diverter == "true",
),
sap_building_parts=[
SapBuildingPart(
@ -1630,11 +1634,17 @@ class EpcPropertyDataMapper:
for w in schema.sap_windows
if not _api_is_roof_window(w)
],
# Empty → None (not []) so "no roof windows" has the single
# canonical representation the domain field defaults to
# (`Optional[List] = None`), matching the 21.0.0 path and the
# persistence round-trip (roof windows aren't yet stored — doc
# §2.4 — so a reloaded cert always reads None here).
sap_roof_windows=[
_api_sap_roof_window(w)
for w in schema.sap_windows
if _api_is_roof_window(w)
],
]
or None,
# SAP energy source
sap_energy_source=SapEnergySource(
mains_gas=es.mains_gas == "Y",
@ -1658,6 +1668,7 @@ class EpcPropertyDataMapper:
else None
),
pv_batteries=_first_pv_battery(es.pv_batteries),
pv_diverter_present=es.pv_diverter == "true",
),
# SAP building parts
sap_building_parts=[
@ -1916,14 +1927,18 @@ class EpcPropertyDataMapper:
if schema == "RdSAP-Schema-21.0.1":
from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1
return EpcPropertyDataMapper.from_rdsap_schema_21_0_1(
from_dict(RdSapSchema21_0_1, data)
return _clear_basement_flag_when_system_built(
EpcPropertyDataMapper.from_rdsap_schema_21_0_1(
from_dict(RdSapSchema21_0_1, data)
)
)
if schema == "RdSAP-Schema-21.0.0":
from datatypes.epc.schema.rdsap_schema_21_0_0 import RdSapSchema21_0_0
return EpcPropertyDataMapper.from_rdsap_schema_21_0_0(
from_dict(RdSapSchema21_0_0, data)
return _clear_basement_flag_when_system_built(
EpcPropertyDataMapper.from_rdsap_schema_21_0_0(
from_dict(RdSapSchema21_0_0, data)
)
)
raise ValueError(f"Unsupported EPC schema: {schema!r}")
@ -1933,6 +1948,68 @@ class EpcPropertyDataMapper:
# ---------------------------------------------------------------------------
def _clear_basement_flag_when_system_built(
epc: EpcPropertyData,
) -> EpcPropertyData:
"""When the dwelling is system-built, a `wall_construction == 6` wall
is WALL_SYSTEM_BUILT, not a basement so the gov-EPC API code-6
basement heuristic must not fire for it. The API path can't tell the
two apart at the per-wall level (both lodge code 6), so once the
cert-level `system_build` flag is known we clear the basement signal
on every code-6 wall that hasn't been explicitly determined
(`wall_is_basement` / `is_basement` still None). No-op unless the
dwelling is system-built, so genuine basements (system_build absent /
False) keep the code-6 heuristic. Returns the same object when
nothing changes.
The Elmhurst path sets the per-wall flag directly from the distinct
"SY"/"B" labels, so it never reaches here (it routes through
`from_elmhurst_site_notes`, not `from_api_response`).
Keyed on the RAW cert `addendum.system_build` signal rather than the
derived `epc.system_build` property the property reads the wall
type AFTER this clears the basement flag, so using it here would be
circular."""
if epc.addendum is None or epc.addendum.system_build is not True:
return epc
def _clear_alt(alt: Optional[SapAlternativeWall]) -> Optional[SapAlternativeWall]:
if (
alt is not None
and alt.is_basement is None
and alt.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE
):
return replace(alt, is_basement=False)
return alt
new_parts: List[SapBuildingPart] = []
changed = False
for part in epc.sap_building_parts:
new_alt_1 = _clear_alt(part.sap_alternative_wall_1)
new_alt_2 = _clear_alt(part.sap_alternative_wall_2)
clear_main = (
part.wall_is_basement is None
and part.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE
)
if clear_main or new_alt_1 is not part.sap_alternative_wall_1 or (
new_alt_2 is not part.sap_alternative_wall_2
):
changed = True
new_parts.append(
replace(
part,
wall_is_basement=False if clear_main else part.wall_is_basement,
sap_alternative_wall_1=new_alt_1,
sap_alternative_wall_2=new_alt_2,
)
)
else:
new_parts.append(part)
if not changed:
return epc
return replace(epc, sap_building_parts=new_parts)
def _measurement_value(field: Any) -> float:
"""SAP measurements arrive as a `Measurement` (with `.value`), a raw dict
{'value': N, 'quantity': '...'} when `from_dict` didn't coerce, or a plain
@ -2246,6 +2323,40 @@ def _elmhurst_dwelling_type(
return f"{position} flat"
# Elmhurst roof-type codes → SAP10 roof_construction integer, matching the
# gov-EPC API codes in `_API_ROOF_CONSTRUCTION_TO_STR` so the two
# front-ends populate the same field. Harvested from the committed
# Elmhurst Summary fixtures (corpus + cohort): F/PN/PA/PS/S/A. Vaulted (5)
# and thatched (6) are omitted until a fixture surfaces their Elmhurst
# codes. NR ("Non-residential space above") is intentionally left
# unmapped — the gov enum's code 7 is specifically "dwelling above".
_ELMHURST_ROOF_CODE_TO_SAP10: Dict[str, int] = {
"F": 1, # Flat
"PN": 3, # Pitched (slates/tiles), no access (to loft)
"PA": 4, # Pitched (slates/tiles), access to loft
"PS": 8, # Pitched, sloping ceiling
"S": 7, # Same dwelling above
"A": 7, # Another dwelling above
}
def _elmhurst_roof_construction_int(coded: Optional[str]) -> Optional[int]:
"""Map an Elmhurst roof_type string ('PA Pitched (slates/tiles),
access to loft') to the SAP10 `roof_construction` integer the gov-EPC
API lodges (4), so the site-notes and API front-ends populate the
same field (cross-mapper structural parity).
Returns None for an absent or unmapped code and, unlike
`_elmhurst_wall_construction_int`, does NOT raise. `roof_construction`
(int) is not read by the SAP cascade (which reads the string
`roof_construction_type`, populated on both paths), so an unmapped
roof code stays None the pre-existing Elmhurst behaviour rather
than blocking the cert."""
if not coded:
return None
return _ELMHURST_ROOF_CODE_TO_SAP10.get(_leading_code(coded))
def _elmhurst_wall_construction_int(coded: str) -> Optional[int]:
"""Map an Elmhurst wall_type string ('CA Cavity') to the SAP10
integer code (4). Returns None when the lodging is absent (empty
@ -3426,6 +3537,7 @@ def _map_elmhurst_building_part(
if walls.insulation_thickness_mm is not None
else None
),
roof_construction=_elmhurst_roof_construction_int(roof.roof_type),
roof_construction_type=_strip_code(roof.roof_type),
roof_insulation_location=_strip_code(roof.insulation),
roof_insulation_thickness=_resolve_sloping_ceiling_thickness(
@ -4967,6 +5079,36 @@ _ELMHURST_GLAZING_LABEL_TO_SAP10: Dict[str, int] = {
# solar_transmittance=0.72 overrides per worksheet-pinned value.
"Triple between 2002 and 2021": 9,
"Secondary": 7,
# Elmhurst §11 lodges the full "Secondary glazing" phrase as well as
# the bare "Secondary" form — both are SAP 10.2 Table 6b "window with
# secondary glazing" (cascade code 7, g_L=0.80, g⊥=0.76). The
# RdSAP-21 schema splits secondary glazing by pane emissivity; the
# "Normal emissivity" variant is its own code 11 (g_L=0.80, identical
# cascade output to 7, kept distinct for the strict-raise audit
# trail). Surfaced on the double_glazing/before recommendation
# fixture (Summary_001431 §11 Window 9).
"Secondary glazing": 7,
"Secondary glazing - Normal emissivity": 11,
# RdSAP-21 glazed_type 12 "secondary glazing, low emissivity" — the
# low-E sibling of code 11. Cascade code 12 carries g_L=0.80 / g⊥=0.76
# (identical daylight/solar bucket to 7 and 11; the lodged U/g drive
# §3/§6). Mapped symmetrically with the "Normal emissivity" variant so
# the whole secondary-glazing family is covered.
"Secondary glazing - Low emissivity": 12,
# RdSAP-21 row "triple glazing, installed pre-2002" (cascade code 10,
# g_L=0.70, g⊥=0.68 — same triple-glazed daylight/solar bucket as the
# other triple variants {6, 8, 9, 14}). The lodged manufacturer
# U-value / solar_transmittance drive §6; the code only sets g_L.
"Triple pre 2002": 10,
# Triple glazing of unknown install date → the generic SAP 10.2
# Table 6b "triple glazed" row (cascade code 6, g_L=0.70). No
# dedicated "triple, unknown date" enum exists; 6 is the correct
# triple-glazed bucket.
"Triple with unknown install date": 6,
# RdSAP-21 row "single glazing, known data" (cascade code 15,
# g_L=0.90, g⊥=0.85 — same as plain single glazing). Manufacturer
# U/g lodged on WindowTransmissionDetails drive §6.
"Single glazing, known data": 15,
}
_ELMHURST_GLAZING_LABEL_NOISE_PREFIX_RE: Final[re.Pattern[str]] = re.compile(

View file

@ -717,11 +717,16 @@ class TestApiResolveWallInsulationThickness:
_api_resolve_wall_insulation_thickness,
)
lodged_thickness = "measured"
measured_value_mm = 100
# Act
resolved = _api_resolve_wall_insulation_thickness("measured", 100)
resolved = _api_resolve_wall_insulation_thickness(
lodged_thickness, measured_value_mm
)
# Assert
assert resolved == 100
assert resolved == measured_value_mm
def test_non_measured_lodgement_passes_through_unchanged(self) -> None:
# Arrange
@ -729,12 +734,17 @@ class TestApiResolveWallInsulationThickness:
_api_resolve_wall_insulation_thickness,
)
ni_lodgement = "NI"
measured_value_mm = 100
# Act
ni: object = _api_resolve_wall_insulation_thickness("NI", 100)
ni: object = _api_resolve_wall_insulation_thickness(
ni_lodgement, measured_value_mm
)
none_thk: object = _api_resolve_wall_insulation_thickness(None, None)
# Assert
assert ni == "NI"
assert ni == ni_lodgement
assert none_thk is None
def test_measured_without_value_passes_through(self) -> None:
@ -743,11 +753,16 @@ class TestApiResolveWallInsulationThickness:
_api_resolve_wall_insulation_thickness,
)
lodged_thickness = "measured"
measured_value_mm = None
# Act
resolved: object = _api_resolve_wall_insulation_thickness("measured", None)
resolved: object = _api_resolve_wall_insulation_thickness(
lodged_thickness, measured_value_mm
)
# Assert
assert resolved == "measured"
assert resolved == lodged_thickness
# ---------------------------------------------------------------------------

View file

@ -133,6 +133,7 @@ class SapEnergySource:
wind_turbines_terrain_type: int
electricity_smart_meter_present: str
pv_batteries: Optional[PvBatteries] = None
pv_diverter: Optional[str] = None
@dataclass

View file

@ -161,6 +161,7 @@ class SapEnergySource:
wind_turbines_terrain_type: int
electricity_smart_meter_present: str
pv_battery_count: Optional[int] = None
pv_diverter: Optional[str] = None
wind_turbine_details: Optional[WindTurbineDetails] = None
pv_batteries: Optional[Union[PvBatteries, List[PvBatteries]]] = None

View file

@ -418,6 +418,11 @@ class Renewables:
solar_hw_collector_orientation: Optional[str] = None
solar_hw_collector_pitch_deg: Optional[int] = None
solar_hw_overshading: Optional[str] = None
# Summary §19.0 "Diverter present" — a PV diverter routes surplus PV
# generation to an immersion heater in the hot-water cylinder
# (SAP 10.2 Appendix G4). Drives worksheet (63b)m. Defaults False
# when the cert lodges no PV or "Diverter present = No".
pv_diverter_present: bool = False
@dataclass

View file

@ -0,0 +1,49 @@
data "terraform_remote_state" "shared" {
backend = "s3"
config = {
bucket = "assessment-model-terraform-state"
key = "env:/${var.stage}/terraform.tfstate"
region = "eu-west-2"
}
}
data "aws_secretsmanager_secret_version" "db_credentials" {
secret_id = "${var.stage}/assessment_model/db_credentials"
}
locals {
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
}
module "lambda" {
source = "../../modules/lambda_with_sqs"
name = "bulk-upload-finaliser"
stage = var.stage
image_uri = local.image_uri
# The finaliser reads the combiner CSV and does one bulk INSERT IO-light, but
# a property list can be ~40,000 rows, so 300s leaves ample headroom under the
# queue visibility timeout. batch_size = 1 keeps one upload per invocation so a
# bad record can't redrive its siblings; maximum_concurrency caps DB write
# fan-out.
timeout = 300
batch_size = 1
maximum_concurrency = 2
environment = merge(
{
STAGE = var.stage
LOG_LEVEL = "info"
POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username
POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password
},
)
}
# Attach S3 read policy so the handler can read the combiner output CSV.
resource "aws_iam_role_policy_attachment" "bulk_upload_finaliser_s3_read" {
role = module.lambda.role_name
policy_arn = data.terraform_remote_state.shared.outputs.bulk_upload_finaliser_s3_read_arn
}

View file

@ -0,0 +1,9 @@
output "bulk_upload_finaliser_queue_url" {
value = module.lambda.queue_url
description = "URL of the Bulk Upload Finaliser SQS queue (wire into the FastAPI FINALISER_SQS_URL)"
}
output "bulk_upload_finaliser_queue_arn" {
value = module.lambda.queue_arn
description = "ARN of the Bulk Upload Finaliser SQS queue"
}

View file

@ -0,0 +1,16 @@
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
version = ">= 5.0"
}
}
backend "s3" {
bucket = "bulk-upload-finaliser-terraform-state"
key = "terraform.tfstate"
region = "eu-west-2"
}
required_version = ">= 1.2.0"
}

View file

@ -0,0 +1,27 @@
variable "lambda_name" {
type = string
description = "Logical name of the lambda (e.g. bulkUploadFinaliser)"
}
variable "stage" {
description = "Deployment stage (e.g. dev, prod)"
type = string
}
variable "ecr_repo_url" {
type = string
description = "ECR repository URL (no tag, no digest)"
}
variable "image_digest" {
type = string
description = "Image digest (sha256:...)"
}
locals {
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}
output "resolved_image_uri" {
value = local.image_uri
}

View file

@ -46,6 +46,15 @@ data "terraform_remote_state" "bulk_address2uprn_combiner" {
}
}
data "terraform_remote_state" "bulk_upload_finaliser" {
backend = "s3"
config = {
bucket = "bulk-upload-finaliser-terraform-state",
key = "env:/${var.stage}/terraform.tfstate"
region = "eu-west-2"
}
}
############################################
# Load Credentials
############################################
@ -105,6 +114,7 @@ module "fastapi" {
CATEGORISATION_SQS_URL = data.terraform_remote_state.categorisation.outputs.categorisation_queue_url
POSTCODE_SPLITTER_SQS_URL = data.terraform_remote_state.postcode_splitter.outputs.postcode_splitter_queue_url
COMBINER_SQS_URL = data.terraform_remote_state.bulk_address2uprn_combiner.outputs.bulk_address2uprn_combiner_queue_url
FINALISER_SQS_URL = data.terraform_remote_state.bulk_upload_finaliser.outputs.bulk_upload_finaliser_queue_url
}
}
@ -126,7 +136,8 @@ module "fastapi_sqs_policy" {
data.terraform_remote_state.engine.outputs.ara_engine_queue_arn,
data.terraform_remote_state.categorisation.outputs.categorisation_queue_arn,
data.terraform_remote_state.postcode_splitter.outputs.postcode_splitter_queue_arn,
data.terraform_remote_state.bulk_address2uprn_combiner.outputs.bulk_address2uprn_combiner_queue_arn
data.terraform_remote_state.bulk_address2uprn_combiner.outputs.bulk_address2uprn_combiner_queue_arn,
data.terraform_remote_state.bulk_upload_finaliser.outputs.bulk_upload_finaliser_queue_arn
]
conditions = null

View file

@ -22,6 +22,7 @@ module "lambda" {
stage = var.stage
image_uri = local.image_uri
timeout = var.timeout
# Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000)
maximum_concurrency = var.maximum_concurrency
@ -47,6 +48,7 @@ module "lambda" {
OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id
PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id
ECO_SHAREPOINT_ID = var.eco_sharepoint_id
PASHUB_EMAIL = var.pashub_email
PASHUB_PASSWORD = var.pashub_password
PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email

View file

@ -17,6 +17,12 @@ variable "image_digest" {
description = "Image digest (sha256:...)"
}
variable "timeout" {
type = number
default = 300
description = "Lambda timeout in seconds."
}
variable "maximum_concurrency" {
type = number
default = null
@ -92,6 +98,11 @@ variable "social_housing_wave_3_sharepoint_id" {
sensitive = true
}
variable "eco_sharepoint_id" {
type = string
sensitive = true
}
variable "pashub_email" {
type = string
sensitive = true

View file

@ -7,7 +7,7 @@ domain_name = "dev.hestia.homes"
api_url_prefix = "api"
# Database
allocated_storage = 20
allocated_storage = 50
instance_class = "db.t4g.medium"
database_name = "DevAssessmentModelDB"

View file

@ -558,6 +558,36 @@ output "bulk_address2uprn_combiner_s3_arn" {
value = module.bulk_address2uprn_combiner_s3.policy_arn
}
################################################
# Bulk Upload Finaliser Lambda ECR (ADR-0013)
################################################
module "bulk_upload_finaliser_state_bucket" {
source = "../modules/tf_state_bucket"
bucket_name = "bulk-upload-finaliser-terraform-state"
}
module "bulk_upload_finaliser_registry" {
source = "../modules/container_registry"
name = "bulk_upload_finaliser"
stage = var.stage
}
# The finaliser only reads the combiner output (bulk_final_outputs) to insert
# property rows; it writes to Postgres, not S3.
module "bulk_upload_finaliser_s3_read" {
source = "../modules/s3_iam_policy"
policy_name = "BulkUploadFinaliserReadS3"
policy_description = "Allow bulk_upload_finaliser Lambda to read combiner output from retrofit-data bucket"
bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"]
actions = ["s3:GetObject", "s3:ListBucket"]
resource_paths = ["/bulk_final_outputs/*"]
}
output "bulk_upload_finaliser_s3_read_arn" {
value = module.bulk_upload_finaliser_s3_read.policy_arn
}
################################################
# Categorisation Lambda ECR
################################################

View file

@ -0,0 +1,62 @@
# PR note — SY system-built vs B basement wall (issue #1177)
For the reviewer / the `feature/bill-derivation` session. Fold the relevant
parts into the PR description; delete this file before/at merge.
## What this branch changed (commit `bd25a3c7`)
`wall_construction == 6` (`WALL_SYSTEM_BUILT`) is the canonical **wall type**
for system-built — and it stays there. The basement signal, which had hijacked
code 6, is moved onto a dedicated flag:
- `SapBuildingPart.wall_is_basement: Optional[bool]` and
`SapAlternativeWall.is_basement: Optional[bool]`.
- `main_wall_is_basement` / `is_basement_wall` honour the flag when set, else
fall back to the legacy `wall_construction == 6` heuristic (so untouched API
basements and the cert 000565 "B" cohort are unchanged).
- Elmhurst: `_elmhurst_wall_is_basement` sets the flag from the distinct
"SY"/"B" labels (SY→False, B→True, other→None).
- gov-EPC API: `from_api_response` post-processes via
`_clear_basement_flag_when_system_built` — when the cert `addendum.system_build`
is True, the code-6 basement heuristic is cleared.
- `EpcPropertyData.system_build` is a **derived `@property`** (not a stored
field): the MAIN wall is system-built iff `wall_construction == 6` and it is
not flagged basement. (Per the call: "system build is the wall type and
should be on `wall_construction`.")
Acceptance verified: system-built main wall → `wall_construction == 6`,
`main_wall_is_basement is False`, `system_build is True`; genuine basement main
wall → `main_wall_is_basement is True`, `system_build is False`. Full §4 suite
green (2404 passed), zero new pyright errors.
## ⚠️ Merge collision: `system_build` field (yours) vs property (this branch)
The `#1177` prompt referenced `EpcPropertyData.system_build` as an existing
**field** on `feature/bill-derivation`. This branch adds `system_build` as a
**`@property`**. They share the name but live in different regions of the class,
so:
- **Git will likely merge them silently** (no textual conflict).
- **Python will NOT raise at import** — the class defines fine.
- It raises `AttributeError: property 'system_build' ... has no setter` at the
**first `EpcPropertyData(...)` instantiation** — i.e. the first mapper call,
so the test suite fails immediately. (Reproduced.)
So the collision is caught fast but is a landmine, not a clean signal. **Resolve
it deliberately at merge** — pick ONE representation:
- **Recommended (matches the agreed model):** drop the stored field; keep the
derived property (system-built is the wall type). If your code currently
*assigns* `epc.system_build = …`, replace those writes with setting the
underlying wall type / basement flag, or add a setter.
- **Or** keep your stored field and delete this branch's property — but then
populate the field consistently with the wall type on BOTH paths (API addendum
*and* Elmhurst "SY"), so `system_build` and `wall_construction`/the basement
flag never disagree.
## Tripwire you own
`tests/domain/modelling/test_elmhurst_cascade_pins.py::test_system_built_generator_offers_ewi_and_iwi_each_pinning_its_after`
is a strict-xfail on your branch (fixtures `system_built_{ewi,iwi}_001431_{before,after}.pdf`
are committed there, not here). With this fix the behaviour it guards is
satisfied, so it should flip xfail→xpass — delete the marker when it does.

View file

@ -0,0 +1,125 @@
# ADR-0013: The `bulk_upload_finaliser` Lambda writes properties and the terminal status
**Status:** Accepted
**Date:** 2026-06-04
> Companion to the `assessment-model` (frontend) repo's
> [ADR-0005](https://github.com/Hestia-Homes/assessment-model/blob/main/docs/adr/0005-async-bulk-upload-finaliser.md),
> which owns the state-machine change and the Drizzle schema. This ADR owns the
> **backend write path**. It applies the direct-write pattern, transaction-boundary
> rule, and port/adapter/row-mirror file layout established by
> [ADR-0003](0003-python-writes-landlord-overrides-directly.md).
## Context
Finalising a BulkUpload inserts one `property` row per source row — up to ~40,000.
Today a synchronous Next.js route does it; frontend ADR-0005 moves it to a dispatched
Lambda for the same reasons ADR-0003 moved the classifier write into Python: the work
is internal (a Lambda computes the rows, the same Lambda persists them), the Lambda
already sits next to a Postgres connection, and routing 40k inserts back through an
HTTP hop buys nothing.
`PropertyRow` in this repo
([`infrastructure/postgres/property_table.py`](../../infrastructure/postgres/property_table.py))
is currently a **defensive read-only view** — its docstring states the backend never
inserts properties. Finalise changes that.
## Decision
1. **New application `applications/bulk_upload_finaliser/`** wraps a handler in
`@subtask_handler` (auto-injected `TaskOrchestrator`; `run_subtask` owns the
subtask start/complete/fail and the Task cascade), exactly like
`applications/landlord_description_overrides/handler.py`. The trigger body:
```python
class BulkUploadFinaliserTriggerBody(SubtaskTriggerBody): # task_id, sub_task_id
s3_uri: str # combiner output (combined_output_s3_uri)
portfolio_id: int
bulk_upload_id: UUID
```
Dispatched via a new `POST /v1/bulk-uploads/trigger-finaliser` FastAPI endpoint
(auth `validate_token`) that enqueues to a new SQS queue — mirroring
`trigger-postcode-splitter` / `trigger-landlord-overrides`.
2. **`PropertyRow` drops its "backend never inserts" invariant** and gains the
insertable columns finalise needs — the **exact nine** the frontend route writes
today: `portfolio_id`, `creation_status` (`'READY'`), `uprn`,
`landlord_property_id` (← `Internal Reference`), `address` (matched ?? user
input), `postcode`, `user_inputted_address`, `user_inputted_postcode`,
`lexiscore`. Drizzle remains the schema source of truth (ADR-0003); this is a
mirror change only.
3. **Insert policy lives in SQL, idempotent:**
```sql
INSERT INTO property (portfolio_id, creation_status, uprn, landlord_property_id,
address, postcode, user_inputted_address,
user_inputted_postcode, lexiscore)
VALUES …
ON CONFLICT (portfolio_id, uprn) WHERE uprn IS NOT NULL
DO NOTHING;
```
This reproduces the frontend's `onConflictDoNothing` exactly — existing properties
are not churned on a re-run.
4. **The Lambda writes the terminal BulkUpload status directly — DDD, on its own
session.** On success it sets `status='complete'`; on failure `status='failed'`.
Rather than the combiner's `backend/app/db/functions` (which open the legacy
`backend/` connection on the separate `DB_*` config), the finaliser stays
**PostgresConfig-only like the landlord classifier Lambda**: it writes status
through a DDD `BulkUploadStatusWriter` port (`repositories/bulk_upload/`) backed
by a minimal `BulkAddressUploadRow` mirror (`infrastructure/postgres/`), on the
*same* session. So a single DB config (`POSTGRES_*`) drives the run and the
image needs no `backend/`. The `complete` flip happens **in the same transaction
as the property insert** (atomic finalise — either both land or neither);
`failed` is written on a fresh session in the error path. Next.js no longer
writes `complete`; it owns only the `awaiting_review → finalising`
compare-and-swap at dispatch (frontend ADR-0005).
5. **DDD layering (ADR-0003 + the landlord precedent).** The
`orchestration/bulk_upload_finaliser_orchestrator.py` owns the whole Finalise
domain flow via a single `finalise(rows, portfolio_id, task_id)` that depends
**only on the two repository ports** — it resolves the combiner rows, inserts
the properties, and marks the upload `complete`, with no engine/session/DB of
its own. So it is unit-testable with two in-memory fakes
(`tests/orchestration/test_bulk_upload_finaliser_orchestrator.py`). The Lambda
`applications/bulk_upload_finaliser/handler.py` is the composition root: parse
trigger, read S3, build the engine/session and the concrete adapters, open the
transaction around `finalise`, and handle the failure path. New seams:
- Property insert: `insert_all` (+ the `PropertyIdentityInsert` DTO) is added to
the existing `PropertyRepository` port and its `PropertyPostgresRepository`
adapter — **one repository per aggregate**, reads and writes together, writing
the amended `infrastructure/postgres/property_table.py` mirror. `epc_repo` is
optional: the write path creates identity rows and never hydrates, so the
finaliser constructs the repo with a session alone.
- Status: port `repositories/bulk_upload/bulk_upload_status_writer.py`, adapter
`…_postgres.py`, writing the `infrastructure/postgres/bulk_address_upload_table.py`
mirror (a separate table → a separate repository).
- Transaction boundary stays in the `infrastructure/postgres/engine` helper
(`commit_scope`); the handler opens the context (once, around insert+complete),
never calls `.commit()`; orchestrator and repositories never commit.
6. **`property_overrides` is out of scope (v2).** The table exists (frontend
migration 0221) but this Lambda does not populate it in v1. When v2 lands it adds
a `PropertyOverrideRow` mirror + `infrastructure/property/property_overrides_postgres_repository.py`
using `onConflictDoUpdate` per frontend ADR-0005.
## Consequences
**Positive.**
- 40k inserts run in a Lambda with a single `sub_task` row to observe, not behind a
synchronous HTTP request.
- Reuses the `@subtask_handler` / SQS / direct-Postgres machinery — reviewers have
the classifier handler and the combiner status-writers as precedents.
**Negative.**
- **`PropertyRow` is no longer read-only.** The defensive invariant that made
accidental backend property writes impossible is gone; correctness now rests on the
finaliser being the only inserting caller.
- **Terminal-status ownership crosses the repo boundary.** The backend now writes a
BulkUpload status (`complete`/`failed`) that Next.js used to own — coordinated only
through the status column (frontend ADR-0005's "two writers, extended").
- The Drizzle/SQLModel lockstep risk from ADR-0003 now extends to `property`'s
insertable columns.

View file

@ -32,8 +32,9 @@ and **still require the matching DB migration** wherever the physical tables liv
`heating_secondary_heating_type`, `heating_shower_outlet_type`, `energy_pv_connection`;
`epc_main_heating_detail`: `main_fuel_type`, `heat_emitter_type`, `emitter_temperature`,
`main_heating_control`; `epc_building_part`: `wall_construction`, `wall_insulation_type`,
`party_wall_construction`, `flat_roof_insulation_thickness`, `roof_insulation_location`,
`roof_insulation_thickness`; `epc_window`: `glazing_gap`, `orientation`, `window_type`,
`party_wall_construction`, `wall_insulation_thickness`, `flat_roof_insulation_thickness`,
`roof_insulation_location`, `roof_insulation_thickness`; `epc_window`: `glazing_gap`,
`orientation`, `window_type`,
`glazing_type`, `window_location`, `window_wall_type`, `draught_proofed`,
`permanent_shutters_present`, `transmission_data_source`).
- **New scalar columns**`epc_property`: `heating_number_baths`, `heating_number_baths_wwhrs`,
@ -68,7 +69,7 @@ in the forward mapper so the Python type round-trips exactly (JSON scalars prese
|---|---|
| `epc_property` | `heating_cylinder_size`, `heating_immersion_heating_type`, `heating_cylinder_insulation_type`, `heating_secondary_heating_type`, `heating_shower_outlet_type`, `energy_pv_connection` |
| `epc_main_heating_detail` | `main_fuel_type`, `heat_emitter_type`, `emitter_temperature`, `main_heating_control` |
| `epc_building_part` | `wall_construction`, `wall_insulation_type`, `party_wall_construction`, `flat_roof_insulation_thickness`, `roof_insulation_location`, `roof_insulation_thickness` |
| `epc_building_part` | `wall_construction`, `wall_insulation_type`, `party_wall_construction`, `wall_insulation_thickness`, `flat_roof_insulation_thickness`, `roof_insulation_location`, `roof_insulation_thickness` |
| `epc_window` | `glazing_gap`, `orientation`, `window_type`, `glazing_type`, `window_location`, `window_wall_type`, `draught_proofed`, `permanent_shutters_present` |
(`energy_meter_type` and `energy_wind_turbines_terrain_type` are `str` in the domain — leave as

View file

@ -4,10 +4,82 @@ Point-in-time note. Start from [`AGENT_GUIDE.md`](AGENT_GUIDE.md) for methodolog
1e-4 bar, the per-line debugging loop, the section helpers, and the suite command.
- **Branch:** `feature/per-cert-mapper-validation`
- **HEAD:** `0f6b4023` (S0380.229). Next slice: **S0380.230**.
- **HEAD:** `9521d524`. Next SAP slice: **S0380.235**.
- **Baseline (§4 suite):** `tests/domain/sap10_calculator/ backend/documents_parser/tests/`
→ green (2397 passed, 1 skipped). Pre-existing out-of-scope failures unchanged
→ green (2412 passed, 1 skipped). Pre-existing out-of-scope failures unchanged
(stone-§5.6 in `domain/sap10_ml/tests/`; `test_from_rdsap_schema.py::...test_total_floor_area`).
## Shipped this session (S0380.232-234 — the case-19 PV closure)
The PV diverter (the prior handover's S0380.232 ask) needed two prerequisite
spec bugs fixed first; all three landed:
| slice | commit | spec | what |
|---|---|---|---|
| **S0380.232** | `212b0c92` | App M1 §3a (p.93, l.5470-5476) | D_PV excludes the LOW-rate portion of an off-peak electric main: `(211)` is only PV-eligible where its §10a code ∈ {30,32,34,35,38}. Storage heaters on 7-hr charge wholly at low rate → fraction 0.0 → excluded. β_Jan 0.894→0.792 (ws 0.791). New `_main_space_heating_high_rate_fraction`. |
| **S0380.233** | `d4a8c02b` | App M1 §6 (p.94, l.5510-5513) | PV-used-in-dwelling credited at the Table 12a ALL_OTHER_USES **weighted** rate (7-hr 14.311 p/kWh), not the bare low rate (5.50). Was under-crediting onsite PV on every off-peak PV cert. Delegates to `_other_fuel_cost_gbp_per_kwh`; STANDARD unchanged. |
| **S0380.234** | `9521d524` | Appendix G4 (p.72-73) | The PV diverter. 3 layers: extractor `Diverter present` + schema `pv_diverter``pv_diverter_present` flag (Elmhurst + API mappers) → `_pv_diverter_monthly_kwh` (SPV = export×0.8×0.9, clamp ≤ (62)+(63a), → (63b)m); `cert_to_inputs` recomputes (219) + PV export, β fixed pre-diverter. |
**Case 19 now: SAP cont 50.33 → 51.34** (ws 51.2221; both round to lodged **51**),
cost (255) 1847.5→1812.3 (ws 1816.6), CO2 3331→3120 (ws 3126), (233a) dwelling
1280.6 (ws **1280.4** — the β fix pins it). The diverter formula is **exact in
summer** (Jun SPV 186.07 = export×0.72, matches ws (63b)).
**The remaining +0.11 SAP on case 19 = two separate, still-open causes:**
1. **Winter Appendix-M monthly EPV shape.** Our annual EPV (2684.17) matches the
worksheet exactly and Jun-Sep match per-month exactly, but Jan-May/Oct-Dec our
EPV is ~9-11% LOW (worksheet Jan 68.2 vs ours 62.5). Back-solve: ws EPV_m =
|(233a)_m| + |(63b)_m|/0.72. This under-diverts in winter → export (233b) 280.7
vs ws 184.2, and (219) 3322 vs ws 3188. **A two-array PV apportionment issue
(case 19 has SE + NW arrays with different overshading) — chase in §M / Appendix U
monthly radiation, NOT the diverter (which is validated).**
2. **Fabric (33) +1.0 W/K** (ours 305.04 vs ws 304.04) — a single element off by
exactly 1.0; floor=25.000 is suspiciously round. Walk the per-element §3 breakdown.
The **eval headline is flat** (42.9→43.0% <0.5; cat-7 5.254.93) expected: the
diverter is rare and the β/price effects are small on the rounded SAP. The value
was pinning the worksheet-validated case 19 + fixing three real spec bugs that the
curated cohort masked.
## Headline now (1,000-cert 2026 API sample, HEAD `f326e4eb`)
| metric | value | was (handover baseline `9c0a373f`) |
|---|---|---|
| computed | 882 / 1000 | 882 |
| **% \|err\| < 0.5** | **42.9%** | 41.8% |
| % < 1 / < 2 / < 5 | 56.7% / 74.6% / 90.1% | 54.9 / 71.9 / 87.8 |
| median / mean \|err\| | 0.73 / **2.04** | 0.79 / ~2.4 |
| mean signed | 0.41 | +0.2 |
**Error by heating cluster** (the load-bearing cut — re-run `analyse_api_sap_clusters.py`):
| cluster | n | mean \|err\| | %<0.5 | note |
|---|---|---|---|---|
| cat 2 gas boiler + PCDB | 639 | 1.27 | 49.6% | well-trodden |
| cat 2 gas, NO PCDB idx | 91 | 3.18 | 35.2% | non-PCDB Table-4b boilers |
| cat 6 community | 45 | 2.59 | 31.1% | known-hard |
| cat 7 electric storage | 40 | **5.25** | 10.0% | was 7.33 → S0380.227-229 |
| cat 10 electric room heaters | 48 | **5.26** | 16.7% | was 9.49 → S0380.230-231 (bias gone) |
| cat 4 HP + PCDB | 8 | 6.11 | 12.5% | small n, APM |
| Flats (any) | 282 | 2.57 | 30.5% | geometry / communal |
| real PV | 45 | 3.90 | 26.7% | Appendix M |
**Worst individual offenders** (the long tail — `eval` TOP 40): `2100-5421-0922-1622-3463`
(60.8, our SAP **negative** 24.8 vs lodged 36 — a flat, 2 bps, cat-2; the single worst, likely
a geometry/communal blow-up — START a per-cert dig here), `2958-8008` (+32, age 6=tiny),
`9836-5829` (29.5, cat-10 tail), several cat-7/cat-10 in the 20s.
## Work shipped (this session — S0380.227-231 + 3 mapper commits)
| commit | what |
|---|---|
| **S0380.227** | dedicated DHW-only system (WHS 911) is NOT separately timed → no Table 2b ×0.9; TF (53) 0.54→0.60, (59) h=3→5 |
| **S0380.228** | electric SECONDARY on off-peak bills at Table 12a `OTHER_DIRECT_ACTING_ELECTRIC` (1.00 high-frac), not 100% low |
| **S0380.229** | dedicated water boiler/circulator (WHC 911-931) feeds cylinder via primary loop → Table 3 primary loss applies |
| **S0380.230** | electric room heaters (cat 10) on off-peak → `OTHER_DIRECT_ACTING_ELECTRIC` (mirror of .228 for the MAIN). cat-10 9.49→7.11 |
| **S0380.231** | Dual-meter electric room heaters → 10-hour tariff (RdSAP §12 Rule 3; codes 691-694,699). cat-10 7.11→5.26, bias +5.08→0.86 |
| `bd25a3c7` | SY system-built vs B basement: code 6 stays system-built; basement → explicit `wall_is_basement`/`is_basement` flag. `system_build` is a derived property (wall type). API path post-processes via addendum. (issue #1177 — see `docs/PR_NOTE_system_built_basement_1177.md`: field-vs-property merge landmine) |
| `f326e4eb` | Elmhurst path now populates `roof_construction` (int) via `_elmhurst_roof_construction_int` for cross-mapper parity (API set it, Elmhurst didn't) |
- **Toolkit (committed):** `scripts/fetch_2026_epc_sample.py`,
`scripts/eval_api_sap_accuracy.py`, `scripts/analyse_api_sap_clusters.py`. The 1,000 cached
JSONs live in `/tmp/epc_2026_sample/` (gitignored scratch — re-fetch with the sampler;
@ -103,7 +175,7 @@ PDF — only in the P960 header).
| **S0380.228** | cost (255) | electric SECONDARY on off-peak bills at Table 12a `OTHER_DIRECT_ACTING_ELECTRIC` (7-hr high-frac **1.00** = £0.1529), not the flat off-peak low (£0.0550). Worksheet (242): "1.00*15.29 + 0.00*5.50". THE primary cost driver (340). | 60.1→**50.67** |
| **S0380.229** | (62) 2493.30→**3169.98** | dedicated water-heating boiler/circulator (WHC 911-931) feeds the cylinder via a primary loop → Table 3 row 1 primary loss applies (keyed off `water_heating_code`, since `_water_heating_main` returns the electric SPACE main). Restored the missing (59)=676.68 kWh/yr. | 50.67→50.33 |
**The ONE remaining case-19 cause — the PV diverter (63b) — is S0380.230 (next).** Worksheet
**The ONE remaining case-19 cause — the PV diverter (63b) — is S0380.232.** Worksheet
header line 124 "Diverter = Yes"; Summary §19 "Diverter present: Yes". Per **SAP 10.2 Appendix
G4 (PDF p.72-73)** surplus PV is diverted to the cylinder immersion:
`S_PV,diverter,m = EPV,m × (1 βm) × 0.8 × 0.9`, clamped to ≤ (62)m + (63a)m, entered as a
@ -134,15 +206,30 @@ pages → ElmhurstSiteNotesExtractor(...).extract() → from_elmhurst_site_notes
## Remaining work, prioritised
### A. Accuracy clusters (highest value — 80+ certs, mean err 710)
1. **Electric storage heaters (cat 7, 39 certs).** Distinct cascade — off-peak tariff split,
charge control (2401/2402), 7-hr/24-hr charge, Table 4a efficiency, responsiveness. **No
worksheet currently validates this path.** Errs both directions (27..+16).
2. **Electric room heaters (cat 10, 43 certs).** Likewise (controls 2601/2602/2603). Worst
cluster by mean (10.26).
3. **Flats (242, 29% <0.5)** and **PV (40, 28%)** — secondary.
### A. Accuracy clusters (highest value)
1. **PV diverter (S0380.232)** — closes case 19 to 1e-4 AND helps the real-PV cluster (45 certs,
mean 3.90). Fully spec'd in the case-19 section above (Appendix G4). **Has a worksheet**
1e-4 bar. Do this first: it's the one open cause on a validated worksheet.
2. **Electric storage heaters (cat 7, 40 certs, mean 5.25).** S0380.227-229 took it 7.33→5.25;
the case-19 PV diverter will help further. Beyond that the tail is per-cert — a **dedicated
cat-7 worksheet** (no PV, no diverter) would let you pin charge-control / responsiveness at
1e-4 instead of the ±0.5 lodged fallback.
3. **Electric room heaters (cat 10, 48 certs, mean 5.26).** S0380.230-231 fixed the systematic
tariff bias (mean 9.49→5.26, signed +5.08→0.86); the residual is now scattered per-cert
(e.g. `9836-5829` 29.5, an under-rater). A **cat-10 worksheet** pins the tail at 1e-4.
4. **Non-PCDB gas boilers (cat 2, no idx, 91 certs, mean 3.18)** and **Flats (282, mean 2.57)**
the next volume levers once the electric clusters are worksheet-pinned. Flats = geometry /
communal; start with the worst (`2100-5421` negative SAP).
- **`2100-5421-0922-1622-3463` diagnosed (S0380.234 session):** NOT a flat — `property_type 0`,
a **352 m² 2-storey uninsulated solid-wall** dwelling (wall_constr 3 / wall_ins 4 as-built;
roof_type 4, no roof insulation). Our space-heating demand is **71,084 kWh/yr** → (37)=995.93
W/K → SAP 24.8 (lodged 36), cost £14,045. This is the **`as-built insulated-assumed`**
U-value front ([[project_as_built_insulated_assumed_bug]]; S0380.209 fixed walls, "roof next"):
the uninsulated-roof / as-built U over-estimates demand on big old dwellings. API-only (no
worksheet → ±0.5 lodged fallback only); needs a generated worksheet or a roof-U spec audit to
pin. It is one outlier, not a cluster-wide flats bug.
### B. Remaining raises (18 certs — all U-value / heat-loss-sensitive, NOT enum guesses)
### B. Remaining raises (16 certs — all U-value / heat-loss-sensitive, NOT enum guesses)
- **`gable_wall_type` 2 & 3 (14 certs).** RdSAP 10 **Table 4** RR walls: 0=Party (U=0.25),
1=Exposed (U=common wall), 2/3 = **Sheltered (U=external×R0.5)** + **Adjacent-to-heated
(U=0)**, code↔type order unconfirmed (schema says "not yet seen"). Needs (i) a worksheet to
@ -161,16 +248,24 @@ is what the strict-raise guard exists to prevent.
---
## ★ Additional worksheets that would help most
## ★ Additional worksheets that would help most (the user will generate these on request)
Case 19 (above) already covers electric storage heaters + loose-jacket cylinder + RR. The two
that would add the most NEW coverage:
1. **An electric ROOM-heater dwelling** (SAP code ~691, control 2601/2602) — the **cat-10
cluster (43 certs, worst by mean error 10.26)**, which case 19 does not touch.
2. **A room-in-roof with a SHELTERED gable and an ADJACENT-TO-HEATED gable** (Table 4 types
The two electric clusters are now systematic-bias-free (S0380.227-231) but their TAILS sit at
the ±0.5-vs-lodged fallback bar because **no worksheet validates them at 1e-4**. The three
highest-value worksheets to ask the user for:
1. **An electric ROOM-heater dwelling** (SAP code ~691, control 2601/2602/2603, Dual meter) —
pins the cat-10 tail (48 certs, mean 5.26) at 1e-4. Make it PV-free + cylinder-free to
isolate the space-heat path from the diverter/HW.
2. **An electric STORAGE-heater dwelling distinct from case 19** (no PV, no WHS-911) — pins the
cat-7 tail (40 certs, mean 5.25): charge control (2401/2402), 7-hr vs 24-hr, responsiveness.
3. **A room-in-roof with a SHELTERED gable and an ADJACENT-TO-HEATED gable** (Table 4 types
beyond Party/Exposed) — closes the `gable_wall_type` 2/3 raise (14 certs) and pins the
Sheltered (U=ext×R0.5) / Adjacent (U=0) U-values the calculator must add.
Per worksheet send BOTH the **Summary PDF** (input) and the **P960/dr87 worksheet PDF** (the
`(1)..(286)` ground truth). Drop them in `sap worksheets/golden fixture debugging/<name>/` and
run the case-19 debug recipe.
The original "design one property" guidance (kept below for reference) is what case 19 was
built from.

View file

@ -0,0 +1,94 @@
# Handover — double_glazing fixture: glazing done, window-extraction open
Point-in-time note for the agent owning the Elmhurst window extractor /
mapper. Start from [`AGENT_GUIDE.md`](AGENT_GUIDE.md) for the 1e-4
worksheet-pin methodology and the cascade pipeline.
- **Branch:** `feature/per-cert-mapper-validation`. HEAD `8133521c`.
- **Fixture:** the double_glazing "before" recommendation pair —
`sap worksheets/Recommendations Elmhurst Files/double_glazing/before/`
(`Summary_001431 (1).pdf` + `P960-0001-001431 - 2026-06-02T115533.961.pdf`).
The Summary is also committed as a test fixture:
`backend/documents_parser/tests/fixtures/Summary_001431_double_glazing.pdf`.
- **Worksheet block to pin** (rating = block 1, region 0; demand = block 2,
postcode): SAP cont **57.2415**, cost (255) **1423.0955**, fabric (33)
**158.4548** / (37) **197.8463**; demand CO2 (272) **3486.0799**, PE (286)
**16796.5617**. (Blocks 3+ add PV/diverter — ignore for the "before" pin.)
## Done this session (S0380.235-237) — DON'T redo
| slice | what |
|---|---|
| **S0380.235** `3e45b7fa` | 5 missing Elmhurst §11 glazing labels → SAP10 Table 6b (`Secondary glazing`→7, `…- Normal emissivity`→11, `Triple pre 2002`→10, `Triple with unknown install date`→6, `Single glazing, known data`→15). |
| **S0380.236** `ea35bed2` | Extension party-wall type read independently of "As Main Wall" (`_extract_extensions`): Main `CU`→0.5, Ext `U Unable to determine`→0.25. Worksheet **(32) party heat loss now exact** (32.573 vs 32.5725). |
| **S0380.237** `8133521c` | `Secondary glazing - Low emissivity`→12. Double {1,2,3,7,13} + secondary {4,11,12} families now fully mapped. |
**Confirmed already correct — do not touch:**
- The calculator's window **U-adjustment `1/(1/U + 0.04)`** (SAP §3.2 curtain
correction) is exact: lodged 4.80→4.0268, 3.10→2.7580, 1.40→1.3258, all
match the worksheet to 1e-4. Our 14 extracted windows sum to **exactly
56.090**. The 1e-4 gap is NOT in the calculator.
- Glazing label→code mapping (g_L is the only cascade effect; lodged U/g
drive §3/§6 via `_g_perpendicular` preferring the lodged value).
## Open — current residual **SAP +1.13** (ours 58.37 vs ws 57.24), all in WINDOWS
The Summary §11 lodges **17 physical window rows**; we end up with **14**
`sap_windows`. Three windows are lost, in two distinct ways:
### Cause 1 (HARD — read before touching `_is_elmhurst_roof_window`)
The mapper routes the two `Double pre 2002` windows (lodged U 3.1 / 3.4) to
**roof** windows via the `U > 3.0` backstop in
`_is_elmhurst_roof_window` (`datatypes/epc/domain/mapper.py`, the final
`return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD`). This fixture's
worksheet bills them as **wall** windows (27).
**The trap:** cohort cert **000516** has a window that is *byte-identical*
in every extractable Summary field — `Double pre 2002`, U=3.1,
`location="External wall"`, bp `Main`, orient `North-East` — and *its*
worksheet bills it as a **roof** window (27a). Verified: gating the U>3
rule on `location == "External wall"` makes this fixture pass but
**breaks both 000516 pins** (`test_summary_000516_full_chain_…` and
`test_from_elmhurst_site_notes_matches_hand_built_000516`).
So identical Summary inputs are classified oppositely by the two
worksheets. **No rule keyed on the fields we currently extract can satisfy
both.** Resolving this needs a NEW disambiguating signal — likely a
roof/wall or rooflight field Elmhurst lodges in §11 (or the BP roof
structure) that the extractor doesn't yet capture. Do NOT flip the U>3
heuristic to fix this fixture; it silently regresses 000516.
### Cause 2 (tractable — a plain parsing miss)
The extractor produces **16 windows from 17 §11 rows** — it drops the
`Double glazing, known data` row (BFRC, lodged U=1.00 → adjusted 0.9615,
1st Extension, area 1.00; worksheet "Windows 12" on Ext1). The label maps
fine (→3); the physical row just isn't extracted. Fixing this alone won't
pin the fixture (Cause 1 still blocks) but it's a real, isolatable
extractor bug.
## Tracer recipe (rebuild — the throwaway lived in /tmp)
```python
# from repo root, PYTHONPATH=/workspaces/model
import re, subprocess; from pathlib import Path
from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
from datatypes.epc.domain.mapper import EpcPropertyDataMapper
from domain.sap10_calculator.rdsap.cert_to_inputs import (
SAP_10_2_SPEC_PRICES, cert_to_inputs, cert_to_demand_inputs,
heat_transmission_section_from_cert)
from domain.sap10_calculator.calculator import calculate_sap_from_inputs
def pages(pdf):
n=int(re.search(r"Pages:\s+(\d+)",subprocess.run(["pdfinfo",str(pdf)],
capture_output=True,text=True).stdout).group(1)); out=[]
for i in range(1,n+1):
L=subprocess.run(["pdftotext","-layout","-f",str(i),"-l",str(i),str(pdf),"-"],
capture_output=True,text=True).stdout
out.append("\n".join(tok for ln in L.splitlines()
for tok in re.split(r"\s{2,}",ln.strip()) if tok))
return out
D=Path("sap worksheets/Recommendations Elmhurst Files/double_glazing/before")
sn=ElmhurstSiteNotesExtractor(pages(D/"Summary_001431 (1).pdf")).extract()
epc=EpcPropertyDataMapper.from_elmhurst_site_notes(sn)
# len(sn.windows)==16 (should be 17); len(epc.sap_windows)==14 (2 → roof, 1 dropped)
```
Per-window A×U on the worksheet uses the ADJUSTED U `1/(1/U_lodged+0.04)`;
sum the §3 `(27)` lines to 60.5577 (we get 56.090 from 14 windows).

View file

@ -164,7 +164,10 @@ from domain.sap10_calculator.worksheet.energy_requirements import (
from domain.sap10_calculator.worksheet.fabric_energy_efficiency import (
fabric_energy_efficiency_kwh_per_m2_yr,
)
from domain.sap10_calculator.worksheet.photovoltaic import pv_split_monthly
from domain.sap10_calculator.worksheet.photovoltaic import (
PhotovoltaicSplit,
pv_split_monthly,
)
from domain.sap10_calculator.worksheet.space_cooling import (
SpaceCoolingResult,
space_cooling_monthly_kwh,
@ -2082,23 +2085,6 @@ def _off_peak_low_rate_gbp_per_kwh(tariff: Tariff) -> float:
return low * _PENCE_TO_GBP
def _off_peak_low_rate_gbp_per_kwh_via_meter_heuristic(meter_type: object) -> float:
"""Off-peak low-rate £/kWh for callsites that detect off-peak via the
`_is_off_peak_meter` heuristic (RdSAP meter code 3 = Unknown is
treated as off-peak for electric end-uses; see _is_off_peak_meter
docstring). When the meter resolves to a known off-peak tariff
(codes 1/4/5), bills at that tariff's Table 32 low rate; when the
meter resolves to STANDARD (codes 2 = Single, 3 = Unknown), falls
back to the SEVEN_HOUR rate (5.50, Table 32 code 31). Codifies the
heuristic that pre-S0380.138 was baked into the literal
`prices.e7_low_rate_p_per_kwh` constant."""
tariff = tariff_from_meter_type(meter_type)
if tariff is Tariff.STANDARD:
_high, low = _tariff_high_low_rates_p_per_kwh(Tariff.SEVEN_HOUR)
return low * _PENCE_TO_GBP
return _off_peak_low_rate_gbp_per_kwh(tariff)
# Tariff → (high_rate_fuel_code, low_rate_fuel_code) for the SAP 10.2
# Table 12d (CO2) / Table 12e (PE) monthly factors. Mirror of the
# Table 32 cost-rates dict above: 7-hour and 10-hour tariffs split into
@ -2138,6 +2124,16 @@ def _table_12a_system_for_main(
main.main_heating_index_number is not None
and heat_pump_record(main.main_heating_index_number) is not None
)
# Electric room heaters (RdSAP main_heating_category 10) are direct-
# acting electric → SAP 10.2 Table 12a Grid 1 (PDF p.191) "Other
# systems including direct-acting electric" row (7-hour high-rate
# fraction 1.00, 10-hour 0.50). Distinct from electric STORAGE
# heaters (category 7), which charge off-peak and correctly fall
# through to None here (→ 100% low rate). Gated on `_is_electric_main`
# so a non-electric room heater (gas / solid-fuel cat 10) is excluded;
# all callers already pre-gate on electric, this is belt-and-braces.
if main.main_heating_category == 10 and _is_electric_main(main):
return Table12aSystem.OTHER_DIRECT_ACTING_ELECTRIC
# ASHP — Table 4a rows 211-217 (earlier generations) + 221-227
# (2013+) cover the air-source space. Warm-air ASHPs are 521-524.
if code is not None and (
@ -2173,6 +2169,41 @@ def _space_heating_fuel_cost_gbp_per_kwh(
return blended * _PENCE_TO_GBP
def _main_space_heating_high_rate_fraction(
main: Optional[MainHeatingDetail],
tariff: Tariff,
) -> float:
"""SAP 10.2 Appendix M1 §3a (PDF p.93) — the fraction of the main
space-heating fuel that is billed at the HIGH rate in Section 10a,
i.e. carries an "electricity not at the low-rate" fuel code (30, 32,
34, 35 or 38). Only this high-rate portion of E_space,m may enter the
PV-eligible demand D_PV,m; the low-rate portion (code 31/33/36/37/39)
is excluded.
Mirrors `_space_heating_fuel_cost_gbp_per_kwh`'s rate split exactly so
the D_PV inclusion and the §10a billing stay consistent:
- non-electric main, or STANDARD tariff 1.0 (no off-peak split;
the eligible-code gate in `_pv_eligible_demand_monthly_kwh`
already excludes non-electric fuels, and a STANDARD-tariff
electric main bills 100% at code 30).
- electric main on an off-peak tariff whose Table 12a Grid 1 SH row
is wired the published high-rate fraction. Electric STORAGE
heaters (Table 12a `_table_12a_system_for_main` None, charged
wholly off-peak) and any system whose Grid 1 SH row is not yet
wired bill 100% at the low rate fraction 0.0, so E_space,m is
excluded from D_PV entirely (worksheet (240) high-rate cost = 0).
"""
if not _is_electric_main(main) or tariff is Tariff.STANDARD:
return 1.0
system = _table_12a_system_for_main(main)
if system is None:
return 0.0
try:
return space_heating_high_rate_fraction(system, tariff)
except NotImplementedError:
return 0.0
def _hot_water_fuel_cost_gbp_per_kwh(
water_heating_fuel: Optional[int],
main: Optional[MainHeatingDetail],
@ -2464,6 +2495,7 @@ def _pv_eligible_demand_monthly_kwh(
main_fuel_code_table_12: Optional[int],
secondary_fuel_code_table_12: Optional[int],
water_heating_fuel_code_table_12: Optional[int],
main_space_high_rate_fraction: float = 1.0,
) -> tuple[float, ...]:
"""SAP 10.2 Appendix M1 §3a (p.93) — monthly PV-eligible demand
D_PV,m. Always includes lighting + appliances + cooking + electric
@ -2472,6 +2504,18 @@ def _pv_eligible_demand_monthly_kwh(
(codes 30, 32, 34, 35, 38 per spec). Includes E_water,m only when
the water heating fuel code is 30 (standard electricity) per spec.
`main_space_high_rate_fraction` scales the main-heating contribution
by the portion billed at the HIGH rate (code 30) in Section 10a.
Per the §3a inclusion rule "(211) should be included only where the
fuel code applied to it in Section 10a is 30, 32, 34, 35 or 38 (i.e.
electricity not at the low-rate)", off-peak electric mains (e.g.
storage heaters charged wholly at the low rate, fraction 0.0) must
NOT add their (211) to D_PV. Defaults to 1.0 unchanged for
STANDARD-tariff electric mains and the gas-main / electric-secondary
cohort. Without this, off-peak storage-heater dwellings over-counted
D_PV by the full (211) in winter, inflating R_PV,m β the onsite
PV split (case 19: β_Jan 0.894 0.792, matching worksheet 0.791).
Secondary space heating is included on the same footing as main:
Appendix M1 §3a counts E_space,m as the dwelling's total electric
space-heating demand, which for a gas-main / electric-secondary
@ -2481,9 +2525,13 @@ def _pv_eligible_demand_monthly_kwh(
worksheet (233a) gap localised on the cohort-2 gas+PV certs:
cert 3136 onsite 726.9 790.3 vs worksheet 792.1).
The off-peak immersion × (243) Ewater branch and the Appendix G4
PV diverter adjustment are deferred current cohort fixtures
don't exercise them."""
The off-peak immersion × (243) Ewater branch is deferred. The
Appendix G4 PV-diverter saving is intentionally NOT reflected here:
per the §3a note (PDF p.93, lines 5485-5486) "If there is a PV
diverter, then for the purposes of this β factor calculation (219)m
should not include the diverter savings" — so D_PV uses the
pre-diverter (219), and the diverter (63b)m is applied afterwards in
`_pv_diverter_monthly_kwh`."""
include_main_space = (
main_fuel_code_table_12 is not None
and main_fuel_code_table_12 in _PV_ELIGIBLE_SPACE_HEATING_FUEL_CODES
@ -2506,7 +2554,7 @@ def _pv_eligible_demand_monthly_kwh(
+ pumps_fans_monthly_kwh[m]
)
if include_main_space:
d += main_1_fuel_monthly_kwh[m]
d += main_space_high_rate_fraction * main_1_fuel_monthly_kwh[m]
if include_secondary_space:
d += secondary_fuel_monthly_kwh[m]
if include_water:
@ -2515,6 +2563,70 @@ def _pv_eligible_demand_monthly_kwh(
return tuple(monthly)
# SAP 10.2 Appendix G4 step 4 (PDF p.73) — correction factors applied to
# the surplus PV available to the diverter: 0.8 for the cylinder's
# ability to accept the heat, and fPV,diverter,storageloss = 0.9 for the
# increased cylinder losses from storing water at a higher temperature.
_PV_DIVERTER_CYLINDER_ACCEPTANCE_FACTOR: Final[float] = 0.8
_PV_DIVERTER_STORAGE_LOSS_FACTOR: Final[float] = 0.9
def _pv_diverter_monthly_kwh(
*,
epc: EpcPropertyData,
pv_export_monthly_kwh: tuple[float, ...],
water_demand_monthly_kwh: tuple[float, ...],
avg_daily_hot_water_l: float,
battery_capacity_kwh: float,
pv_generation_kwh: float,
) -> Optional[tuple[float, ...]]:
"""SAP 10.2 Appendix G4 (PDF p.72-73) — monthly PV-diverter water-
heating input SPV,diverter,m (positive kWh), entered as the negative
worksheet (63b)m.
`pv_export_monthly_kwh` is the pre-diverter surplus EPV,m × (1 βm)
the portion of PV generation not consumed by the dwelling's
instantaneous demand, which would otherwise be exported. Per G4 step
4:
SPV,diverter,m = EPV,m × (1 βm) × 0.8 × fPV,diverter,storageloss
clamped to (62)m + (63a)m (`water_demand_monthly_kwh`; (63a) the
WWHRS reduction, 0 here) so the diverter never supplies more than the
water-heating demand.
Returns None diverter disregarded by software (G4 step 1) unless
ALL four inclusion conditions hold:
a. a PV system connected to the dwelling supply (EPV > 0);
b. a cylinder whose volume exceeds (43) the average daily hot-water
use;
c. no solar water heating present;
d. no battery storage present.
`pv_diverter_present` (Summary §19 / API `pv_diverter`) gates the
whole calculation: an absent diverter returns None immediately.
"""
if not epc.sap_energy_source.pv_diverter_present:
return None
# a. PV connected to the dwelling (case "a" Appendix M1 step 2).
if pv_generation_kwh <= 0.0:
return None
# b. Cylinder volume (litres) must exceed (43) average daily HW use.
cylinder_volume_l = _hot_water_cylinder_volume_l(epc)
if cylinder_volume_l is None or cylinder_volume_l <= avg_daily_hot_water_l:
return None
# c. No solar water heating. d. No battery storage.
if epc.solar_water_heating or battery_capacity_kwh > 0.0:
return None
correction = (
_PV_DIVERTER_CYLINDER_ACCEPTANCE_FACTOR
* _PV_DIVERTER_STORAGE_LOSS_FACTOR
)
return tuple(
min(pv_export_monthly_kwh[m] * correction, water_demand_monthly_kwh[m])
for m in range(12)
)
# RdSAP 10 §11.1 b): when the kWp is not lodged but the cert lodges a
# "% of roof area" PV figure, derive the PV peak power as
# `0.12 × PV area`, with PV area being the dwelling's roof area for
@ -2602,30 +2714,25 @@ def _pv_export_credit_gbp_per_kwh() -> float:
def _pv_dwelling_import_price_gbp_per_kwh(
meter_type: object, prices: PriceTable
tariff: Tariff, prices: PriceTable
) -> float:
"""PV dwelling-consumption price per kWh per SAP 10.2 Appendix M1 §6
(p.94): "apply the normal import electricity price to PV energy used
within the dwelling". Onsite-consumed PV displaces grid IMPORTS, so
it bills at the standard electricity import tariff (Table 32 code 30
under the RdSAP10 amendment per ADR-0010 §10 = 13.19 p/kWh the
same rate `_fuel_cost`'s `other_uses_p_per_kwh` already pays for
lighting/pumps/fans, and crucially the same rate Table 32 code 60
pays for the EXPORT credit. In Table 32 these collapse to a single
13.19 p value, so the IMPORT/EXPORT split is mathematically
equivalent to the legacy single-rate-EXPORT credit but the
distinction matters when an off-peak tariff lands: §6 then directs
a weighted Table 12a high/low rate, deferred until the first off-
peak cost cert ships."""
if _is_off_peak_meter(meter_type, fuel_is_electric=True):
# Off-peak weighted Table 12a rate (deferred — `_fuel_cost`
# short-circuits Tariff != STANDARD before reaching this path).
# Routes through the meter-heuristic helper so an Unknown-meter
# cert (code 3 = "treat as off-peak for electric end-uses" per
# _is_off_peak_meter) falls back to the SEVEN_HOUR low rate
# rather than raising on STANDARD.
return _off_peak_low_rate_gbp_per_kwh_via_meter_heuristic(meter_type)
return table_32_unit_price_p_per_kwh(30) * _PENCE_TO_GBP
(PDF p.94, lines 5510-5513): "apply the normal import electricity
price to PV energy used within the dwelling In the case of the
former, use a weighted average of high and low rates (Table 12a)."
Onsite-consumed PV displaces the dwelling's "all other uses"
electricity (lighting / appliances / pumps), so it bills at the same
Table 12a Grid 2 ALL_OTHER_USES rate `_other_fuel_cost_gbp_per_kwh`
derives a STANDARD-tariff dwelling pays the flat Table 32 code 30
13.19 p/kWh (unchanged from the legacy single-rate path), while an
off-peak dwelling pays the weighted high/low blend (7-hour:
0.90 × 15.29 + 0.10 × 5.50 = 14.311 p/kWh, matching worksheet
(252)/(269) "PV used in dwelling" on case 19).
Pre-S0380.233 the off-peak branch returned the bare low rate
(5.50 p/kWh), under-crediting onsite PV on every off-peak cert."""
return _other_fuel_cost_gbp_per_kwh(tariff, prices)
def _other_fuel_cost_gbp_per_kwh(
@ -6079,9 +6186,7 @@ def _fuel_cost(
pv_dwelling_kwh_per_yr=pv_dwelling_kwh_per_yr,
pv_exported_kwh_per_yr=pv_exported_kwh_per_yr,
pv_dwelling_import_price_gbp_per_kwh=(
_pv_dwelling_import_price_gbp_per_kwh(
epc.sap_energy_source.meter_type, prices
)
_pv_dwelling_import_price_gbp_per_kwh(_rdsap_tariff(epc), prices)
),
)
@ -6537,6 +6642,7 @@ def cert_to_inputs(
# the scalar `water_eff` (Table 4a/4b boilers, legacy fallback).
# Q_space (kWh/month) per spec = (98c)m × (204) = (98c)m × (1
# sec_frac) for single-main fixtures.
space_heating_monthly_useful_kwh: tuple[float, ...] = (0.0,) * 12
if wh_result is not None:
# Eq D1 Q_space is the DHW boiler's OWN space-heating load — its
# (204)/(205) share of total — not the dwelling total (202). See
@ -6711,6 +6817,12 @@ def cert_to_inputs(
)
if epc.sap_heating.water_heating_fuel is not None else None
),
# SAP 10.2 Appendix M1 §3a — exclude the low-rate portion of an
# off-peak electric main from D_PV (the §10a high/low split that
# `_space_heating_fuel_cost_gbp_per_kwh` already bills).
main_space_high_rate_fraction=_main_space_heating_high_rate_fraction(
main, _rdsap_tariff(epc),
),
)
pv_split = pv_split_monthly(
epv_monthly_kwh=pv_monthly_kwh,
@ -6718,17 +6830,70 @@ def cert_to_inputs(
battery_capacity_kwh=_pv_battery_capacity_kwh(epc),
)
# SAP 10.2 Appendix G4 (PDF p.72-73) — PV diverter. The β factor above
# is computed on the PRE-diverter (219) per the §3a note; now apply
# the diverter saving. SPV,diverter,m diverts the surplus PV (the
# would-be export EPV,m × (1 βm)) into the cylinder immersion:
# - (63b)m = SPV,diverter,m reduces the §4 output (64)m → less main-
# system water-heating fuel (219);
# - the export drops to EPV,ex,m = EPV,m(1 βm) + (63b)m / 0.9 (the
# diverted energy is no longer exported); the onsite dwelling
# portion EPV,dw,m = EPV,m × βm is unchanged (the β is fixed).
hw_output_monthly_for_factors = (
wh_result.output_monthly_kwh if wh_result is not None else (0.0,) * 12
)
pv_diverter_monthly_kwh = _pv_diverter_monthly_kwh(
epc=epc,
pv_export_monthly_kwh=pv_split.epv_exported_monthly_kwh,
water_demand_monthly_kwh=(
wh_result.total_demand_monthly_kwh if wh_result is not None
else (0.0,) * 12
),
avg_daily_hot_water_l=(
wh_result.annual_avg_hot_water_l_per_day if wh_result is not None
else 0.0
),
battery_capacity_kwh=_pv_battery_capacity_kwh(epc),
pv_generation_kwh=sum(pv_monthly_kwh),
)
if pv_diverter_monthly_kwh is not None and wh_result is not None:
pv63b_monthly_kwh = tuple(-s for s in pv_diverter_monthly_kwh)
# (64)m = (62)m + (63a)m + (63b)m — reduce the §4 output by the
# diverter input, then recompute (219) from the reduced output.
hw_output_monthly_for_factors = tuple(
max(0.0, wh_result.output_monthly_kwh[m] + pv63b_monthly_kwh[m])
for m in range(12)
)
if section_12_4_4_blend is None:
hw_kwh = _apply_water_efficiency(
wh_output_monthly_kwh=hw_output_monthly_for_factors,
wh_output_annual_kwh=sum(hw_output_monthly_for_factors),
water_efficiency_pct=water_eff,
eq_d1_winter_summer_pct=eq_d1_winter_summer_pct,
space_heating_monthly_useful_kwh=space_heating_monthly_useful_kwh,
interlock_penalty_pp=eq_d1_interlock_penalty_pp,
)
# EPV,ex,m = EPV,m(1 βm) + (63b)m / fPV,diverter,storageloss.
adjusted_export_monthly_kwh = tuple(
pv_split.epv_exported_monthly_kwh[m]
+ pv63b_monthly_kwh[m] / _PV_DIVERTER_STORAGE_LOSS_FACTOR
for m in range(12)
)
pv_split = PhotovoltaicSplit(
beta_monthly=pv_split.beta_monthly,
epv_dwelling_monthly_kwh=pv_split.epv_dwelling_monthly_kwh,
epv_exported_monthly_kwh=adjusted_export_monthly_kwh,
)
# SAP 10.2 §12.4.4 overrides — when summer immersion applies (back-
# boiler combo + cylinder + WHC from main heating), the HW cost /
# CO2 / PE factors are kWh-weighted blends of the winter boiler fuel
# + summer electric immersion. The standing-charges line adds the
# off-peak electric standing because the cylinder is heated by an
# off-peak immersion Jun-Sep. When the rule does NOT apply, the
# locals fall back to the existing single-fuel HW helpers.
hw_monthly_kwh_for_factors = (
wh_result.output_monthly_kwh if wh_result is not None
else (0.0,) * 12
)
# locals fall back to the existing single-fuel HW helpers. The HW
# factors weight by the diverter-adjusted (64)m output.
hw_monthly_kwh_for_factors = hw_output_monthly_for_factors
if section_12_4_4_blend is not None:
(
_hw_total_unused,
@ -6931,7 +7096,7 @@ def cert_to_inputs(
pv_generation_kwh_per_yr=_pv_generation_kwh_per_yr(epc, climate),
pv_export_credit_gbp_per_kwh=_pv_export_credit_gbp_per_kwh(),
pv_dwelling_import_price_gbp_per_kwh=_pv_dwelling_import_price_gbp_per_kwh(
epc.sap_energy_source.meter_type, prices
_rdsap_tariff(epc), prices
),
# SAP 10.2 Appendix M1 §3-4 PV split — the cascade applies
# IMPORT PEF (Table 12) to the onsite portion and EXPORT PEF

View file

@ -250,13 +250,16 @@ _RULE_2_STORAGE_CODES: Final[frozenset[int]] = frozenset(
# Rule 3: direct-acting electric + heat pumps + electric room heaters
# → 10-hour. §12 lists "heat pump (211 to 224, 521 to 524, or
# database)" — the "database" branch fires when the cert lodges a
# PCDB Table 362 heat-pump index regardless of SAP code.
# PCDB Table 362 heat-pump index regardless of SAP code. §12 also
# names "electric room heaters" verbatim (RdSAP 10 PDF p.62) — Table 4a
# electric room-heater codes 691 (panel/convector/radiant), 692 (fan),
# 693 (portable), 694 (water-/oil-filled), 699 (assumed). Without these
# a Dual-meter room-heater cert fell through to Rule 4 (7-hour default).
_RULE_3_TEN_HOUR_CODES: Final[frozenset[int]] = frozenset(
[191] # direct-acting electric boiler
+ list(range(211, 225)) # heat pumps 211-224
+ list(range(521, 525)) # warm-air heat pumps 521-524
# TODO: electric room heater codes (SAP Table 4a row 6xx for
# electric panel / radiant heaters) when a fixture surfaces them.
+ [691, 692, 693, 694, 699] # electric room heaters (Table 4a)
)

View file

@ -980,21 +980,28 @@ def heat_transmission_from_cert(
# U_main_wall per spec page 23 ("Common wall U-value is inferred
# from the U-value of the main wall in the building part below";
# gables fall under the same Table 4 rule).
rr_a_rr = geom["rr_simplified_a_rr_m2"]
rr_common = geom["rr_common_wall_area_m2"]
rr_gable = geom["rr_gable_area_m2"]
# `rr_roof_area` is the worksheet's simplified A_RR (the notional
# room-in-roof roof area, RdSAP 10 §3.10.1). Its perimeter common
# walls + gables are billed to walls; the leftover residual is the
# roof-going area that takes the roof U-value.
rr_roof_area = geom["rr_simplified_a_rr_m2"]
rr_common_wall_area = geom["rr_common_wall_area_m2"]
rr_gable_area = geom["rr_gable_area_m2"]
rr_detailed_area = 0.0
if rr_a_rr > 0:
if rr_roof_area > 0:
rir = part.sap_room_in_roof
assert rir is not None # rr_a_rr > 0 ⇒ rir present per _part_geometry
walls += uw * (rr_common + rr_gable)
assert rir is not None # rr_roof_area > 0 ⇒ rir present per _part_geometry
walls += uw * (rr_common_wall_area + rr_gable_area)
# Deduct any "Roof of Room" rooflights piercing the RR shell
# (see `rw_area_on_rr` rationale at the gross-roof block).
a_rr_final = max(0.0, rr_a_rr - rr_common - rr_gable - rw_area_on_rr)
rr_residual_roof_area = max(
0.0,
rr_roof_area - rr_common_wall_area - rr_gable_area - rw_area_on_rr,
)
u_rr = u_rr_default_all_elements(
country=country, age_band=rir.construction_age_band,
)
roof += u_rr * a_rr_final
roof += u_rr * rr_residual_roof_area
elif part.sap_room_in_roof is not None and part.sap_room_in_roof.detailed_surfaces:
# RdSAP10 §3.10 Detailed RR — iterate per-surface lodgement.
# Slope / flat_ceiling / stud_wall route to roof (worksheet
@ -1007,7 +1014,8 @@ def heat_transmission_from_cert(
# band". Wall-going RIR surfaces (gable_wall, gable_wall_
# external, common_wall) deduct from the simplified A_RR
# to leave the residual area, mirroring the Simplified
# branch's `a_rr_final = rr_a_rr - rr_common - rr_gable`.
# branch's `rr_residual_roof_area = rr_roof_area -
# rr_common_wall_area - rr_gable_area`.
# Roof-going surfaces (slope / flat_ceiling / stud_wall)
# do NOT deduct — they sit inside the RR shell rather than
# forming its perimeter walls.
@ -1179,7 +1187,7 @@ def heat_transmission_from_cert(
main_wall_area
+ (alt_walls_total_area - alt_window_area)
+ roof_area + floor_area_total
+ w_area + d_area + rw_area_part + rr_a_rr + rr_detailed_area
+ w_area + d_area + rw_area_part + rr_roof_area + rr_detailed_area
+ cantilever_area
)
total_external_area += part_external_area

0
etl/hubspot/__init__.py Normal file
View file

View file

@ -67,7 +67,9 @@ def handler(body: dict[str, Any], context: Any) -> None:
logger.info(
f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}"
)
_trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id)
_trigger_magicplan_fetcher(
sqs_client, hubspot_deal, listing, hubspot_deal_id
)
else:
# Deal already in db, check whether anything has changed
logger.info(
@ -119,13 +121,18 @@ def handler(body: dict[str, Any], context: Any) -> None:
logger.info(
f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}"
)
_trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id)
_trigger_magicplan_fetcher(
sqs_client, hubspot_deal, listing, hubspot_deal_id
)
print("done")
def _trigger_magicplan_fetcher(
sqs_client: Any, hubspot_deal: Dict[str, str], listing: Optional[dict[str, str]], hubspot_deal_id: str
sqs_client: Any,
hubspot_deal: Dict[str, str],
listing: Optional[dict[str, str]],
hubspot_deal_id: str,
) -> None:
message_body = {
"address": hubspot_deal.get("dealname"),
@ -136,9 +143,7 @@ def _trigger_magicplan_fetcher(
QueueUrl=get_settings().MAGICPLAN_SQS_URL,
MessageBody=json.dumps(message_body),
)
logger.info(
f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}"
)
logger.info(f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}")
def _trigger_pashub_fetcher(
@ -148,7 +153,7 @@ def _trigger_pashub_fetcher(
"pashub_link": hubspot_deal["pashub_link"],
"address": None, # potentially available from Listing, leave as None for now
"hubspot_deal_id": deal_id,
"sharepoint_link": hubspot_deal.get("sharepoint_link", None),
# "sharepoint_link": hubspot_deal.get("sharepoint_link", None), # Don't send sharepoint link for now as they are inconsistent
"uprn": hubspot_deal.get("national_uprn", None),
"landlord_property_id": hubspot_deal.get("owner_property_id", None),
"deal_stage": hubspot_deal.get("deal_stage", None),

View file

@ -0,0 +1,29 @@
from __future__ import annotations
from datetime import datetime, timezone
from typing import ClassVar, Optional
from uuid import UUID
from sqlmodel import Field, SQLModel
class BulkAddressUploadRow(SQLModel, table=True):
"""Minimal mirror of the FE-owned ``bulk_address_uploads`` table.
The schema source of truth is the Next.js Drizzle repo
(``src/app/db/schema/bulk_address_uploads.ts``); the backend's fuller mirror
lives at ``backend/app/db/models/bulk_address_uploads.py``. This DDD-side
mirror declares only the columns the finaliser writes the terminal status
so the finaliser can flip status on its own ``PostgresConfig`` session without
pulling in the legacy ``backend/`` connection (ADR-0013). Keep the column names
in step with the Drizzle source.
"""
__tablename__: ClassVar[str] = "bulk_address_uploads" # pyright: ignore[reportIncompatibleVariableOverride]
id: UUID = Field(primary_key=True)
task_id: Optional[UUID] = Field(default=None, index=True)
status: str
updated_at: datetime = Field(
default_factory=lambda: datetime.now(timezone.utc)
)

View file

@ -549,7 +549,15 @@ class EpcBuildingPartModel(SQLModel, table=True):
building_part_number: Optional[int] = Field(default=None)
wall_dry_lined: Optional[bool] = Field(default=None)
wall_thickness_mm: Optional[int] = Field(default=None)
wall_insulation_thickness: Optional[str] = Field(default=None)
# Union[str, int] — int mm when the API lodges
# `wall_insulation_thickness == "measured"` (resolved by
# `_api_resolve_wall_insulation_thickness`), else the lodged string
# ("NI", a numeric string, ...). JSONB to preserve int vs str on
# round-trip, exactly like the sibling `roof_insulation_thickness` /
# `flat_roof_insulation_thickness`.
wall_insulation_thickness: Optional[Union[str, int]] = Field(
default=None, sa_column=Column(JSONB, nullable=True)
)
floor_heat_loss: Optional[int] = Field(default=None)
floor_insulation_thickness: Optional[str] = Field(default=None)
flat_roof_insulation_thickness: Optional[Union[str, int]] = Field(

View file

@ -2,24 +2,49 @@ from __future__ import annotations
from typing import ClassVar, Optional
from sqlalchemy import Column
from sqlalchemy import Enum as SAEnum
from sqlmodel import Field, SQLModel
# Mirror of the FE-owned `creation_status` pgEnum (property.ts:
# propertyCreationStatusEnum = {LOADING, READY, ERROR}). A single SAEnum instance
# so test-schema create_all emits one CREATE TYPE; prod owns the type via Drizzle.
property_creation_status_sa_enum = SAEnum(
"LOADING", "READY", "ERROR", name="creation_status"
)
class PropertyRow(SQLModel, table=True):
"""Defensive view of the FE-owned ``property`` table.
"""Mirror of the FE-owned ``property`` table.
The schema and migrations for ``property`` are owned by the front-end
Next.js repo; this declares only the identity columns the modelling backend
reads/writes, so FE-owned migrations to other columns don't ripple into us.
The schema and migrations for ``property`` are owned by the front-end Next.js
repo (``src/app/db/schema/property.ts``); this declares the identity columns
the modelling backend reads, plus the subset the ``bulk_upload_finaliser``
Lambda **inserts** at Finalise (ADR-0013). It is no longer read-only the
finaliser is the one backend caller that inserts. Columns not declared here
are still owned by FE migrations and don't ripple into us.
"""
__tablename__: ClassVar[str] = "property" # pyright: ignore[reportIncompatibleVariableOverride]
# Non-Optional: this is a read-only defensive view of the FE-owned ``property``
# table — the backend never inserts rows, so every row read carries an id.
id: int = Field(primary_key=True)
# bigserial in the FE schema — DB-assigned on insert, so Optional/None on the
# way in and always populated on the way out.
id: Optional[int] = Field(default=None, primary_key=True)
portfolio_id: int
postcode: str
address: str
# Nullable in the FE schema. The finaliser writes `matched ?? user-inputted`,
# which is absent for fully-unmatched rows.
postcode: Optional[str] = Field(default=None)
address: Optional[str] = Field(default=None)
uprn: Optional[int] = Field(default=None)
landlord_property_id: Optional[str] = Field(default=None)
# Insertable columns the finaliser writes (ADR-0013). All nullable in the FE
# schema except `creation_status` (NOT NULL); the finaliser always sets it to
# 'READY', so a nullable mirror is safe — the real column enforces NOT NULL.
creation_status: Optional[str] = Field(
default=None,
sa_column=Column(property_creation_status_sa_enum, nullable=True),
)
user_inputted_address: Optional[str] = Field(default=None)
user_inputted_postcode: Optional[str] = Field(default=None)
lexiscore: Optional[float] = Field(default=None)

View file

@ -0,0 +1,146 @@
"""Finalises a BulkUpload into ``property`` rows (ADR-0013).
The domain logic of Finalise: turn the combiner output rows into property identity
rows (the same resolution the old Next.js ``/finalize`` route did) and persist them
through the injected writer. Like every orchestrator it never commits the caller
owns the transaction boundary (see the Lambda handler).
"""
from __future__ import annotations
import re
from typing import Any, Optional
from uuid import UUID
from repositories.bulk_upload.bulk_upload_status_writer import BulkUploadStatusWriter
from repositories.property.property_repository import (
PropertyIdentityInsert,
PropertyRepository,
)
# Combiner-output columns — identical to the old frontend /finalize route and the
# backend combined-results reader (router.py).
ADDRESS_COLS = ("Address 1", "Address 2", "Address 3")
POSTCODE_COL = "postcode"
INTERNAL_REF_COL = "Internal Reference"
UPRN_COL = "address2uprn_uprn"
MATCHED_ADDRESS_COL = "address2uprn_address"
LEXISCORE_COL = "address2uprn_lexiscore"
MISSING_SENTINEL = "invalid postcode"
UK_POSTCODE_RE = re.compile(r"[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}", re.IGNORECASE)
def _normalize(value: Any) -> str:
if value is None:
return ""
return str(value).strip()
def _is_missing(value: str) -> bool:
return value == "" or value.lower() == MISSING_SENTINEL
def _parse_uprn(raw: Any) -> Optional[int]:
val = _normalize(raw)
if _is_missing(val):
return None
try:
return int(val)
except ValueError:
return None
def _parse_lexiscore(raw: Any) -> Optional[float]:
val = _normalize(raw)
if _is_missing(val):
return None
try:
return float(val)
except ValueError:
return None
def _extract_postcode(matched: Optional[str], fallback: str) -> Optional[str]:
if matched:
m = UK_POSTCODE_RE.search(matched)
if m:
return m.group(0).upper()
return fallback or None
class BulkUploadFinaliserOrchestrator:
"""Owns the domain flow of Finalise, depending only on repository ports.
Both collaborators are ports (``PropertyRepository``,
``BulkUploadStatusWriter``); the concrete Postgres adapters are wired by the
Lambda handler (the composition root). So a unit test constructs this with two
fakes and exercises ``finalise`` end-to-end no engine, session, or DB. The
orchestrator never commits: the caller opens the transaction around
``finalise`` so the insert and the ``complete`` flip land atomically.
"""
def __init__(
self,
property_repo: PropertyRepository,
status_writer: BulkUploadStatusWriter,
) -> None:
self._property_repo = property_repo
self._status_writer = status_writer
def finalise(
self, rows: list[dict[str, str]], portfolio_id: int, task_id: UUID
) -> int:
"""Resolve the combiner rows, insert the properties, and mark the upload
``complete`` all via the injected repositories, no DB connection of its
own. Returns the number of properties inserted. Does not commit."""
inserts = self.to_property_rows(rows, portfolio_id)
inserted = self._property_repo.insert_all(inserts)
self._status_writer.set_status(task_id, "complete")
return inserted
def to_property_rows(
self, rows: list[dict[str, str]], portfolio_id: int
) -> list[PropertyIdentityInsert]:
"""Resolve combiner rows into property identity inserts.
Pure (no DB / IO) and independently testable. Reproduces the old
``/finalize`` route's resolution exactly: matched address falls back to the
user-inputted one; postcode is extracted from the matched address or falls
back to the user-inputted postcode.
"""
return [self._row_to_insert(raw, portfolio_id) for raw in rows]
@staticmethod
def _row_to_insert(
raw: dict[str, str], portfolio_id: int
) -> PropertyIdentityInsert:
user_inputted_address = (
", ".join(p for p in (_normalize(raw.get(c)) for c in ADDRESS_COLS) if p)
or None
)
user_inputted_postcode = _normalize(raw.get(POSTCODE_COL)) or None
uprn = _parse_uprn(raw.get(UPRN_COL))
matched_address_raw = _normalize(raw.get(MATCHED_ADDRESS_COL))
matched_address = (
None if _is_missing(matched_address_raw) else matched_address_raw
)
address = matched_address or user_inputted_address
postcode = _extract_postcode(matched_address, user_inputted_postcode or "")
internal_ref = _normalize(raw.get(INTERNAL_REF_COL)) or None
lexiscore = _parse_lexiscore(raw.get(LEXISCORE_COL))
return PropertyIdentityInsert(
portfolio_id=portfolio_id,
uprn=uprn,
landlord_property_id=internal_ref,
address=address,
postcode=postcode,
user_inputted_address=user_inputted_address,
user_inputted_postcode=user_inputted_postcode,
lexiscore=lexiscore,
creation_status="READY",
)

View file

@ -7,6 +7,7 @@ testpaths =
recommendations/tests
backend/tests
backend/address2UPRN/tests
backend/ordnanceSurvey/tests
backend/app/db/functions/tests
backend/categorisation/tests
backend/condition/tests
@ -25,7 +26,5 @@ testpaths =
etl/epc_clean/tests
etl/hubspot/tests
etl/spatial/tests
domain/sap10_ml/tests
tests/
markers =
integration: mark a test as an integration test

View file

@ -0,0 +1,24 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from uuid import UUID
class BulkUploadStatusWriter(ABC):
"""Port: writes the terminal BulkUpload status at Finalise (ADR-0013).
The finaliser owns the terminal write Next.js no longer writes ``complete``;
it only flips ``awaiting_review finalising`` at dispatch (ADR-0005). This is
the backend half of that "two writers" split, alongside the combiner's
``combining``/``awaiting_review`` writes.
"""
@abstractmethod
def set_status(self, task_id: UUID, status: str) -> None:
"""Set the status of the bulk_address_uploads row for ``task_id``.
Does not commit the caller owns the transaction boundary, so the status
flip can share the same transaction as the property insert (atomic
finalise) or run in its own session for the failure path.
"""
...

View file

@ -0,0 +1,33 @@
"""Postgres adapter for ``BulkUploadStatusWriter`` (ADR-0013).
Flips the ``bulk_address_uploads`` status on the caller's session — so the
finaliser can mark ``complete`` in the *same* transaction as the property insert
(atomic finalise), and mark ``failed`` in a fresh session on the error path.
"""
from __future__ import annotations
from datetime import datetime, timezone
from uuid import UUID
from sqlmodel import Session, select
from infrastructure.postgres.bulk_address_upload_table import BulkAddressUploadRow
from repositories.bulk_upload.bulk_upload_status_writer import BulkUploadStatusWriter
class BulkUploadStatusWriterPostgresRepository(BulkUploadStatusWriter):
def __init__(self, session: Session) -> None:
self._session = session
def set_status(self, task_id: UUID, status: str) -> None:
row = self._session.exec(
select(BulkAddressUploadRow).where(
BulkAddressUploadRow.task_id == task_id
)
).first()
if row is None:
raise ValueError(f"No bulk_address_uploads row for task_id {task_id}")
row.status = status
row.updated_at = datetime.now(timezone.utc)
self._session.add(row)

View file

@ -1,7 +1,9 @@
from __future__ import annotations
from typing import Optional
from typing import Optional, cast
from sqlalchemy import Table
from sqlalchemy.dialects.postgresql import insert as pg_insert
from sqlmodel import Session, col, select
from domain.geospatial.planning_restrictions import PlanningRestrictions
@ -9,36 +11,57 @@ from domain.property.properties import Properties
from domain.property.property import Property, PropertyIdentity
from infrastructure.postgres.property_table import PropertyRow
from repositories.epc.epc_repository import EpcRepository
from repositories.property.property_repository import PropertyRepository
from repositories.property.property_repository import (
PropertyIdentityInsert,
PropertyRepository,
)
from repositories.spatial.spatial_repository import SpatialRepository
class PropertyPostgresRepository(PropertyRepository):
"""Hydrates the Property aggregate from the FE-owned ``property`` row plus the
EPC slice (via an injected `EpcRepository`) and the planning protections
(via an injected `SpatialRepository`, keyed by UPRN ADR-0020). Reads only
from repos no external IO so a hydrated Property is a pure function of
repository state (ADR-0003).
"""Postgres adapter for the ``property`` table — reads and writes (ADR-0003).
Reads hydrate the Property aggregate from the FE-owned row plus the EPC slice
(via an injected `EpcRepository`) and the planning protections (via an
injected `SpatialRepository`, keyed by UPRN ADR-0020), so a hydrated
Property is a pure function of repository state. ``epc_repo`` / ``spatial_repo``
are optional: the Finalise write path (``insert_all``) creates new identity
rows and never hydrates, so callers that only insert construct this with a
session alone.
"""
def __init__(
self,
session: Session,
epc_repo: EpcRepository,
spatial_repo: SpatialRepository,
epc_repo: Optional[EpcRepository] = None,
spatial_repo: Optional[SpatialRepository] = None,
) -> None:
self._session = session
self._epc_repo = epc_repo
self._spatial_repo = spatial_repo
# ``__table__`` is injected at runtime on table=True classes but the stubs
# don't expose it; pin to ``Table`` so the dialect insert is typed.
self._table: Table = cast(Table, getattr(PropertyRow, "__table__"))
def _epc(self) -> EpcRepository:
if self._epc_repo is None:
raise ValueError(
"PropertyPostgresRepository needs an EpcRepository to read; it was "
"constructed for the write-only Finalise path."
)
return self._epc_repo
def get(self, property_id: int) -> Property:
row = self._session.get(PropertyRow, property_id)
if row is None:
raise ValueError(f"property {property_id} not found")
identity = PropertyIdentity(
# `postcode`/`address` are nullable in the FE schema (the finaliser may
# insert a row with neither); coerce the degenerate null to "" so the
# identity type stays a plain str.
portfolio_id=row.portfolio_id,
postcode=row.postcode,
address=row.address,
postcode=row.postcode or "",
address=row.address or "",
uprn=row.uprn,
landlord_property_id=row.landlord_property_id,
)
@ -47,7 +70,7 @@ class PropertyPostgresRepository(PropertyRepository):
)
return Property(
identity=identity,
epc=self._epc_repo.get_for_property(property_id),
epc=self._epc().get_for_property(property_id),
planning_restrictions=_restrictions_of(row.uprn, restrictions),
)
@ -58,7 +81,7 @@ class PropertyPostgresRepository(PropertyRepository):
select(PropertyRow).where(col(PropertyRow.id).in_(property_ids))
).all()
row_by_id = {row.id: row for row in rows}
epcs = self._epc_repo.get_for_properties(property_ids)
epcs = self._epc().get_for_properties(property_ids)
restrictions: dict[int, PlanningRestrictions] = self._restrictions_for(
[row.uprn for row in rows if row.uprn is not None]
)
@ -71,8 +94,8 @@ class PropertyPostgresRepository(PropertyRepository):
Property(
identity=PropertyIdentity(
portfolio_id=row.portfolio_id,
postcode=row.postcode,
address=row.address,
postcode=row.postcode or "",
address=row.address or "",
uprn=row.uprn,
landlord_property_id=row.landlord_property_id,
),
@ -82,13 +105,47 @@ class PropertyPostgresRepository(PropertyRepository):
)
return Properties(items)
def _restrictions_for(
self, uprns: list[int]
) -> dict[int, PlanningRestrictions]:
if not uprns:
def _restrictions_for(self, uprns: list[int]) -> dict[int, PlanningRestrictions]:
# No spatial repo (the write-only Finalise path) → no cached protections;
# `_restrictions_of` then defaults every UPRN to unrestricted.
if not uprns or self._spatial_repo is None:
return {}
return self._spatial_repo.get_for_uprns(uprns)
def insert_all(self, rows: list[PropertyIdentityInsert]) -> int:
if not rows:
return 0
values = [
{
"portfolio_id": r.portfolio_id,
"creation_status": r.creation_status,
"uprn": r.uprn,
"landlord_property_id": r.landlord_property_id,
"address": r.address,
"postcode": r.postcode,
"user_inputted_address": r.user_inputted_address,
"user_inputted_postcode": r.user_inputted_postcode,
"lexiscore": r.lexiscore,
}
for r in rows
]
stmt = pg_insert(self._table).values(values)
# Matches `uq_property_portfolio_uprn` (partial: WHERE uprn IS NOT NULL),
# reproducing today's Next.js onConflictDoNothing — a re-run leaves existing
# properties untouched (contrast property_overrides, which recalculates).
stmt = stmt.on_conflict_do_nothing(
index_elements=["portfolio_id", "uprn"],
index_where=self._table.c.uprn.isnot(None),
)
# SQLModel re-exports SQLAlchemy's Session.execute; one overload is marked
# deprecated in the stubs, and they resolve the INSERT to a bare
# ``Result`` (no ``rowcount``) — both are stub limitations, not real.
result = self._session.execute(stmt) # pyright: ignore[reportDeprecated]
return cast(int, result.rowcount) # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue]
def _restrictions_of(
uprn: Optional[int], by_uprn: dict[int, PlanningRestrictions]

View file

@ -1,17 +1,40 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Optional
from domain.property.properties import Properties
from domain.property.property import Property
class PropertyRepository(ABC):
"""Loads and saves the Property aggregate.
@dataclass(frozen=True)
class PropertyIdentityInsert:
"""One row inserted into the FE-owned ``property`` table at Finalise (ADR-0013).
Composes the aggregate whole from the FE-owned ``property`` identity row plus
its source-data slices (EPC today; Site Notes / enrichments as later slices
land). Aggregates load whole never half a Property (ADR-0002).
Mirrors the exact column set today's Next.js ``/finalize`` writes: nine fields
plus ``creation_status='READY'``. ``address``/``postcode`` are the resolved
(matched ?? user-inputted) values and may be ``None``.
"""
portfolio_id: int
uprn: Optional[int]
landlord_property_id: Optional[str]
address: Optional[str]
postcode: Optional[str]
user_inputted_address: Optional[str]
user_inputted_postcode: Optional[str]
lexiscore: Optional[float]
creation_status: str = "READY"
class PropertyRepository(ABC):
"""Reads and writes the FE-owned ``property`` table.
Reads hydrate the Property aggregate whole never half a Property from the
identity row plus its source-data slices (EPC today; Site Notes / enrichments
as later slices land) (ADR-0002, ADR-0012). Writes bulk-insert identity rows at
Finalise (ADR-0013). One repository per aggregate.
"""
@abstractmethod
@ -23,3 +46,11 @@ class PropertyRepository(ABC):
rather than one round-trip per property (ADR-0012). Order follows the
input ids."""
...
@abstractmethod
def insert_all(self, rows: list[PropertyIdentityInsert]) -> int:
"""Bulk-insert identity rows, skipping any whose ``(portfolio_id, uprn)``
already exists (the FE partial unique index, ``WHERE uprn IS NOT NULL``).
Rows with no UPRN are always inserted. Returns the number actually
inserted; an empty list is a no-op returning 0 (ADR-0013)."""
...

View file

@ -59,7 +59,11 @@ from domain.sap10_calculator.rdsap.cert_to_inputs import (
_is_electric_water, # pyright: ignore[reportPrivateUsage]
_is_off_peak_meter, # pyright: ignore[reportPrivateUsage]
_main_floor_u_value, # pyright: ignore[reportPrivateUsage]
_main_space_heating_high_rate_fraction, # pyright: ignore[reportPrivateUsage]
_other_fuel_cost_gbp_per_kwh, # pyright: ignore[reportPrivateUsage]
_pv_diverter_monthly_kwh, # pyright: ignore[reportPrivateUsage]
_pv_dwelling_import_price_gbp_per_kwh, # pyright: ignore[reportPrivateUsage]
_pv_eligible_demand_monthly_kwh, # pyright: ignore[reportPrivateUsage]
_primary_loss_applies, # pyright: ignore[reportPrivateUsage]
_rdsap_extract_fans_default, # pyright: ignore[reportPrivateUsage]
_pv_overshading_factor, # pyright: ignore[reportPrivateUsage]
@ -1766,6 +1770,209 @@ def test_other_fuel_cost_for_18_hour_tariff_uses_18_hour_high_rate() -> None:
)
def test_main_space_high_rate_fraction_zero_for_off_peak_storage_heaters() -> None:
# Arrange — SAP 10.2 Appendix M1 §3a (PDF p.93): E_space,m (211) is
# included in D_PV "only where the fuel code applied to it in Section
# 10a is 30, 32, 34, 35 or 38 (i.e. electricity not at the low-rate)".
# Electric STORAGE heaters (code 402) on a 7-hour off-peak tariff are
# charged wholly at the low rate (Table 12a Grid 1 SH fraction 0.00 /
# `_table_12a_system_for_main` → None) — worksheet (240) high-rate
# cost = 0 — so none of (211) may enter D_PV.
from domain.sap10_calculator.tables.table_12a import Tariff
storage_main = MainHeatingDetail(
has_fghrs=False,
main_fuel_type=29, # electricity
heat_emitter_type=1,
emitter_temperature=1,
main_heating_control=2402,
main_heating_category=7,
sap_main_heating_code=402,
)
# Act
off_peak = _main_space_heating_high_rate_fraction(
storage_main, Tariff.SEVEN_HOUR
)
standard = _main_space_heating_high_rate_fraction(
storage_main, Tariff.STANDARD
)
gas = _main_space_heating_high_rate_fraction(
_gas_boiler_detail(sap_main_heating_code=102), Tariff.SEVEN_HOUR
)
# Assert
assert abs(off_peak - 0.0) <= 1e-9
# STANDARD tariff has no high/low split → 100% high rate.
assert abs(standard - 1.0) <= 1e-9
# Non-electric main never carries an off-peak split.
assert abs(gas - 1.0) <= 1e-9
def test_pv_eligible_demand_excludes_low_rate_main_space_heating() -> None:
# Arrange — SAP 10.2 Appendix M1 §3a (PDF p.93). A main billed wholly
# at the low rate (high-rate fraction 0.0) must contribute zero to
# D_PV even though its Table-12 code (30) is in the eligible set; the
# secondary (also code 30) at its full high-rate fraction stays in.
main_1 = tuple(float(100 + m) for m in range(12))
secondary = tuple(float(10 + m) for m in range(12))
base = tuple(float(5) for _ in range(12)) # lighting et al.
# Act
excluded = _pv_eligible_demand_monthly_kwh(
lighting_monthly_kwh=base,
appliances_monthly_kwh=(0.0,) * 12,
cooking_monthly_kwh=(0.0,) * 12,
electric_shower_monthly_kwh=(0.0,) * 12,
pumps_fans_monthly_kwh=(0.0,) * 12,
main_1_fuel_monthly_kwh=main_1,
secondary_fuel_monthly_kwh=secondary,
hot_water_monthly_kwh=(0.0,) * 12,
main_fuel_code_table_12=30,
secondary_fuel_code_table_12=30,
water_heating_fuel_code_table_12=26, # gas → no E_water
main_space_high_rate_fraction=0.0,
)
included = _pv_eligible_demand_monthly_kwh(
lighting_monthly_kwh=base,
appliances_monthly_kwh=(0.0,) * 12,
cooking_monthly_kwh=(0.0,) * 12,
electric_shower_monthly_kwh=(0.0,) * 12,
pumps_fans_monthly_kwh=(0.0,) * 12,
main_1_fuel_monthly_kwh=main_1,
secondary_fuel_monthly_kwh=secondary,
hot_water_monthly_kwh=(0.0,) * 12,
main_fuel_code_table_12=30,
secondary_fuel_code_table_12=30,
water_heating_fuel_code_table_12=26,
main_space_high_rate_fraction=1.0,
)
# Assert — excluded drops the full (211); secondary stays in both.
for m in range(12):
assert abs(excluded[m] - (base[m] + secondary[m])) <= 1e-9
assert abs(included[m] - (base[m] + secondary[m] + main_1[m])) <= 1e-9
def test_pv_dwelling_import_price_blends_high_low_on_off_peak() -> None:
# Arrange — SAP 10.2 Appendix M1 §6 (PDF p.94, lines 5510-5513): PV
# used in the dwelling is credited at "a weighted average of high and
# low rates (Table 12a)". On a 7-hour tariff the ALL_OTHER_USES blend
# is 0.90 × 15.29 + 0.10 × 5.50 = 14.311 p/kWh (worksheet case 19
# (252) "PV used in dwelling" = 14.3110). STANDARD tariff has no
# split → flat Table 32 code 30 = 13.19 p/kWh (unchanged).
from domain.sap10_calculator.tables.table_12a import Tariff
# Act
off_peak = _pv_dwelling_import_price_gbp_per_kwh(
Tariff.SEVEN_HOUR, SAP_10_2_SPEC_PRICES
)
standard = _pv_dwelling_import_price_gbp_per_kwh(
Tariff.STANDARD, SAP_10_2_SPEC_PRICES
)
# Assert
assert abs(off_peak - 0.14311) <= 1e-6
assert abs(standard - 0.1319) <= 1e-6
def _pv_diverter_epc():
"""A minimal dwelling that satisfies every Appendix G4 inclusion
condition: a 210 L cylinder (code 4), no solar HW, no battery, with
`pv_diverter_present` set on the energy source."""
from dataclasses import replace
epc = make_minimal_sap10_epc(
total_floor_area_m2=90.0,
habitable_rooms_count=4,
country_code="ENG",
has_hot_water_cylinder=True,
solar_water_heating=False,
sap_heating=make_sap_heating(
main_heating_details=[_gas_boiler_detail(sap_main_heating_code=102)],
cylinder_size=4, # RdSAP Table 28 code 4 → 210 L
),
)
return replace(
epc,
sap_energy_source=replace(
epc.sap_energy_source, pv_diverter_present=True
),
)
def test_pv_diverter_monthly_applies_g4_correction_and_clamp() -> None:
# Arrange — SAP 10.2 Appendix G4 step 4 (PDF p.73): SPV,diverter,m =
# EPV,m(1 βm) × 0.8 × 0.9, clamped to ≤ (62)m + (63a)m. With a
# 100-kWh monthly surplus the uncapped diverter input is 72 kWh; a
# month whose water demand is only 50 kWh clamps it to 50.
epc = _pv_diverter_epc()
export = tuple(100.0 for _ in range(12))
demand = tuple(50.0 if m < 6 else 1000.0 for m in range(12))
# Act
out = _pv_diverter_monthly_kwh(
epc=epc,
pv_export_monthly_kwh=export,
water_demand_monthly_kwh=demand,
avg_daily_hot_water_l=120.0, # < 210 L cylinder
battery_capacity_kwh=0.0,
pv_generation_kwh=1200.0,
)
# Assert
assert out is not None
for m in range(12):
expected = min(100.0 * 0.8 * 0.9, demand[m])
assert abs(out[m] - expected) <= 1e-9
def test_pv_diverter_disregarded_when_any_g4_condition_fails() -> None:
# Arrange — SAP 10.2 Appendix G4 step 1: if a PV system / large-enough
# cylinder / no-solar-HW / no-battery condition is not met, software
# disregards the diverter (returns None).
from dataclasses import replace
epc = _pv_diverter_epc()
export: tuple[float, ...] = (100.0,) * 12
demand: tuple[float, ...] = (1000.0,) * 12
def divert(
e: object, avg_l: float = 120.0, battery: float = 0.0, pv_gen: float = 1200.0
) -> Optional[tuple[float, ...]]:
return _pv_diverter_monthly_kwh(
epc=e, # pyright: ignore[reportArgumentType]
pv_export_monthly_kwh=export,
water_demand_monthly_kwh=demand,
avg_daily_hot_water_l=avg_l,
battery_capacity_kwh=battery,
pv_generation_kwh=pv_gen,
)
# Act / Assert — sanity: all conditions met → not None.
assert divert(epc) is not None
# Diverter not present.
assert (
divert(
replace(
epc,
sap_energy_source=replace(
epc.sap_energy_source, pv_diverter_present=False
),
)
)
is None
)
# No PV generation (condition a).
assert divert(epc, pv_gen=0.0) is None
# Cylinder not larger than (43) average daily HW use (condition b).
assert divert(epc, avg_l=9999.0) is None
# Battery present (condition d).
assert divert(epc, battery=5.0) is None
# Solar water heating present (condition c).
assert divert(replace(epc, solar_water_heating=True)) is None
def test_is_off_peak_meter_recognises_bare_18_hour_lodging() -> None:
# Arrange — RdSAP 10 §17 page 85 row 10-2 lodges 18-hour meter
# as the bare "18-hour" or "18 Hour" form (Elmhurst Summary §14.2
@ -2938,6 +3145,55 @@ def test_space_heating_off_peak_fallback_uses_actual_tariff_low_rate_not_e7() ->
assert abs(cost_eighteen_hour - 0.0741) <= 1e-6
def test_space_heating_electric_room_heater_off_peak_bills_at_direct_acting_high_rate() -> None:
# Arrange — an ELECTRIC room heater (RdSAP main_heating_category 10,
# e.g. SAP code 691) is direct-acting electric, so SAP 10.2 Table 12a
# Grid 1 (PDF p.191) puts it on the "Other systems including direct-
# acting electric" row: 7-hour high-rate fraction 1.00, 10-hour 0.50.
# Unlike STORAGE heaters (category 7), which charge off-peak and so
# correctly bill 100% at the low rate, a room heater runs on demand —
# mostly at the HIGH rate. `_table_12a_system_for_main` only mapped
# ASHP, so a room heater fell through to the "100% low-rate" fallback
# (5.50 p, £0.0550), under-charging space heating by ~9.79 p/kWh and
# systematically OVER-rating the cat-10 cluster (1,000-cert API sample:
# 48 certs, mean |err| 9.49, signed +5.08). The fix maps electric
# cat-10 mains to OTHER_DIRECT_ACTING_ELECTRIC. Mirror of S0380.228
# (which fixed the same fallback for electric SECONDARY heating).
from domain.sap10_calculator.tables.table_12a import Tariff
electric_room_heater_main = MainHeatingDetail(
has_fghrs=False,
main_fuel_type=30, # standard electricity
heat_emitter_type=2,
emitter_temperature=1,
main_heating_control=2602,
main_heating_category=10, # electric room heaters
sap_main_heating_code=691,
)
gas_room_heater_main = MainHeatingDetail(
has_fghrs=False,
main_fuel_type=1, # mains gas — NOT electric
heat_emitter_type=2,
emitter_temperature=1,
main_heating_control=2602,
main_heating_category=10, # gas room heater (also cat 10)
sap_main_heating_code=631,
)
# Act — 7-hour off-peak tariff.
electric_rate = _space_heating_fuel_cost_gbp_per_kwh(
electric_room_heater_main, Tariff.SEVEN_HOUR, prices=SAP_10_2_SPEC_PRICES,
)
gas_rate = _space_heating_fuel_cost_gbp_per_kwh(
gas_room_heater_main, Tariff.SEVEN_HOUR, prices=SAP_10_2_SPEC_PRICES,
)
# Assert — electric room heater: 1.00 × 15.29 p = £0.1529 (high rate).
# Gas room heater is unaffected (non-electric → single Table 32 rate,
# not the off-peak electric split).
assert abs(electric_rate - 0.1529) <= 1e-6
assert abs(gas_rate - 0.0550) > 1e-6
def test_heat_network_dlf_full_table_12c_age_band_coverage() -> None:
# Arrange — SAP 10.2 Table 12c (page 193) heat-network Distribution
# Loss Factor by dwelling age band A..M. None → K-or-newer

View file

@ -17,12 +17,35 @@ from domain.sap10_calculator.tables.table_12a import (
Table12aSystem,
Tariff,
other_use_high_rate_fraction,
rdsap_tariff_for_cert,
space_heating_high_rate_fraction,
tariff_from_meter_type,
water_heating_high_rate_fraction,
)
def test_dual_meter_electric_room_heater_resolves_to_ten_hour_tariff() -> None:
# Arrange — RdSAP 10 §12 (PDF p.62) Dual-meter tariff dispatch: for a
# Dual meter the choice between 7-hour and 10-hour is made by the main
# heating type. Rule 3 verbatim: "if the main system ... is a direct-
# acting electric boiler (191), or electric room heaters ... it is
# 10-hour tariff." The electric room-heater codes are Table 4a 691
# (panel/convector/radiant), 692 (fan), 693 (portable), 694 (water-/
# oil-filled), 699 (assumed). Pre-slice these fell through to Rule 4
# (7-hour default), so a Dual-meter room-heater cert was billed at the
# 7-hour rates with Table 12a high-rate fraction 1.00 instead of the
# 10-hour rates with fraction 0.50 — over-charging direct-acting heat
# once S0380.230 routed it to OTHER_DIRECT_ACTING_ELECTRIC.
# Act / Assert — every electric room-heater code on a Dual meter → 10-hour.
for code in (691, 692, 693, 694, 699):
assert rdsap_tariff_for_cert(1, main_1_sap_code=code) is Tariff.TEN_HOUR
# Storage heaters (Rule 2) stay 7-hour; a gas room heater (non-Rule-3
# code) keeps the Rule 4 Dual default of 7-hour.
assert rdsap_tariff_for_cert(1, main_1_sap_code=401) is Tariff.SEVEN_HOUR
assert rdsap_tariff_for_cert(1, main_1_sap_code=601) is Tariff.SEVEN_HOUR
def test_tariff_enum_has_five_members() -> None:
"""Table 12a columns: standard (no off-peak split), 7-hour, 10-hour,
18-hour, 24-hour. Worksheet-shape fidelity: TEN_HOUR is included for

View file

@ -66,6 +66,8 @@ def build_epc() -> EpcPropertyData:
"""
main = SapBuildingPart(
identifier=BuildingPartIdentifier.MAIN,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,
@ -99,6 +101,8 @@ def build_epc() -> EpcPropertyData:
)
extension_1 = SapBuildingPart(
identifier=BuildingPartIdentifier.EXTENSION_1,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,
@ -131,6 +135,8 @@ def build_epc() -> EpcPropertyData:
)
extension_2 = SapBuildingPart(
identifier=BuildingPartIdentifier.EXTENSION_2,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=3,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,

View file

@ -64,6 +64,8 @@ def build_epc() -> EpcPropertyData:
"""
main = SapBuildingPart(
identifier=BuildingPartIdentifier.MAIN,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,

View file

@ -65,6 +65,8 @@ def build_epc() -> EpcPropertyData:
"""
main = SapBuildingPart(
identifier=BuildingPartIdentifier.MAIN,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,
@ -134,6 +136,8 @@ def build_epc() -> EpcPropertyData:
)
extension = SapBuildingPart(
identifier=BuildingPartIdentifier.EXTENSION_1,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,

View file

@ -61,6 +61,8 @@ def build_epc() -> EpcPropertyData:
"""
main = SapBuildingPart(
identifier=BuildingPartIdentifier.MAIN,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4, # "A As Built"
@ -131,6 +133,8 @@ def build_epc() -> EpcPropertyData:
)
extension = SapBuildingPart(
identifier=BuildingPartIdentifier.EXTENSION_1,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,

View file

@ -66,6 +66,8 @@ def build_epc() -> EpcPropertyData:
"""
main = SapBuildingPart(
identifier=BuildingPartIdentifier.MAIN,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,
@ -98,6 +100,8 @@ def build_epc() -> EpcPropertyData:
)
extension = SapBuildingPart(
identifier=BuildingPartIdentifier.EXTENSION_1,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=4,
construction_age_band="B",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,

View file

@ -70,6 +70,8 @@ def build_epc() -> EpcPropertyData:
"""
main = SapBuildingPart(
identifier=BuildingPartIdentifier.MAIN,
# API parity: roof_construction int mirrors the gov-EPC mapper
roof_construction=3,
construction_age_band="A",
wall_construction=_WC_CAVITY,
wall_insulation_type=4,

View file

@ -1274,6 +1274,86 @@ def test_explicit_wall_is_basement_flag_disambiguates_system_built_from_basement
assert alt_api_code_6.is_basement_wall is True
def test_system_build_property_derives_from_main_wall_construction_type() -> None:
# Arrange — system-built is a WALL TYPE: RdSAP10 WALL_SYSTEM_BUILT=6
# on the MAIN wall. It shares the integer with basement, so a code-6
# main wall is system-built only when it is NOT flagged basement. The
# `system_build` property reads the wall type (wall_construction) + the
# dedicated basement flag — it does not need a separate dwelling-level
# field.
from dataclasses import replace
base_main = make_building_part(
identifier=BuildingPartIdentifier.MAIN,
construction_age_band="G",
wall_construction=6, wall_insulation_type=4,
party_wall_construction=1, roof_construction=4,
floor_dimensions=[
make_floor_dimension(
total_floor_area_m2=80.0, room_height_m=2.5,
party_wall_length_m=0.0, heat_loss_perimeter_m=35.0, floor=0,
),
],
)
system_built = make_minimal_sap10_epc(
sap_building_parts=[replace(base_main, wall_is_basement=False)],
)
basement = make_minimal_sap10_epc(
sap_building_parts=[replace(base_main, wall_is_basement=True)],
)
cavity = make_minimal_sap10_epc(
sap_building_parts=[replace(base_main, wall_construction=4)],
)
no_main = make_minimal_sap10_epc(sap_building_parts=[])
# Act / Assert — code 6 + not basement → system-built; code 6 + basement
# → not system-built; a non-6 wall type → not system-built; no main → None.
assert system_built.system_build is True
assert basement.system_build is False
assert cavity.system_build is False
assert no_main.system_build is None
def test_system_built_addendum_clears_basement_on_code_6_walls_api_path() -> None:
# Arrange — gov-EPC API system-built cert: the per-wall code 6 can't be
# told from a basement at lodging time, so once the cert addendum marks
# the dwelling system-built, `from_api_response` clears the code-6
# basement heuristic. A genuine basement (no addendum signal) keeps it.
from dataclasses import replace
from datatypes.epc.domain.epc_property_data import Addendum
from datatypes.epc.domain.mapper import _clear_basement_flag_when_system_built # pyright: ignore[reportPrivateUsage]
code_6_main = make_building_part(
identifier=BuildingPartIdentifier.MAIN,
construction_age_band="G",
wall_construction=6, wall_insulation_type=4,
party_wall_construction=1, roof_construction=4,
floor_dimensions=[
make_floor_dimension(
total_floor_area_m2=80.0, room_height_m=2.5,
party_wall_length_m=0.0, heat_loss_perimeter_m=35.0, floor=0,
),
],
)
system_built_cert = replace(
make_minimal_sap10_epc(sap_building_parts=[code_6_main]),
addendum=Addendum(system_build=True),
)
genuine_basement_cert = make_minimal_sap10_epc(sap_building_parts=[code_6_main])
# Act
cleared = _clear_basement_flag_when_system_built(system_built_cert)
untouched = _clear_basement_flag_when_system_built(genuine_basement_cert)
# Assert — system-built cert: code-6 main wall is no longer basement,
# and the wall-type-derived system_build reads True. Genuine basement
# (no addendum) is unchanged → still basement.
assert cleared.sap_building_parts[0].main_wall_is_basement is False
assert cleared.system_build is True
assert untouched.sap_building_parts[0].main_wall_is_basement is True
assert untouched.system_build is False
def test_basement_alt_wall_uses_table_23_u_value_not_cascade() -> None:
"""RdSAP §5.17 / Table 23 governs basement-wall U-values: 0.7 for age
A-F, 0.6 for G-H, 0.45 for I, 0.35 for J, ..., 0.26 for M. The

View file

@ -21,7 +21,10 @@ from repositories.plan.plan_repository import PlanRepository
from repositories.product.product_repository import ProductRepository
from repositories.property_baseline.property_baseline_repository import PropertyBaselineRepository
from repositories.epc.epc_repository import EpcRepository
from repositories.property.property_repository import PropertyRepository
from repositories.property.property_repository import (
PropertyIdentityInsert,
PropertyRepository,
)
from repositories.scenario.scenario_repository import ScenarioRepository
from repositories.solar.solar_repository import SolarRepository
from repositories.spatial.spatial_repository import SpatialRepository
@ -60,6 +63,10 @@ class FakePropertyRepo(PropertyRepository):
def get_many(self, property_ids: list[int]) -> Properties:
return Properties([self._hydrate(property_id) for property_id in property_ids])
def insert_all(self, rows: list[PropertyIdentityInsert]) -> int:
self.inserted: list[PropertyIdentityInsert] = list(rows)
return len(rows)
class FakeEpcRepo(EpcRepository):
def __init__(self, by_property: Optional[dict[int, EpcPropertyData]] = None) -> None:

View file

@ -0,0 +1,105 @@
"""The finaliser orchestrator runs end-to-end against fake writers — no DB.
This is the payoff of injecting the repository ports (ADR-0013): the whole
Finalise domain flow (resolve combiner rows -> insert properties -> mark complete)
is unit-testable with two in-memory fakes, no engine/session/Postgres.
"""
from __future__ import annotations
from uuid import UUID, uuid4
from domain.property.properties import Properties
from domain.property.property import Property
from orchestration.bulk_upload_finaliser_orchestrator import (
BulkUploadFinaliserOrchestrator,
)
from repositories.bulk_upload.bulk_upload_status_writer import BulkUploadStatusWriter
from repositories.property.property_repository import (
PropertyIdentityInsert,
PropertyRepository,
)
class FakePropertyRepository(PropertyRepository):
"""Records inserts; the read half is unused on the Finalise path."""
def __init__(self) -> None:
self.inserted: list[PropertyIdentityInsert] = []
def insert_all(self, rows: list[PropertyIdentityInsert]) -> int:
self.inserted = list(rows)
return len(rows)
def get(self, property_id: int) -> Property: # pragma: no cover
raise NotImplementedError
def get_many(self, property_ids: list[int]) -> Properties: # pragma: no cover
raise NotImplementedError
class FakeStatusWriter(BulkUploadStatusWriter):
def __init__(self) -> None:
self.calls: list[tuple[UUID, str]] = []
def set_status(self, task_id: UUID, status: str) -> None:
self.calls.append((task_id, status))
def _orchestrator() -> tuple[
BulkUploadFinaliserOrchestrator, FakePropertyRepository, FakeStatusWriter
]:
prop = FakePropertyRepository()
status = FakeStatusWriter()
return BulkUploadFinaliserOrchestrator(prop, status), prop, status
def test_finalise_inserts_resolved_rows_and_marks_complete() -> None:
orchestrator, prop, status = _orchestrator()
task_id = uuid4()
rows = [
{
"Address 1": "1 Some Street",
"postcode": "A0 0AA",
"Internal Reference": "REF-1",
"address2uprn_uprn": "100023",
"address2uprn_address": "1 SOME STREET, TOWN, SW1A 1AA",
"address2uprn_lexiscore": "0.95",
},
]
inserted = orchestrator.finalise(rows, portfolio_id=7, task_id=task_id)
assert inserted == 1
(row,) = prop.inserted
assert row.portfolio_id == 7
assert row.uprn == 100023
assert row.landlord_property_id == "REF-1"
assert row.address == "1 SOME STREET, TOWN, SW1A 1AA" # matched wins
assert row.postcode == "SW1A 1AA" # extracted from the matched address
assert row.user_inputted_address == "1 Some Street"
assert row.lexiscore == 0.95
assert row.creation_status == "READY"
# The upload is marked complete via the injected status writer.
assert status.calls == [(task_id, "complete")]
def test_finalise_falls_back_to_user_input_when_unmatched() -> None:
orchestrator, prop, _status = _orchestrator()
rows = [
{
"Address 1": "2 Other Road",
"postcode": "B1 1BB",
"address2uprn_uprn": "invalid postcode",
"address2uprn_address": "invalid postcode",
"address2uprn_lexiscore": "",
},
]
orchestrator.finalise(rows, portfolio_id=9, task_id=uuid4())
(row,) = prop.inserted
assert row.uprn is None # missing sentinel -> None
assert row.address == "2 Other Road" # falls back to user input
assert row.postcode == "B1 1BB" # falls back to user-inputted postcode
assert row.lexiscore is None

View file

@ -48,3 +48,37 @@ def test_epc_property_data_round_trips(schema_dir: str, db_engine: Engine) -> No
# Assert
assert reloaded == original
def test_building_part_wall_insulation_thickness_preserves_int(
db_engine: Engine,
) -> None:
# SAP 10.2 §5.7/Table 8: when the API lodges
# `wall_insulation_thickness == "measured"`, the mapper resolves the
# value to an int mm. The `epc_building_part.wall_insulation_thickness`
# column must therefore preserve int vs str on round-trip (JSONB), like
# its `roof_insulation_thickness` sibling — a plain str column would
# round-trip the int 100 back as "100" and corrupt the Table 8 lookup.
from dataclasses import replace
# Arrange — take a green fixture and force the measured-int case.
original = _load_epc("RdSAP-Schema-21.0.0")
assert original.sap_building_parts, "fixture must have a building part"
bp0 = replace(original.sap_building_parts[0], wall_insulation_thickness=100)
original = replace(
original,
sap_building_parts=[bp0, *original.sap_building_parts[1:]],
)
# Act
with Session(db_engine) as session:
epc_property_id = EpcPostgresRepository(session).save(original)
session.commit()
with Session(db_engine) as session:
reloaded = EpcPostgresRepository(session).get(epc_property_id)
# Assert — the int survives as an int, not the string "100".
assert reloaded is not None
value = reloaded.sap_building_parts[0].wall_insulation_thickness
assert value == 100
assert isinstance(value, int)

View file

@ -37,7 +37,7 @@ class DomnaSharepointClient:
site_id=self.sharepoint_drive.value,
)
return sharepoint_client.list_folder_contents(path)
return sharepoint_client.list_folder_contents(path, page_size=500)
def does_folder_exists_at(self, file_name: str, file_path: str):
folders: Dict[str, Any] = self.get_folders_in_path(file_path)

View file

@ -9,3 +9,4 @@ class DomnaSites(Enum):
OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID")
PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID")
SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID")
ECO = os.getenv("ECO_SHAREPOINT_ID")