diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 70f9eabe..0035a579 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -76,6 +76,8 @@ on: required: false TF_VAR_social_housing_wave_3_sharepoint_id: required: false + TF_VAR_eco_sharepoint_id: + required: false TF_VAR_pashub_email: required: false TF_VAR_pashub_password: @@ -159,6 +161,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} @@ -210,6 +213,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }} + TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }} TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }} TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }} TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }} diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index fc999bc0..73660bb5 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -284,6 +284,46 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + # ============================================================ + # Build Bulk Upload Finaliser image and Push + # ============================================================ + bulkUploadFinaliser_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: bulk_upload_finaliser-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: applications/bulk_upload_finaliser/Dockerfile + build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + + # ============================================================ + # Deploy Bulk Upload Finaliser Lambda + # ============================================================ + bulkUploadFinaliser_lambda: + needs: [bulkUploadFinaliser_image, determine_stage, shared_terraform] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: bulkUploadFinaliser + lambda_path: deployment/terraform/lambda/bulkUploadFinaliser + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: bulk_upload_finaliser-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.bulkUploadFinaliser_image.outputs.image_digest }} + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + # ============================================================ # Condition ETL image and Push # ============================================================ @@ -448,6 +488,7 @@ jobs: TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.OSMOSIS_ACD_SHAREPOINT_ID }} TF_VAR_private_pay_sharepoint_id: ${{ secrets.PRIVATE_PAY_SHAREPOINT_ID }} TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }} + TF_VAR_eco_sharepoint_id: ${{ secrets.ECO_SHAREPOINT_ID }} TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }} TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }} TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }} @@ -458,7 +499,7 @@ jobs: # Deploy FastAPI Lambda # ============================================================ fast_api_lambda: - needs: [determine_stage, ara_engine_lambda, categorisation_lambda, postcodeSplitter_lambda, bulk_address2uprn_combiner_lambda] + needs: [determine_stage, ara_engine_lambda, categorisation_lambda, postcodeSplitter_lambda, bulk_address2uprn_combiner_lambda, bulkUploadFinaliser_lambda] uses: ./.github/workflows/_deploy_lambda.yml with: lambda_name: ara_fast_api diff --git a/.github/workflows/lambda_smoke_tests.yml b/.github/workflows/lambda_smoke_tests.yml index 44288821..6fe947ce 100644 --- a/.github/workflows/lambda_smoke_tests.yml +++ b/.github/workflows/lambda_smoke_tests.yml @@ -63,6 +63,16 @@ jobs: build_context: . service_name: bulk-address2uprn-combiner + # ============================================================ + # Bulk Upload Finaliser + # ============================================================ + bulk_upload_finaliser_smoke_test: + uses: ./.github/workflows/_smoke_test_lambda.yml + with: + dockerfile_path: applications/bulk_upload_finaliser/Dockerfile + build_context: . + service_name: bulk-upload-finaliser + # ============================================================ # Condition ETL # ============================================================ diff --git a/applications/bulk_upload_finaliser/Dockerfile b/applications/bulk_upload_finaliser/Dockerfile new file mode 100644 index 00000000..0c8f792f --- /dev/null +++ b/applications/bulk_upload_finaliser/Dockerfile @@ -0,0 +1,38 @@ +FROM public.ecr.aws/lambda/python:3.11 + +# Postgres host/port/database are baked into the image at build time from the +# deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets), +# mirroring the landlord_description_overrides Dockerfile. They map onto the +# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT baked +# in -- Terraform injects those as Lambda env vars from Secrets Manager. +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + +ENV POSTGRES_HOST=${DEV_DB_HOST} +ENV POSTGRES_PORT=${DEV_DB_PORT} +ENV POSTGRES_DATABASE=${DEV_DB_NAME} + +WORKDIR /var/task + +COPY applications/bulk_upload_finaliser/requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# DDD-shaped packages only -- no pandas, no legacy backend/. The finaliser writes +# both `property` and the terminal `bulk_address_uploads` status through DDD repos +# on its own PostgresConfig session (ADR-0013). `datatypes/` comes in via the +# Property aggregate's import closure (domain/property/property.py -> site_notes -> +# datatypes/epc/...), enforced by tests/test_lambda_packaging.py. +COPY datatypes/ datatypes/ +COPY domain/ domain/ +COPY infrastructure/ infrastructure/ +COPY orchestration/ orchestration/ +COPY repositories/ repositories/ +COPY utilities/ utilities/ +COPY applications/ applications/ + +# Place the handler at the Lambda task root so the runtime resolves +# ``main.handler`` without an extra package prefix. +COPY applications/bulk_upload_finaliser/handler.py /var/task/main.py + +CMD ["main.handler"] diff --git a/applications/bulk_upload_finaliser/bulk_upload_finaliser_trigger_body.py b/applications/bulk_upload_finaliser/bulk_upload_finaliser_trigger_body.py new file mode 100644 index 00000000..086ca291 --- /dev/null +++ b/applications/bulk_upload_finaliser/bulk_upload_finaliser_trigger_body.py @@ -0,0 +1,22 @@ +from uuid import UUID + +from pydantic import BaseModel, ConfigDict + + +class BulkUploadFinaliserTriggerBody(BaseModel): + """Trigger body for the bulk_upload_finaliser Lambda (ADR-0013). + + Dispatched by the Next.js Finalise action via + ``POST /v1/bulk-uploads/trigger-finaliser``. ``s3_uri`` is the combiner output + (``combined_output_s3_uri``) — the same address/UPRN CSV the old synchronous + ``/finalize`` route read. + """ + + model_config = ConfigDict(extra="allow") + + task_id: UUID + sub_task_id: UUID + s3_uri: str + # bigint in the FE schema; Python int is unbounded so Pydantic stays simple. + portfolio_id: int + bulk_upload_id: UUID diff --git a/applications/bulk_upload_finaliser/handler.py b/applications/bulk_upload_finaliser/handler.py new file mode 100644 index 00000000..6017cbb5 --- /dev/null +++ b/applications/bulk_upload_finaliser/handler.py @@ -0,0 +1,104 @@ +"""bulk_upload_finaliser Lambda (ADR-0013). + +Replaces the synchronous Next.js ``/finalize`` property insert. Thin wiring: parse +the trigger, read the combiner output CSV from S3, hand the rows to the +``BulkUploadFinaliserOrchestrator`` (which owns the resolution + persist), then +write the terminal BulkUpload status directly (ADR-0005 hands terminal ownership to +the backend). ``complete`` is written in the *same* transaction as the property +insert (atomic finalise); ``failed`` is written on a fresh session on error. + +PostgresConfig-only, like the landlord classifier Lambda — no legacy ``backend/`` +connection — so a single DB config (POSTGRES_*) drives the whole run. +""" + +import logging +import os +from typing import Any +from uuid import UUID + +import boto3 +from sqlalchemy.engine import Engine + +from applications.bulk_upload_finaliser.bulk_upload_finaliser_trigger_body import ( + BulkUploadFinaliserTriggerBody, +) +from infrastructure.postgres.config import PostgresConfig +from infrastructure.postgres.engine import commit_scope, make_engine, make_session +from infrastructure.s3.csv_s3_client import CsvS3Client +from infrastructure.s3.s3_uri import parse_s3_uri +from orchestration.bulk_upload_finaliser_orchestrator import ( + BulkUploadFinaliserOrchestrator, +) +from orchestration.task_orchestrator import TaskOrchestrator +from repositories.bulk_upload.bulk_upload_status_writer_postgres import ( + BulkUploadStatusWriterPostgresRepository, +) +from repositories.property.property_postgres_repository import ( + PropertyPostgresRepository, +) +from utilities.aws_lambda.subtask_handler import subtask_handler + +logger = logging.getLogger(__name__) + + +def _run(engine: Engine, trigger: BulkUploadFinaliserTriggerBody) -> int: + bucket, _key = parse_s3_uri(trigger.s3_uri) + + boto3_client: Any = boto3.client # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType] + boto_s3: Any = boto3_client("s3") + rows = CsvS3Client(boto_s3, bucket).read_rows(trigger.s3_uri) + + session = make_session(engine) + try: + orchestrator = BulkUploadFinaliserOrchestrator( + # Write-only path: no EpcRepository needed for inserts. + property_repo=PropertyPostgresRepository(session), + status_writer=BulkUploadStatusWriterPostgresRepository(session), + ) + # Atomic finalise: the orchestrator inserts properties and marks `complete` + # via its injected writers; the transaction here makes them land together — + # a failure in either rolls back both, leaving the row for the failure path. + with commit_scope(session): + inserted = orchestrator.finalise( + rows, trigger.portfolio_id, trigger.task_id + ) + finally: + session.close() + + logger.info( + "Finalised bulk upload %s: %d rows read, %d properties inserted.", + trigger.bulk_upload_id, + len(rows), + inserted, + ) + return inserted + + +def _mark_failed(engine: Engine, task_id: UUID) -> None: + session = make_session(engine) + try: + with commit_scope(session): + BulkUploadStatusWriterPostgresRepository(session).set_status( + task_id, "failed" + ) + finally: + session.close() + + +@subtask_handler() +def handler( + body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator +) -> dict[str, int]: + trigger = BulkUploadFinaliserTriggerBody.model_validate(body) + engine = make_engine(PostgresConfig.from_env(os.environ)) + + try: + inserted = _run(engine, trigger) + except Exception: + # Hand the BulkUpload to the terminal `failed` state so the UI leaves + # `finalising`; the @subtask_handler also marks the SubTask FAILED on the + # re-raise below. + _mark_failed(engine, trigger.task_id) + raise + + return {"inserted": inserted} diff --git a/applications/bulk_upload_finaliser/requirements.txt b/applications/bulk_upload_finaliser/requirements.txt new file mode 100644 index 00000000..6a85a255 --- /dev/null +++ b/applications/bulk_upload_finaliser/requirements.txt @@ -0,0 +1,4 @@ +boto3 +pydantic +sqlmodel +psycopg2-binary diff --git a/backend/app/bulk_uploads/router.py b/backend/app/bulk_uploads/router.py index c050b18c..ea02fd80 100644 --- a/backend/app/bulk_uploads/router.py +++ b/backend/app/bulk_uploads/router.py @@ -12,6 +12,7 @@ from backend.app.bulk_uploads.schema import ( CombinedResultRow, CombinedResultsResponse, CombinerTriggerRequest, + FinaliserTriggerRequest, FlagsSummary, LandlordOverridesTriggerRequest, PostcodeSplitterTriggerRequest, @@ -113,6 +114,26 @@ async def trigger_landlord_overrides(req: LandlordOverridesTriggerRequest): } +@router.post("/trigger-finaliser", status_code=202) +async def trigger_finaliser(req: FinaliserTriggerRequest): + settings = get_settings() + + try: + sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION) + response = sqs.send_message( + QueueUrl=settings.FINALISER_SQS_URL, + MessageBody=req.model_dump_json(), + ) + except Exception as e: + raise HTTPException(status_code=500, detail=f"SQS error: {e}") + + return { + "task_id": req.task_id, + "sub_task_id": req.sub_task_id, + "sqs_message_id": response.get("MessageId"), + } + + @router.get("/{task_id}/combined-results", response_model=CombinedResultsResponse) async def get_combined_results( task_id: UUID, diff --git a/backend/app/bulk_uploads/schema.py b/backend/app/bulk_uploads/schema.py index af797cac..759f76ca 100644 --- a/backend/app/bulk_uploads/schema.py +++ b/backend/app/bulk_uploads/schema.py @@ -23,6 +23,14 @@ class LandlordOverridesTriggerRequest(BaseModel): column_mapping: dict[str, str] +class FinaliserTriggerRequest(BaseModel): + task_id: str + sub_task_id: str + s3_uri: str # combiner output (combined_output_s3_uri) + portfolio_id: int + bulk_upload_id: str + + class FlagsSummary(BaseModel): duplicates: int missing: int diff --git a/backend/app/config.py b/backend/app/config.py index f969518d..8939e6ff 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -43,6 +43,7 @@ class Settings(BaseSettings): POSTCODE_SPLITTER_SQS_URL: str = "changeme" COMBINER_SQS_URL: str = "changeme" LANDLORD_OVERRIDES_SQS_URL: str = "changeme" + FINALISER_SQS_URL: str = "changeme" # Third parties EPC_AUTH_TOKEN: str = "changeme" @@ -82,6 +83,7 @@ class Settings(BaseSettings): OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None + ECO_SHAREPOINT_ID: Optional[str] = None OPENAI_API_KEY: Optional[str] = None # Pas Hub diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index b6a73d5d..e00acbe1 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -21,6 +21,8 @@ class FileTypeEnum(enum.Enum): IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan" RETROFIT_DESIGN_DOC = "retrofit_design_doc" + MCS_COMPLIANCE_CERTIFICATE = "mcs_compliance_certificate" + OTHER = "other" class FileSourceEnum(enum.Enum): diff --git a/backend/documents_parser/elmhurst_extractor.py b/backend/documents_parser/elmhurst_extractor.py index 2523acb0..16f32e07 100644 --- a/backend/documents_parser/elmhurst_extractor.py +++ b/backend/documents_parser/elmhurst_extractor.py @@ -269,8 +269,8 @@ class ElmhurstSiteNotesExtractor: ) def _wall_details_from_lines(self, lines: List[str]) -> WallDetails: - thickness_raw = self._local_val(lines, "Wall Thickness") - thickness_mm = ( + thickness_raw: Optional[str] = self._local_val(lines, "Wall Thickness") + thickness_mm: Optional[int] = ( int(thickness_raw.split()[0]) if thickness_raw else None ) # Composite / retrofit insulation thickness — Summary §7.0 @@ -280,8 +280,8 @@ class ElmhurstSiteNotesExtractor: # is local-scoped inside the §7 block so it does not collide # with the §8 Roofs / §9 Floors blocks. None when the PDF # omits the line (no retrofit lodged). - ins_thickness_raw = self._local_val(lines, "Insulation Thickness") - insulation_thickness_mm = self._parse_thickness_mm(ins_thickness_raw) + ins_thickness_raw: Optional[str] = self._local_val(lines, "Insulation Thickness") + insulation_thickness_mm: Optional[int] = self._parse_thickness_mm(ins_thickness_raw) return WallDetails( wall_type=self._local_str(lines, "Type"), insulation=self._local_str(lines, "Insulation"), @@ -318,8 +318,10 @@ class ElmhurstSiteNotesExtractor: continue if area <= 0: continue - thickness_raw = self._local_val(lines, f"Alternative Wall {n} Thickness") - thickness_mm = self._parse_thickness_mm(thickness_raw) + thickness_raw: Optional[str] = self._local_val( + lines, f"Alternative Wall {n} Thickness" + ) + thickness_mm: Optional[int] = self._parse_thickness_mm(thickness_raw) result.append(AlternativeWall( area_m2=area, wall_type=self._local_str(lines, f"Alternative Wall {n} Type"), @@ -365,8 +367,8 @@ class ElmhurstSiteNotesExtractor: return int(match.group()) if match else None def _roof_details_from_lines(self, lines: List[str]) -> RoofDetails: - thickness_raw = self._local_val(lines, "Insulation Thickness") - thickness_mm = self._parse_thickness_mm(thickness_raw) + thickness_raw: Optional[str] = self._local_val(lines, "Insulation Thickness") + thickness_mm: Optional[int] = self._parse_thickness_mm(thickness_raw) insulation = self._local_str(lines, "Insulation") # The Summary PDF omits the "Insulation Thickness" line entirely # when no retrofit insulation is lodged (e.g. "Insulation: N None" @@ -390,14 +392,14 @@ class ElmhurstSiteNotesExtractor: return self._roof_details_from_lines(lines) def _floor_details_from_lines(self, lines: List[str]) -> FloorDetails: - u_val_raw = self._local_val(lines, "Default U-value") - default_u = float(u_val_raw) if u_val_raw else None + u_val_raw: Optional[str] = self._local_val(lines, "Default U-value") + default_u: Optional[float] = float(u_val_raw) if u_val_raw else None # RdSAP 10 §5.13 Table 20 — retro-fitted upper floors lodge an # "Insulation Thickness: NNN mm" cell so the cascade can route # via the per-thickness column. Mirror of the §8 roof extractor # at `_roof_details_from_lines`. - thickness_raw = self._local_val(lines, "Insulation Thickness") - thickness_mm = self._parse_thickness_mm(thickness_raw) + thickness_raw: Optional[str] = self._local_val(lines, "Insulation Thickness") + thickness_mm: Optional[int] = self._parse_thickness_mm(thickness_raw) return FloorDetails( location=self._local_str(lines, "Location"), floor_type=self._local_str(lines, "Type"), @@ -670,12 +672,24 @@ class ElmhurstSiteNotesExtractor: # even when the main wall fields are inherited; merge # them into the inherited WallDetails so the bp carries # them through to its SapBuildingPart. + # + # "As Main Wall: Yes" inherits the EXTERNAL wall + # construction only — the PARTY WALL TYPE is lodged + # separately in the extension's §7 block and may differ + # (cert 001431: Main "CU Cavity masonry unfilled" U=0.5, + # 1st Extension "U Unable to determine" → RdSAP default + # U=0.25). Read the extension's own party wall type when + # present; fall back to the main's only when absent. + ext_party_wall_type = ( + self._local_str(wall_lines, "Party Wall Type") + or main_walls.party_wall_type + ) walls = WallDetails( wall_type=main_walls.wall_type, insulation=main_walls.insulation, thickness_unknown=main_walls.thickness_unknown, u_value_known=main_walls.u_value_known, - party_wall_type=main_walls.party_wall_type, + party_wall_type=ext_party_wall_type, thickness_mm=main_walls.thickness_mm, insulation_thickness_mm=main_walls.insulation_thickness_mm, alternative_walls=self._alternative_walls_from_lines(wall_lines), @@ -1538,6 +1552,7 @@ class ElmhurstSiteNotesExtractor: wind_turbines_terrain_type=terrain, hydro_electricity_generated_kwh=hydro, pv_arrays=self._extract_pv_arrays(), + pv_diverter_present=self._bool_val("Diverter present"), pv_percent_roof_area=pv_pct if pv_pct > 0 else None, solar_hw_collector_orientation=solar_orientation, solar_hw_collector_pitch_deg=solar_pitch, diff --git a/backend/documents_parser/tests/fixtures/Summary_001431_double_glazing.pdf b/backend/documents_parser/tests/fixtures/Summary_001431_double_glazing.pdf new file mode 100644 index 00000000..f58a0217 Binary files /dev/null and b/backend/documents_parser/tests/fixtures/Summary_001431_double_glazing.pdf differ diff --git a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py index 216af22c..66f0172e 100644 --- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py +++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py @@ -42,6 +42,7 @@ from datatypes.epc.domain.mapper import ( EpcPropertyDataMapper, UnmappedApiCode, UnmappedElmhurstLabel, + _elmhurst_glazing_type_code, # pyright: ignore[reportPrivateUsage] ) from domain.sap10_calculator.calculator import calculate_sap_from_inputs from domain.sap10_calculator.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs @@ -1471,6 +1472,68 @@ def test_summary_000474_double_glazed_windows_route_to_code_3() -> None: ) +def test_elmhurst_glazing_label_full_coverage_per_sap10_table_6b() -> None: + # Arrange — the double_glazing recommendation fixture (Summary_001431) + # exercises every RdSAP-21 §11 glazing-type lodging in one cert. Each + # label must resolve to the SAP 10.2 Table 6b cascade code whose + # `_G_LIGHT_BY_GLAZING_CODE` daylight factor g_L is correct for the + # glazing family: single 0.90, double / secondary 0.80, triple 0.70 + # (the lodged manufacturer U/g drive §3/§6; the code only sets g_L). + expected: dict[str, int] = { + "Single glazing": 1, + "Single glazing, known data": 15, + "Double pre 2002": 2, + "Double between 2002 and 2021": 3, + "Double with unknown install date": 3, + "Double glazing, known data": 3, + "Double post or during 2022": 5, + "Secondary glazing": 7, + "Secondary glazing - Normal emissivity": 11, + "Secondary glazing - Low emissivity": 12, + "Triple pre 2002": 10, + "Triple between 2002 and 2021": 9, + "Triple post or during 2022": 6, + "Triple with unknown install date": 6, + } + + # Act / Assert + for label, code in expected.items(): + assert _elmhurst_glazing_type_code(label) == code, ( + f"{label!r} should map to SAP 10.2 Table 6b code {code}" + ) + + +def test_extension_party_wall_type_read_independently_of_as_main_wall() -> None: + # Arrange — RdSAP 10 §3.3: "As Main Wall: Yes" inherits only the + # external wall CONSTRUCTION; the party wall type is lodged + # separately per building part and may differ. The double_glazing + # fixture (Summary_001431) lodges Main party "CU Cavity masonry + # unfilled" (SAP10 wall_construction 4 → u_party_wall 0.5) but the + # 1st Extension party "U Unable to determine" (→ wall_construction 0 + # → RdSAP default u_party_wall 0.25), even though the extension is + # "As Main Wall: Yes". Pre-fix the extension inherited the Main's + # party type (both 0.5), inflating worksheet (32) party heat loss. + pages = _summary_pdf_to_textract_style_pages( + _FIXTURES / "Summary_001431_double_glazing.pdf" + ) + site_notes = ElmhurstSiteNotesExtractor(pages).extract() + + # Act + epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes) + + # Assert — Main BP keeps cavity-unfilled (4); the extension BP gets + # the "Unable to determine" sentinel (0), a distinct party wall U. + party_codes = [ + bp.party_wall_construction for bp in epc.sap_building_parts + ] + assert party_codes == [4, 0], ( + f"expected Main=4 (CU, U=0.5) + Ext=0 (Unable, U=0.25), got {party_codes}" + ) + # The two map to different SAP party-wall U-values. + assert abs(u_party_wall(4) - 0.5) <= 1e-9 + assert abs(u_party_wall(0) - 0.25) <= 1e-9 + + def test_summary_mapper_raises_on_unmapped_glazing_type_label() -> None: # Arrange — same strict-coverage gate as the cylinder-size helper # (Slice S0380.15 + S0380.16): silently routing an unknown glazing @@ -4361,6 +4424,32 @@ def test_elmhurst_foam_cylinder_insulation_still_maps_to_factory_code_1() -> Non assert code == 1 +def test_elmhurst_roof_construction_int_matches_api_codes() -> None: + # Arrange — cross-mapper structural parity: the gov-EPC API mapper + # populates BOTH roof_construction (int) and roof_construction_type + # (str derived via `_API_ROOF_CONSTRUCTION_TO_STR`), but the Elmhurst + # mapper set only the string, leaving the int None. The SAP cascade + # reads the string (so SAP parity held), but consumers of the int + # (e.g. domain/sap10_ml ML aggregates) saw None on every site-notes + # cert. `_elmhurst_roof_construction_int` closes the gap, mapping the + # Elmhurst roof code to the same SAP10 int the API lodges. Unmapped + # codes return None (not a raise) — the int is not cascade-load- + # bearing, so an unknown roof must not block the cert. + from datatypes.epc.domain.mapper import _elmhurst_roof_construction_int # pyright: ignore[reportPrivateUsage] + + # Act / Assert — each Elmhurst roof code → the gov-EPC API int. + assert _elmhurst_roof_construction_int("F Flat") == 1 + assert _elmhurst_roof_construction_int("PN Pitched (slates/tiles), no access") == 3 + assert _elmhurst_roof_construction_int("PA Pitched (slates/tiles), access to loft") == 4 + assert _elmhurst_roof_construction_int("PS Pitched, sloping ceiling") == 8 + assert _elmhurst_roof_construction_int("S Same dwelling above") == 7 + assert _elmhurst_roof_construction_int("A Another dwelling above") == 7 + # Absent / unmapped → None (no raise; not cascade-load-bearing). + assert _elmhurst_roof_construction_int(None) is None + assert _elmhurst_roof_construction_int("") is None + assert _elmhurst_roof_construction_int("NR Non-residential space above") is None + + def test_elmhurst_wall_is_basement_disambiguates_system_built_from_basement() -> None: # Arrange — "SY System build" and "B Basement wall" both map to SAP10 # wall_construction=6 (canonical WALL_SYSTEM_BUILT). The explicit diff --git a/backend/ordnanceSurvey/helpers.py b/backend/ordnanceSurvey/helpers.py index c0d6583b..52fbe9c5 100644 --- a/backend/ordnanceSurvey/helpers.py +++ b/backend/ordnanceSurvey/helpers.py @@ -1,4 +1,5 @@ import urllib.parse +from typing import Any from pydantic import ValidationError import requests import pandas as pd @@ -7,13 +8,13 @@ from utils.logger import setup_logger logger = setup_logger() -def os_places_results_to_dataframe(data: dict) -> pd.DataFrame: +def os_places_results_to_dataframe(data: dict[str, Any]) -> pd.DataFrame: """ Flatten the OS Places API response results into a DataFrame. Each result contains either a DPA or LPI record. """ - results = data.get("results", []) - rows = [] + results: list[dict[str, Any]] = data.get("results", []) + rows: list[dict[str, Any]] = [] for r in results: if "DPA" in r: rows.append(r["DPA"]) @@ -22,7 +23,7 @@ def os_places_results_to_dataframe(data: dict) -> pd.DataFrame: return pd.DataFrame(rows) -def lookup_os_places(postcode: str, api_key: str) -> dict: +def lookup_os_places(postcode: str, api_key: str) -> dict[str, Any]: """ Lookup a postcode using the OS Places API. Returns the full API response data or an error dict. diff --git a/backend/ordnanceSurvey/local_handler/.env.local.example b/backend/ordnanceSurvey/local_handler/.env.local.example new file mode 100644 index 00000000..b1f0330e --- /dev/null +++ b/backend/ordnanceSurvey/local_handler/.env.local.example @@ -0,0 +1,21 @@ +# Runtime env for the Ordnance Survey handler when run locally via docker compose. +# Variable names must match backend/app/config.py Settings (DB_*, ORDNANCE_SURVEY_API_KEY) +# plus the AWS creds boto3 needs for S3 access. + +ENVIRONMENT=local + +# Database (OS Places postcode cache) +DB_HOST= +DB_PORT=5432 +DB_NAME= +DB_USERNAME= +DB_PASSWORD= + +# Ordnance Survey Places API +ORDNANCE_SURVEY_API_KEY= + +# AWS — read input CSV from / write results to S3 +AWS_ACCESS_KEY_ID= +AWS_SECRET_ACCESS_KEY= +AWS_DEFAULT_REGION=eu-west-2 +S3_BUCKET_NAME=retrofit-data-dev diff --git a/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py b/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py index e5272732..dd3c9742 100644 --- a/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py +++ b/backend/ordnanceSurvey/local_handler/invoke_local_lambda.py @@ -12,9 +12,11 @@ payload = { { "body": json.dumps( { - "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0", - "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/calico/missinguprn.csv", + "task_id": "ea1a20b2-a21f-464c-9999-25f684405b69", + "sub_task_id": "747c99df-a1ff-41a0-98c2-5265d7797f90", + "s3_uri": "s3://retrofit-data-dev/ara_raw_outputs/hyde/output1(Sheet2).csv", + "lexiscore_column": "address2uprn_lexiscore", + "lexiscore_threshold": 0.2, } ) } diff --git a/backend/ordnanceSurvey/local_handler/run_local.sh b/backend/ordnanceSurvey/local_handler/run_local.sh new file mode 100755 index 00000000..345b60ee --- /dev/null +++ b/backend/ordnanceSurvey/local_handler/run_local.sh @@ -0,0 +1,12 @@ +#!/usr/bin/env bash +set -euo pipefail +cd "$(dirname "$0")" + +if [ ! -f .env.local ]; then + cp .env.local.example .env.local + echo "Created .env.local from the template — fill it in, then re-run." >&2 + exit 1 +fi + +docker compose build --no-cache +docker compose up --force-recreate diff --git a/backend/ordnanceSurvey/main.py b/backend/ordnanceSurvey/main.py index 6e82b468..03020307 100644 --- a/backend/ordnanceSurvey/main.py +++ b/backend/ordnanceSurvey/main.py @@ -22,6 +22,7 @@ import uuid import os import pandas as pd +from tqdm import tqdm logger: logging.Logger = setup_logger() @@ -152,8 +153,10 @@ def handler(body: dict[str, Any], context: Any, local: bool = False) -> None: df["ordnance_survey_lexiscore"] = None # Process each postcode group at a time - for postcode, group in grouped: - print(f"Processing postcode: {postcode} ({len(group)} rows)") + for postcode, group in tqdm( + grouped, total=grouped.ngroups, desc="OS postcodes", unit="postcode" + ): + tqdm.write(f"Processing postcode: {postcode} ({len(group)} rows)") valid_group = AddressMatch.is_valid_postcode(postcode) if not valid_group: logger.warning(f"Postcode {postcode} is invalid, skipping") diff --git a/backend/ordnanceSurvey/tests/test_data.csv b/backend/ordnanceSurvey/tests/test_data.csv new file mode 100644 index 00000000..f5f886d7 --- /dev/null +++ b/backend/ordnanceSurvey/tests/test_data.csv @@ -0,0 +1,2 @@ +User Input,Postcode,Expected UPRN +"115 NORTHWALK, CROYDON, SURREY",CR0 9ES, diff --git a/backend/ordnanceSurvey/tests/test_os_match.py b/backend/ordnanceSurvey/tests/test_os_match.py new file mode 100644 index 00000000..b2d63689 --- /dev/null +++ b/backend/ordnanceSurvey/tests/test_os_match.py @@ -0,0 +1,153 @@ +# backend/ordnanceSurvey/tests/test_os_match.py +""" +Debug harness for Ordnance Survey address matching. + +Mirrors backend/address2UPRN/tests/test_csv.py, but for the OS Places flow: +for each (User Input, Postcode) case it hits the live OS Places API, scores every +candidate address with AddressMatch.score (exactly as backend/ordnanceSurvey/main.py +does), and prints the full ranked breakdown so you can see *why* a match was or +wasn't found. + +Run with -s to see the ranking: + + pytest backend/ordnanceSurvey/tests/test_os_match.py -s + +Requires ORDNANCE_SURVEY_API_KEY to be set (config Settings / env); skipped otherwise. +""" + +import csv +import os +import time +from pathlib import Path +from typing import Any, Optional + +import pytest + +from backend.ordnanceSurvey.helpers import ( + lookup_os_places, + os_places_results_to_dataframe, +) +from backend.utils.addressMatch import AddressMatch + +FIXTURE_PATH = Path(__file__).parent / "test_data.csv" + +# Be polite to the live OS Places API between cases. +OS_THROTTLE_SECONDS = 1.0 + +# Handler treats best_score <= 0 as "no match" (see ordnanceSurvey/main.py). +MATCH_THRESHOLD = 0.0 + + +@pytest.fixture(autouse=True) +def _throttle_os_requests(): + yield + time.sleep(OS_THROTTLE_SECONDS) + + +def _api_key() -> Optional[str]: + # Read straight from the environment so the debug harness doesn't depend on + # the full Settings model loading cleanly. Falls back to Settings if unset. + key = os.getenv("ORDNANCE_SURVEY_API_KEY") + if not key: + try: + from backend.app.config import get_settings + + key = get_settings().ORDNANCE_SURVEY_API_KEY + except Exception: + key = None + if not key or key == "changeme": + return None + return key + + +def load_test_cases(): + with open(FIXTURE_PATH, newline="", encoding="utf-8") as f: + reader = csv.DictReader(f) + return [ + pytest.param( + row["User Input"], + row["Postcode"], + (row.get("Expected UPRN") or "").strip(), + id=f'{row["User Input"]} [{row["Postcode"]}]', + ) + for row in reader + ] + + +def _scored_candidates( + user_input: str, postcode: str, api_key: str +) -> list[dict[str, Any]]: + """ + Fetch OS Places candidates for a postcode (bypassing the DB cache) and score + every candidate ADDRESS exactly as ordnanceSurvey/main.py does. Returned + ranked best-first. + """ + response: dict[str, Any] = lookup_os_places(postcode, api_key) + assert response.get("status") == 200, f"OS Places API failed: {response}" + assert "data" in response, f"No data in OS Places response: {response}" + + candidates = os_places_results_to_dataframe(response["data"]) + records: list[dict[str, Any]] = candidates.to_dict("records") # type: ignore[assignment] + + scored: list[dict[str, Any]] = [] + for rec in records: + address = str(rec.get("ADDRESS", "")) + scored.append( + { + "uprn": rec.get("UPRN", "?"), + "address": address, + "normalised": AddressMatch.normalise_address(address), + "score": AddressMatch.score(user_input, address), + } + ) + scored.sort(key=lambda r: r["score"], reverse=True) + return scored + + +def _print_debug( + user_input: str, postcode: str, scored: list[dict[str, Any]] +) -> None: + print(f"\n{'=' * 80}") + print(f"User input : {user_input!r}") + print(f"Normalised : {AddressMatch.normalise_address(user_input)!r}") + print(f"Postcode : {postcode!r}") + print(f"Candidates : {len(scored)}") + print(f"{'-' * 80}") + if not scored: + print("(no OS Places candidates returned for this postcode)") + return + for row in scored[:15]: + print(f" score={row['score']:.4f} uprn={row['uprn']}") + print(f" ADDRESS : {row['address']}") + print(f" normalised : {row['normalised']}") + print(f"{'=' * 80}") + + +@pytest.mark.integration +@pytest.mark.parametrize("user_input,postcode,expected_uprn", load_test_cases()) +def test_os_match_finds_candidate( + user_input: str, + postcode: str, + expected_uprn: str, +): + api_key = _api_key() + if api_key is None: + pytest.skip("ORDNANCE_SURVEY_API_KEY not set") + + scored = _scored_candidates(user_input, postcode, api_key) + _print_debug(user_input, postcode, scored) + + best = scored[0] if scored else None + best_score = float(best["score"]) if best is not None else 0.0 + + # The handler records a match only when best_score > 0. This assertion is + # the debug signal: when it fails, the printed ranking above shows why. + assert best is not None and best_score > MATCH_THRESHOLD, ( + f"No OS match for {user_input!r} @ {postcode!r} " + f"(best_score={best_score:.4f}). See ranking above." + ) + + if expected_uprn: + assert str(best["uprn"]) == expected_uprn, ( + f"Best match UPRN {best['uprn']!r} != expected {expected_uprn!r}" + ) diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index e63511eb..c387e0b8 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -17,6 +17,7 @@ class CoreFiles(Enum): IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation" MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan" RETROFIT_DESIGN_DOC = "Retrofit Design Doc" + MCS_COMPLIANCE_CERTIFICATE = "MCS Compliance Certificate" _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { @@ -32,14 +33,21 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = { CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value, CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value, + CoreFiles.MCS_COMPLIANCE_CERTIFICATE: FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value, } def get_core_file_type( filename: str, evidence_category: Optional[str] = None ) -> Optional[CoreFiles]: - # Identify retrofit design doc using evidence category as the name is possibly unreliable. + # Identify MCS certificate and design doc using evidence category as the names are possibly unreliable. # We might change to always use evidence category, but needs more investigation + if ( + evidence_category is not None + and evidence_category.lower() == "mcs compliance certificate" + ): + return CoreFiles.MCS_COMPLIANCE_CERTIFICATE + if evidence_category is not None and evidence_category.lower() == "retrofit design": return CoreFiles.RETROFIT_DESIGN_DOC @@ -56,6 +64,7 @@ def get_core_file_type( CoreFiles.RETROFIT_DESIGN_DOC, CoreFiles.IMPROVEMENT_OPTION_EVALUATION, CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN, + CoreFiles.MCS_COMPLIANCE_CERTIFICATE, } for core_file in CoreFiles: @@ -68,8 +77,10 @@ def get_core_file_type( return None -def get_file_type_string(filename: str) -> Optional[str]: - core_file: Optional[CoreFiles] = get_core_file_type(filename) +def get_file_type_string( + filename: str, evidence_category: Optional[str] = None +) -> Optional[str]: + core_file: Optional[CoreFiles] = get_core_file_type(filename, evidence_category) if core_file is None: return None diff --git a/backend/pashub_fetcher/evidence_categories.py b/backend/pashub_fetcher/evidence_categories.py new file mode 100644 index 00000000..4e412364 --- /dev/null +++ b/backend/pashub_fetcher/evidence_categories.py @@ -0,0 +1,63 @@ +EVIDENCE_CATEGORIES = [ + "Advice report", + "Air Tests - BGV", + "Air Tightness Strategy", + "Assessment report", + "Blue Site Notes (PAS Assessment)", + "Building Assessment report", + "Building Condition report", + "Building Regulations Sign-off", + "Claim of compliance PAS2030", + "Claim of compliance PAS2035", + "Commissioning checklist", + "Condition report", + "Contract / Invoice", + "Electrical Certificate", + "Energy report", + "Evidence of submission to CPS", + "Floor Plan", + "Full Property Assessment", + "Gas Appliance Benchmarking Certificate", + "Gas Appliance Commissioning Checklist", + "Gas Inspection Certificate", + "Handover and Commissioning Documents", + "Handover Documents", + "Handover documents for client", + "Heat Demand Calculations", + "Heritage Impact Assessment", + "Improvement option evaluation", + "Installation Guides", + "Insurance guarantee", + "Intended outcomes", + "MCS Compliance Certificate", + "Medium term improvement plan", + "Medium term low carbon plan", + "Mid Photo", + "Mid-Install Inspection", + "Minor Works Electrical Certificate", + "Monitoring and evaluation outcomes", + "Occupancy assessment", + "Other", + "Other commissioning certificates", + "Photo", + "Post Energy Performance Report (EPR)", + "Post installation RdSAP", + "Post Photo", + "Pre Energy Performance Report (EPR)", + "Pre installation RdSAP", + "Pre Photo", + "Pre-Design Building Survey", + "Pre-Installation Building Inspection", + "Product Data sheets", + "Product warranty", + "Property Assessment", + "Qualifications", + "Retrofit design", + "Risk assessment", + "Significance survey", + "Site Note (Green /Blue) and Certificate(s)", + "Ventilation Assessment", + "Ventilation Assessment Checklist", + "Ventilation Report", + "Welsh - Checklist", +] diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 626ce59d..00f0ddea 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -40,10 +40,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: if (not pashub_email) or (not pashub_password): raise ValueError("Pas Hub credentials not provided") - sharepoint_client = DomnaSharepointClient( - sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3 - ) - if coordination_hub_email and coordination_hub_password: _coord_email, _coord_password = ( coordination_hub_email, @@ -57,6 +53,16 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]: payload = PashubToAraTriggerRequest.model_validate(body) logger.debug("Successfully validated request body") + sharepoint_client: Optional[DomnaSharepointClient] = None + if payload.sharepoint_site is not None: + try: + resolved_site = DomnaSites[payload.sharepoint_site] + sharepoint_client = DomnaSharepointClient(sharepoint_location=resolved_site) + except KeyError: + logger.warning( + f"Unrecognised sharepoint_site '{payload.sharepoint_site}'; skipping SharePoint upload" + ) + service = PashubService( pashub_client=get_pashub_client(pashub_email, pashub_password), sharepoint_client=sharepoint_client, diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index 79d81838..e10fbec7 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -1,6 +1,6 @@ from collections import defaultdict import os -from typing import Dict, List, Optional +from typing import Dict, List, NamedTuple, Optional from datetime import datetime import requests @@ -13,6 +13,22 @@ from utils.logger import setup_logger logger = setup_logger() +class DownloadedFile(NamedTuple): + file_path: str + evidence_category: Optional[str] + created_utc: datetime + + +class _EvidenceFileGroups(NamedTuple): + core: Dict[CoreFiles, EvidenceFileData] + other: List[EvidenceFileData] + + +class DownloadedFiles(NamedTuple): + core: List[DownloadedFile] + other: List[DownloadedFile] + + class UnauthorizedError(Exception): pass @@ -33,42 +49,60 @@ class PashubClient: ) logger.info("Finished initialising CotalityClient") - def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]: - logger.info(f"Getting Core Evidence Files for job ID {job_id}") + def get_evidence_files_by_job_id( + self, job_id: str, include_other: bool = False + ) -> DownloadedFiles: + logger.info(f"Getting evidence files for job ID {job_id}") evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id) - logger.info(f"Found {len(evidence_list)} Evidence files to get") + logger.info(f"Found {len(evidence_list)} evidence files") if not evidence_list: - return [] + return DownloadedFiles(core=[], other=[]) - saved_files: List[str] = [] - - core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files( + grouped: _EvidenceFileGroups = self._group_into_core_and_other_files( evidence_list ) - logger.info(f"Number of core files to download is {len(core_files)}") - - for _, evidence in core_files.items(): - evidence_id = evidence.file_id - if not evidence_id: + core_files: List[DownloadedFile] = [] + for _, evidence in grouped.core.items(): + if not evidence.file_id: continue - - logger.info(f"Getting metadata for file {evidence.file_name}") metadata: EvidenceMetadata = self._get_evidence_metadata( - job_id, evidence_id + job_id, evidence.file_id ) - download_url: str = self._build_download_url(metadata, evidence.file_id) - output_dir: str = "/tmp" - - file_name: str = evidence.file_name - file_path: str = os.path.join(output_dir, file_name) - + file_path: str = os.path.join("/tmp", evidence.file_name) self._download_file(download_url, file_path) logger.info("Successfully downloaded file") - saved_files.append(file_path) + core_files.append( + DownloadedFile( + file_path=file_path, + evidence_category=evidence.evidence_category, + created_utc=datetime.fromisoformat(evidence.created_utc), + ) + ) - return saved_files + other_files: List[DownloadedFile] = [] + + if include_other: + for evidence in grouped.other: + if not evidence.file_id: + continue + + metadata = self._get_evidence_metadata(job_id, evidence.file_id) + download_url = self._build_download_url(metadata, evidence.file_id) + file_path = os.path.join("/tmp", evidence.file_name) + + self._download_file(download_url, file_path) + logger.info("Successfully downloaded other file") + other_files.append( + DownloadedFile( + file_path=file_path, + evidence_category=evidence.evidence_category, + created_utc=datetime.fromisoformat(evidence.created_utc), + ) + ) + + return DownloadedFiles(core=core_files, other=other_files) def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: logger.info(f"Getting UPRN for job ID {job_id}") @@ -92,30 +126,32 @@ class PashubClient: ) return None - def _select_latest_core_files( + def _group_into_core_and_other_files( self, files: List[EvidenceFileData], - ) -> Dict[CoreFiles, EvidenceFileData]: + ) -> _EvidenceFileGroups: grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list) + other: List[EvidenceFileData] = [] for file in files: core_type: Optional[CoreFiles] = get_core_file_type( file.file_name, file.evidence_category ) if not core_type: + other.append(file) continue grouped[core_type].append(file) - latest_files: Dict[CoreFiles, EvidenceFileData] = {} + latest_core_files: Dict[CoreFiles, EvidenceFileData] = {} for core_type, group in grouped.items(): if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1: osm_candidates = [f for f in group if "-OSM-" in f.file_name] group = osm_candidates if osm_candidates else group latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc)) - latest_files[core_type] = latest + latest_core_files[core_type] = latest - return latest_files + return _EvidenceFileGroups(core=latest_core_files, other=other) def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]: url = f"{self.base}/jobs/{job_id}/evidence" diff --git a/backend/pashub_fetcher/pashub_service.py b/backend/pashub_fetcher/pashub_service.py index f7f6ccd9..86a553f0 100644 --- a/backend/pashub_fetcher/pashub_service.py +++ b/backend/pashub_fetcher/pashub_service.py @@ -11,11 +11,15 @@ from backend.app.db.models.uploaded_file import ( from backend.documents_parser.db_writer import save_epc_property_data from backend.documents_parser.parser import parse_site_notes_pdf from backend.pashub_fetcher.core_files import get_file_type_string -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) -from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders from datatypes.epc.domain.epc_property_data import EpcPropertyData from utils.logger import setup_logger from utils.s3 import upload_file_to_s3 @@ -34,7 +38,7 @@ class PashubService: def __init__( self, pashub_client: PashubClient, - sharepoint_client: DomnaSharepointClient, + sharepoint_client: Optional[DomnaSharepointClient], s3_bucket: str, coordination_client_factory: Optional[Callable[[], PashubClient]] = None, ) -> None: @@ -75,14 +79,16 @@ class PashubService: logger.info(f"No UPRN found for job {job_id}") try: - job_files: List[str] = active_client.get_core_evidence_files_by_job_id( - job_id + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files ) except UnauthorizedError: if active_client is not self._pashub_client: raise active_client = self._get_coordination_client() - job_files = active_client.get_core_evidence_files_by_job_id(job_id) + downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id( + job_id, include_other=request.get_other_files + ) if uprn or hubspot_deal_id: logger.info("Uploading files to s3") @@ -92,29 +98,47 @@ class PashubService: else FileSourceEnum.COORDINATION_HUB ) upload_records = self._upload_to_s3_and_update_db( - job_files, uprn, hubspot_deal_id, file_source + downloaded.core, uprn, hubspot_deal_id, file_source ) self._save_site_notes(upload_records) - # SharePoint upload disabled: pashub sharepoint_link is inconsistent - # (points to property or project unpredictably) - # if request.sharepoint_link: - # self._upload_to_sharepoint(request.sharepoint_link, job_files) + if downloaded.other: + self._upload_to_s3_and_update_db( + downloaded.other, + uprn, + hubspot_deal_id, + file_source, + default_file_type=FileTypeEnum.OTHER.value, + ) - for file_path in job_files: + if self._sharepoint_client and request.sharepoint_link and request.address: + folder_name = request.address.split("|")[0].strip() + folders = self._sharepoint_client.get_folders_in_path(request.sharepoint_link) + match = next( + (f["name"] for f in folders.get("value", []) if f["name"].lower() == folder_name.lower()), + None, + ) + if match is None: + logger.warning(f"SharePoint folder not found for '{folder_name}' in {request.sharepoint_link}") + else: + property_folder_path = f"{request.sharepoint_link}/{match}" + self._upload_to_sharepoint(property_folder_path, downloaded.core + downloaded.other) + + for df in downloaded.core + downloaded.other: try: - os.remove(file_path) + os.remove(df.file_path) except OSError: - logger.warning(f"Failed to delete temp file {file_path}") + logger.warning(f"Failed to delete temp file {df.file_path}") - return job_files + return [df.file_path for df in downloaded.core + downloaded.other] def _upload_to_s3_and_update_db( self, - job_files: List[str], + job_files: List[DownloadedFile], uprn: Optional[str], hubspot_deal_id: Optional[str], file_source: FileSourceEnum, + default_file_type: Optional[str] = None, ) -> List[_FileUploadRecord]: if not uprn and not hubspot_deal_id: return [] @@ -128,11 +152,11 @@ class PashubService: file_paths: List[str] = [] uploaded_files: List[UploadedFile] = [] - for file_path in job_files: - filename = os.path.basename(file_path) + for df in job_files: + filename = os.path.basename(df.file_path) file_key = f"{base_path}/{filename}" - upload_file_to_s3(file_path, self._s3_bucket, file_key) + upload_file_to_s3(df.file_path, self._s3_bucket, file_key) uploaded_file = UploadedFile( s3_file_bucket=self._s3_bucket, @@ -141,9 +165,9 @@ class PashubService: uprn=int(uprn) if uprn else None, hubspot_deal_id=hubspot_deal_id, file_source=file_source.value, - file_type=get_file_type_string(filename), + file_type=get_file_type_string(filename, df.evidence_category) or default_file_type, ) - file_paths.append(file_path) + file_paths.append(df.file_path) uploaded_files.append(uploaded_file) with db_session() as session: @@ -180,11 +204,13 @@ class PashubService: def _upload_to_sharepoint( self, - sharepoint_link: str, - job_files: List[str], + property_folder_path: str, + files: List[DownloadedFile], ) -> None: - assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}" - - for file_path in job_files: - filename = file_path.split("/")[-1] - self._sharepoint_client.upload_file(file_path, assessment_path, filename) + assert self._sharepoint_client is not None + for df in files: + filename = os.path.basename(df.file_path) + try: + self._sharepoint_client.upload_file(df.file_path, property_folder_path, filename) + except Exception: + logger.warning(f"Failed to upload {filename} to SharePoint", exc_info=True) diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py index 715a09f8..d747a388 100644 --- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -14,6 +14,10 @@ class PashubToAraTriggerRequest(BaseModel): hubspot_listing_id: Optional[int] = None hubspot_deal_id: Optional[str] = None + sharepoint_site: Optional[str] = None + + get_other_files: bool = False + @property def pashub_job_id(self) -> str: match = re.search(r"/jobs/([^/]+)", self.pashub_link) diff --git a/backend/pashub_fetcher/tests/test_core_files.py b/backend/pashub_fetcher/tests/test_core_files.py index 3c1d11b8..a2047ece 100644 --- a/backend/pashub_fetcher/tests/test_core_files.py +++ b/backend/pashub_fetcher/tests/test_core_files.py @@ -183,3 +183,44 @@ def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present # Assert assert result is None + + +def test_core_file_for_mcs_compliance_certificate_returns_mcs_compliance_certificate() -> None: + # Arrange + filename = "MCS_cert_job123.pdf" + + # Act + result = get_core_file_type( + filename, evidence_category="mcs compliance certificate" + ) + + # Assert + assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE + + +def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None: + # Arrange + filename = "some_cert.pdf" + + # Act + result = get_core_file_type( + filename, evidence_category="MCS Compliance Certificate" + ) + + # Assert + assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE + + +def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_compliance_certificate() -> ( + None +): + # Arrange + filename = "some_cert.pdf" + + # Act + result = get_file_type_string( + filename, evidence_category="MCS Compliance Certificate" + ) + + # Assert + assert result == "mcs_compliance_certificate" diff --git a/backend/pashub_fetcher/tests/test_pashub_client.py b/backend/pashub_fetcher/tests/test_pashub_client.py index 34260c73..214a14a6 100644 --- a/backend/pashub_fetcher/tests/test_pashub_client.py +++ b/backend/pashub_fetcher/tests/test_pashub_client.py @@ -1,9 +1,22 @@ # pyright: reportPrivateUsage=false from typing import Optional +from unittest.mock import patch from backend.pashub_fetcher.core_files import CoreFiles from backend.pashub_fetcher.evidence_file_data import EvidenceFileData -from backend.pashub_fetcher.pashub_client import PashubClient +from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, +) + + +def make_metadata() -> EvidenceMetadata: + return EvidenceMetadata( + container_name="my-container", + blob_uri="https://storage.example.com/blob?sas=token", + ) def make_client() -> PashubClient: @@ -26,11 +39,27 @@ def make_file( # --------------------------------------------------------------------------- -# _select_latest_core_files +# _group_into_core_and_other_files # --------------------------------------------------------------------------- -def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: +def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None: + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="some_unknown_document.pdf"), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert CoreFiles.SITENOTE in result.core + assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"] + + +def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None: # Arrange client = make_client() files = [ @@ -42,13 +71,16 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + ) -def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: +def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None: # Arrange - the non-OSM file is newer but should lose to the OSM file client = make_client() files = [ @@ -65,13 +97,18 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None: ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf" + ) -def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None: +def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> ( + None +): # Arrange client = make_client() files = [ @@ -88,13 +125,62 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() - ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name + == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf" + ) -def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None: +def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="MCS_cert_job123.pdf", + evidence_category="MCS Compliance Certificate", + ), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in result.core + assert result.other == [] + + +def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="mcs_cert_old.pdf", + evidence_category="MCS Compliance Certificate", + created_utc="2024-01-01T00:00:00", + ), + make_file( + file_name="mcs_cert_new.pdf", + evidence_category="MCS Compliance Certificate", + created_utc="2024-06-01T00:00:00", + ), + ] + + # Act + result = client._group_into_core_and_other_files(files) + + # Assert + assert ( + result.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE].file_name + == "mcs_cert_new.pdf" + ) + + +def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> ( + None +): # Arrange client = make_client() files = [ @@ -111,7 +197,84 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() ] # Act - result = client._select_latest_core_files(files) + result = client._group_into_core_and_other_files(files) # Assert - assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + assert ( + result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf" + ) + + +# --------------------------------------------------------------------------- +# get_evidence_files_by_job_id +# --------------------------------------------------------------------------- + + +def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> ( + None +): + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="unknown_doc.pdf"), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=False) + + # Assert + assert isinstance(result, DownloadedFiles) + assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"] + assert result.other == [] + + +def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None: + # Arrange + client = make_client() + files = [ + make_file( + file_name="MCS_cert.pdf", + evidence_category="MCS Compliance Certificate", + ), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=False) + + # Assert + assert len(result.core) == 1 + assert result.core[0].evidence_category == "MCS Compliance Certificate" + + +def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> ( + None +): + # Arrange + client = make_client() + files = [ + make_file(file_name="SiteNote_001.pdf"), + make_file(file_name="unknown_doc.pdf"), + ] + + # Act + with ( + patch.object(client, "_get_evidence_list", return_value=files), + patch.object(client, "_get_evidence_metadata", return_value=make_metadata()), + patch.object(client, "_download_file"), + ): + result = client.get_evidence_files_by_job_id("job-1", include_other=True) + + # Assert + assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"] + assert [df.file_path for df in result.other] == ["/tmp/unknown_doc.pdf"] diff --git a/backend/pashub_fetcher/tests/test_pashub_service.py b/backend/pashub_fetcher/tests/test_pashub_service.py index 1f750117..ccb80ac4 100644 --- a/backend/pashub_fetcher/tests/test_pashub_service.py +++ b/backend/pashub_fetcher/tests/test_pashub_service.py @@ -1,17 +1,22 @@ import pytest +from datetime import datetime from typing import Any, Callable, Optional from unittest.mock import MagicMock, call, patch -from backend.app.db.models.uploaded_file import FileSourceEnum -from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError +from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum +from backend.pashub_fetcher.pashub_client import ( + DownloadedFile, + DownloadedFiles, + PashubClient, + UnauthorizedError, +) from backend.pashub_fetcher.pashub_service import PashubService from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( PashubToAraTriggerRequest, ) from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient - FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details" @@ -20,12 +25,16 @@ def make_request( uprn: Optional[str] = None, hubspot_deal_id: Optional[str] = None, sharepoint_link: Optional[str] = None, + get_other_files: bool = False, + address: Optional[str] = None, ) -> PashubToAraTriggerRequest: return PashubToAraTriggerRequest( pashub_link=pashub_link, uprn=uprn, hubspot_deal_id=hubspot_deal_id, sharepoint_link=sharepoint_link, + get_other_files=get_other_files, + address=address, ) @@ -43,6 +52,16 @@ def make_service( ) +_DEFAULT_UTC = datetime(2024, 1, 1) + + +def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles: + return DownloadedFiles( + core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core], + other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other], + ) + + # --------------------------------------------------------------------------- # run(): returns file paths # --------------------------------------------------------------------------- @@ -51,10 +70,9 @@ def make_service( def test_run_returns_file_paths() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/a.pdf", - "/tmp/b.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf", "/tmp/b.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -64,6 +82,30 @@ def test_run_returns_file_paths() -> None: assert result == ["/tmp/a.pdf", "/tmp/b.pdf"] +# --------------------------------------------------------------------------- +# run(): returns core + other file paths when get_other_files=True +# --------------------------------------------------------------------------- + + +def test_run_returns_core_and_other_file_paths() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + service = make_service(pashub_client=mock_client) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + result = service.run(make_request(get_other_files=True)) + + # Assert + assert result == ["/tmp/core.pdf", "/tmp/other.pdf"] + + # --------------------------------------------------------------------------- # run(): skips upload when neither uprn nor hubspot_deal_id # --------------------------------------------------------------------------- @@ -72,7 +114,9 @@ def test_run_returns_file_paths() -> None: def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) service = make_service(pashub_client=mock_client) @@ -93,10 +137,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None: def test_run_uploads_files_to_s3_using_uprn_path() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf", - "/tmp/Photopack_002.pdf", - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -132,9 +175,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None: def test_run_persists_uploaded_file_records_to_db() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) fake_session = MagicMock() service = make_service(pashub_client=mock_client) @@ -163,9 +206,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None: def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") @@ -191,9 +234,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None: def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) fake_epc_data = MagicMock() fake_session = MagicMock() @@ -241,11 +284,15 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -256,20 +303,24 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None: assert result == ["/tmp/a.pdf"] coord_client.get_uprn_by_job_id.assert_called_once() - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() assert factory.call_count == 1 def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -279,7 +330,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None: result = service.run(make_request(uprn="12345")) assert result == ["/tmp/a.pdf"] - coord_client.get_core_evidence_files_by_job_id.assert_called_once() + coord_client.get_evidence_files_by_job_id.assert_called_once() pas_client.get_uprn_by_job_id.assert_not_called() @@ -302,24 +353,32 @@ def test_run_raises_unauthorized_when_both_clients_401() -> None: factory = MagicMock(return_value=coord_client) - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with pytest.raises(UnauthorizedError): service.run(make_request()) -def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None: +def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> ( + None +): pas_client = MagicMock(spec=PashubClient) pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) coord_client.get_uprn_by_job_id.return_value = "99999" - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -334,17 +393,23 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value -def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None: +def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> ( + None +): pas_client = MagicMock(spec=PashubClient) - pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError() + pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError() coord_client = MagicMock(spec=PashubClient) - coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"] + coord_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/a.pdf"] + ) factory = MagicMock(return_value=coord_client) fake_session = MagicMock() - service = make_service(pashub_client=pas_client, coordination_client_factory=factory) + service = make_service( + pashub_client=pas_client, coordination_client_factory=factory + ) with ( patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), @@ -359,12 +424,231 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing( assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other temp files deleted after run +# --------------------------------------------------------------------------- + + +def test_run_deletes_other_temp_files_when_get_other_files_true() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + service = make_service(pashub_client=mock_client) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove") as mock_remove: + service.run(make_request(get_other_files=True)) + + # Assert + mock_remove.assert_any_call("/tmp/core.pdf") + mock_remove.assert_any_call("/tmp/other.pdf") + + +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other files uploaded to S3 +# --------------------------------------------------------------------------- + + +def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/SiteNote_001.pdf"], + other=["/tmp/unknown_file.pdf"], + ) + + service = make_service(pashub_client=mock_client, s3_bucket="my-bucket") + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3, + patch("backend.pashub_fetcher.pashub_service.db_session"), + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + service.run(make_request(uprn="12345", get_other_files=True)) + + # Assert + mock_s3.assert_any_call( + "/tmp/unknown_file.pdf", + "my-bucket", + "documents/uprn/12345/unknown_file.pdf", + ) + + +# --------------------------------------------------------------------------- +# run(): get_other_files=True → other files persisted with file_type OTHER +# --------------------------------------------------------------------------- + + +def test_run_persists_other_files_with_other_file_type() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=[], + other=["/tmp/unknown_file.pdf"], + ) + + fake_session = MagicMock() + service = make_service(pashub_client=mock_client) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db, + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + mock_db.return_value.__enter__.return_value = fake_session + service.run(make_request(uprn="12345", get_other_files=True)) + + # Assert + all_added = [item for c in fake_session.add_all.call_args_list for item in c[0][0]] + assert len(all_added) == 1 + assert all_added[0].file_type == FileTypeEnum.OTHER.value + + +def test_run_persists_mcs_cert_with_mcs_compliance_certificate_file_type() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles( + core=[ + DownloadedFile( + "/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1) + ) + ], + other=[], + ) + + fake_session = MagicMock() + service = make_service(pashub_client=mock_client) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"), + patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db, + patch("backend.pashub_fetcher.pashub_service.os.remove"), + ): + mock_db.return_value.__enter__.return_value = fake_session + service.run(make_request(uprn="12345")) + + # Assert + fake_session.add_all.assert_called_once() + added: list[Any] = fake_session.add_all.call_args[0][0] + assert added[0].file_type == FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value + + +# --------------------------------------------------------------------------- +# run(): SharePoint upload +# --------------------------------------------------------------------------- + + +def test_sharepoint_uploads_all_files_to_property_folder() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"], + other=["/tmp/other.pdf"], + ) + + mock_sharepoint = MagicMock(spec=DomnaSharepointClient) + mock_sharepoint.get_folders_in_path.return_value = { + "value": [{"name": "123 Main St"}] + } + + service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint) + + # Act + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + service.run( + make_request( + sharepoint_link="Retrofit/Properties", + get_other_files=True, + address="123 Main St | some deal", + ) + ) + + # Assert + mock_sharepoint.upload_file.assert_any_call( + "/tmp/core.pdf", "Retrofit/Properties/123 Main St", "core.pdf" + ) + mock_sharepoint.upload_file.assert_any_call( + "/tmp/other.pdf", "Retrofit/Properties/123 Main St", "other.pdf" + ) + + +def test_sharepoint_skips_upload_when_folder_not_found() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"] + ) + + mock_sharepoint = MagicMock(spec=DomnaSharepointClient) + mock_sharepoint.get_folders_in_path.return_value = { + "value": [{"name": "Different Property"}] + } + + service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint) + + # Act + with ( + patch("backend.pashub_fetcher.pashub_service.os.remove"), + patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger, + ): + service.run( + make_request( + sharepoint_link="Retrofit/Properties", + address="No Such Property | deal", + ) + ) + + # Assert + mock_sharepoint.upload_file.assert_not_called() + mock_logger.warning.assert_called() + + +def test_sharepoint_skips_upload_when_sharepoint_client_is_none() -> None: + # Arrange + mock_client = MagicMock(spec=PashubClient) + mock_client.get_uprn_by_job_id.return_value = None + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/core.pdf"] + ) + + service = PashubService( + pashub_client=mock_client, + sharepoint_client=None, + s3_bucket="test-bucket", + ) + + # Act — should not raise AttributeError on None._sharepoint_client + with patch("backend.pashub_fetcher.pashub_service.os.remove"): + result = service.run( + make_request( + sharepoint_link="Retrofit/Properties", + address="123 Main St | deal", + ) + ) + + # Assert + assert result == ["/tmp/core.pdf"] + + def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None: mock_client = MagicMock(spec=PashubClient) mock_client.get_uprn_by_job_id.return_value = None - mock_client.get_core_evidence_files_by_job_id.return_value = [ - "/tmp/RdSAP_SiteNote_001.pdf" - ] + mock_client.get_evidence_files_by_job_id.return_value = make_downloaded( + core=["/tmp/RdSAP_SiteNote_001.pdf"] + ) service = make_service(pashub_client=mock_client) diff --git a/backend/pashub_fetcher/tests/test_token_getter.py b/backend/pashub_fetcher/tests/test_token_getter.py new file mode 100644 index 00000000..2c847d16 --- /dev/null +++ b/backend/pashub_fetcher/tests/test_token_getter.py @@ -0,0 +1,50 @@ +from unittest.mock import MagicMock, patch + +from backend.pashub_fetcher.token_getter import get_token_from_local_storage + + +def _configure_playwright_mock(mock_sync_playwright: MagicMock) -> None: + mock_page = MagicMock() + mock_page.url = "https://pashub.net/dashboard" + mock_page.evaluate.return_value = "fake-token" + + mock_context = MagicMock() + mock_context.new_page.return_value = mock_page + + mock_browser = MagicMock() + mock_browser.new_context.return_value = mock_context + + mock_p = MagicMock() + mock_p.chromium.launch.return_value = mock_browser + + mock_sync_playwright.return_value.__enter__.return_value = mock_p + + +@patch("backend.pashub_fetcher.token_getter.shutil.rmtree") +@patch("backend.pashub_fetcher.token_getter.glob.glob") +@patch("backend.pashub_fetcher.token_getter.sync_playwright") +def test_playwright_tmp_dirs_are_cleaned_up_after_browser_close( + mock_sync_playwright: MagicMock, + mock_glob: MagicMock, + mock_rmtree: MagicMock, +) -> None: + # Arrange + fake_artifacts = ["/tmp/playwright-artifacts-abc12"] + fake_profiles = ["/tmp/playwright_chromiumdev_profile-xyz99"] + + def glob_side_effect(pattern: str) -> list[str]: + if "playwright-artifacts-*" in pattern: + return fake_artifacts + if "playwright_chromiumdev_profile-*" in pattern: + return fake_profiles + return [] + + mock_glob.side_effect = glob_side_effect + _configure_playwright_mock(mock_sync_playwright) + + # Act + get_token_from_local_storage("user@example.com", "secret") + + # Assert + mock_rmtree.assert_any_call("/tmp/playwright-artifacts-abc12", ignore_errors=True) + mock_rmtree.assert_any_call("/tmp/playwright_chromiumdev_profile-xyz99", ignore_errors=True) diff --git a/backend/pashub_fetcher/token_getter.py b/backend/pashub_fetcher/token_getter.py index 2e2d1440..57338a6d 100644 --- a/backend/pashub_fetcher/token_getter.py +++ b/backend/pashub_fetcher/token_getter.py @@ -1,4 +1,6 @@ +import glob import os +import shutil from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError @@ -90,5 +92,12 @@ def get_token_from_local_storage( context.close() browser.close() + for pattern in ( + "/tmp/playwright-artifacts-*", + "/tmp/playwright_chromiumdev_profile-*", + ): + for path in glob.glob(pattern): + shutil.rmtree(path, ignore_errors=True) + if record_video and video_dir: logger.info(f"Video(s) saved in: {video_dir}") diff --git a/backend/pashub_fetcher/trigger_lambda_from_file.py b/backend/pashub_fetcher/trigger_local_lambda_from_file.py similarity index 82% rename from backend/pashub_fetcher/trigger_lambda_from_file.py rename to backend/pashub_fetcher/trigger_local_lambda_from_file.py index fb9d1cbf..ad3097da 100644 --- a/backend/pashub_fetcher/trigger_lambda_from_file.py +++ b/backend/pashub_fetcher/trigger_local_lambda_from_file.py @@ -10,19 +10,19 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import ( ) from backend.pashub_fetcher.handler.handler import handler - if __name__ == "__main__": BASE_DIR = os.path.dirname(os.path.dirname(__file__)) filepath: str = os.path.join( BASE_DIR, "pashub_fetcher", - "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx", + "local_run_02-06-2026", + "ECO_Approach_Coordination_Design_KN.xlsx", ) wb = load_workbook(filepath, data_only=True) - ws = wb["filtered_2"] + ws = wb["filtered"] - HEADER_ROW = 3 + HEADER_ROW = 1 headers: Dict[str, int] = {} for col in range(1, ws.max_column + 1): @@ -31,7 +31,7 @@ if __name__ == "__main__": headers[value.strip()] = col name_col = headers["Name"] - link_col = headers["PasHub Link"] + link_col = headers["PasHub ID"] hubspot_deal_id_col = headers["HubSpot ID"] trigger_requests: List[PashubToAraTriggerRequest] = [] @@ -50,7 +50,10 @@ if __name__ == "__main__": trigger_requests.append( PashubToAraTriggerRequest( - pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id) + pashub_link=str(link), + hubspot_deal_id=str(hubspot_deal_id), + address=str(name), + get_other_files=True, ) ) diff --git a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py index f4c03afc..b647c6a7 100644 --- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py +++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py @@ -16,40 +16,48 @@ logger: logging.Logger = logging.getLogger(__name__) DRY_RUN: bool = False -DEAL_ID_FILTER: frozenset[str] = frozenset( - { - "379452094688", - "379466504437", - "379660170452", - "380016925932", - "379848065216", - "379466504434", - "379452094690", - "379965924567", - "380016925923", - "379792072898", - "379654754502", - "379560262861", - "379969670369", - "379248717001", - "379971468493", - "379999888607", - "379606372580", - "379969603797", - "379967743213", - "379263155434", - "379855267025", - "379889899719", - "379071064307", - "379867925741", - } -) +# DEAL_ID_FILTER: frozenset[str] = frozenset( +# { +# "379452094688", +# "379466504437", +# "379660170452", +# "380016925932", +# "379848065216", +# "379466504434", +# "379452094690", +# "379965924567", +# "380016925923", +# "379792072898", +# "379654754502", +# "379560262861", +# "379969670369", +# "379248717001", +# "379971468493", +# "379999888607", +# "379606372580", +# "379969603797", +# "379967743213", +# "379263155434", +# "379855267025", +# "379889899719", +# "379071064307", +# "379867925741", +# } +# ) + +DEAL_ID_FILTER = None EXCEL_PATH: str = os.path.join( os.path.dirname(__file__), - "united-infrastructure-exports-all-deals-2026-05-14.xlsx", + "local_run_02-06-2026/ECO_Approach_Coordination_Design_KN.xlsx", ) +SHAREPOINT_PROPERTIES_FOLDER: str = ( + "Housing Associations/- Client Shared Folders/Abri/Abri Property Folders (Full PAS Info)" +) + +SHAREPOINT_SITE: str = "ECO" + def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: wb = load_workbook(excel_path, data_only=True) @@ -61,10 +69,10 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: if header_val is not None: headers[str(header_val).strip()] = col - pashub_col: int = headers["PasHub link"] - record_id_col: int = headers["Record ID"] - deal_name_col: int = headers["Deal Name"] - deal_stage_col: int = headers["Deal Stage"] + pashub_col: int = headers["PasHub ID"] + record_id_col: int = headers["HubSpot ID"] + deal_name_col: int = headers["Name"] + deal_stage_col: Optional[int] = headers.get("Deal Stage", None) requests: list[PashubToAraTriggerRequest] = [] @@ -77,7 +85,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: record_id_raw = ws.cell(row=row, column=record_id_col).value deal_name_raw = ws.cell(row=row, column=deal_name_col).value - deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value + deal_stage_raw = ( + ws.cell(row=row, column=deal_stage_col).value if deal_stage_col else None + ) hubspot_deal_id: Optional[str] = ( str(record_id_raw) if record_id_raw is not None else None @@ -95,6 +105,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]: hubspot_deal_id=hubspot_deal_id, address=address, deal_stage=deal_stage, + sharepoint_link=SHAREPOINT_PROPERTIES_FOLDER or None, + sharepoint_site=SHAREPOINT_SITE, + get_other_files=True, ) ) @@ -116,7 +129,7 @@ def main() -> None: for request in trigger_requests: action: str = "DRY RUN" if DRY_RUN else "SENDING" logger.info( - f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}" + f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link} sharepoint_link={request.sharepoint_link}" ) if not DRY_RUN: diff --git a/datatypes/epc/domain/epc_property_data.py b/datatypes/epc/domain/epc_property_data.py index 150108fc..4d158c7e 100644 --- a/datatypes/epc/domain/epc_property_data.py +++ b/datatypes/epc/domain/epc_property_data.py @@ -324,6 +324,11 @@ class SapEnergySource: photovoltaic_arrays: Optional[List[PhotovoltaicArray]] = None wind_turbine_details: Optional[WindTurbineDetails] = None pv_batteries: Optional[PvBatteries] = None + # SAP 10.2 Appendix G4 — a PV diverter present on the dwelling routes + # surplus PV to a hot-water cylinder immersion. Drives worksheet + # (63b)m. Set from the API `sap_energy_source.pv_diverter` flag or the + # Elmhurst Summary §19 "Diverter present" row. + pv_diverter_present: bool = False @dataclass @@ -754,3 +759,25 @@ class EpcPropertyData: solar_hw_collector_orientation: Optional[str] = None solar_hw_collector_pitch_deg: Optional[int] = None solar_hw_overshading: Optional[str] = None + + @property + def system_build(self) -> Optional[bool]: + """Whether the dwelling's MAIN wall is system-built. + + System-built is a WALL TYPE: RdSAP10 `WALL_SYSTEM_BUILT == 6` on + the main wall (the U-value cascade table is keyed on that code). + It happens to share the integer with basement walls — so a code-6 + main wall is system-built only when it is NOT flagged as a + basement (`main_wall_is_basement`, the dedicated basement signal + the mapper sets from the distinct "SY"/"B" labels or the cert + addendum). Reading the wall type keeps the two concerns separate: + `wall_construction` carries the construction, the basement flag + carries the below-grade attribute. Returns None when there is no + MAIN building part (unknown).""" + for part in self.sap_building_parts: + if part.identifier is BuildingPartIdentifier.MAIN: + return ( + part.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE + and not part.main_wall_is_basement + ) + return None diff --git a/datatypes/epc/domain/mapper.py b/datatypes/epc/domain/mapper.py index ee551080..83a0a9eb 100644 --- a/datatypes/epc/domain/mapper.py +++ b/datatypes/epc/domain/mapper.py @@ -1,10 +1,12 @@ import re +from dataclasses import replace from datetime import date from decimal import ROUND_HALF_UP, Decimal from typing import Any, Dict, Final, List, Optional, Sequence, Union, cast from datatypes.epc.schema.helpers import from_dict from datatypes.epc.domain.epc_property_data import ( + BASEMENT_WALL_CONSTRUCTION_CODE, Addendum, BuildingPartIdentifier, EnergyElement, @@ -341,6 +343,7 @@ class EpcPropertyDataMapper: wind_turbines_terrain_type=survey.renewables.wind_turbines_terrain_type, electricity_smart_meter_present=survey.meters.electricity_smart_meter, photovoltaic_arrays=_elmhurst_pv_arrays(survey.renewables), + pv_diverter_present=survey.renewables.pv_diverter_present, # RdSAP 10 §11.1 b): when the cert lodges only a "% of # roof area" PV figure (no detailed kWp / orientation), # surface it through `photovoltaic_supply` so the @@ -1391,6 +1394,7 @@ class EpcPropertyDataMapper: else None ), pv_batteries=_first_pv_battery(es.pv_batteries), + pv_diverter_present=es.pv_diverter == "true", ), sap_building_parts=[ SapBuildingPart( @@ -1630,11 +1634,17 @@ class EpcPropertyDataMapper: for w in schema.sap_windows if not _api_is_roof_window(w) ], + # Empty → None (not []) so "no roof windows" has the single + # canonical representation the domain field defaults to + # (`Optional[List] = None`), matching the 21.0.0 path and the + # persistence round-trip (roof windows aren't yet stored — doc + # §2.4 — so a reloaded cert always reads None here). sap_roof_windows=[ _api_sap_roof_window(w) for w in schema.sap_windows if _api_is_roof_window(w) - ], + ] + or None, # SAP energy source sap_energy_source=SapEnergySource( mains_gas=es.mains_gas == "Y", @@ -1658,6 +1668,7 @@ class EpcPropertyDataMapper: else None ), pv_batteries=_first_pv_battery(es.pv_batteries), + pv_diverter_present=es.pv_diverter == "true", ), # SAP building parts sap_building_parts=[ @@ -1916,14 +1927,18 @@ class EpcPropertyDataMapper: if schema == "RdSAP-Schema-21.0.1": from datatypes.epc.schema.rdsap_schema_21_0_1 import RdSapSchema21_0_1 - return EpcPropertyDataMapper.from_rdsap_schema_21_0_1( - from_dict(RdSapSchema21_0_1, data) + return _clear_basement_flag_when_system_built( + EpcPropertyDataMapper.from_rdsap_schema_21_0_1( + from_dict(RdSapSchema21_0_1, data) + ) ) if schema == "RdSAP-Schema-21.0.0": from datatypes.epc.schema.rdsap_schema_21_0_0 import RdSapSchema21_0_0 - return EpcPropertyDataMapper.from_rdsap_schema_21_0_0( - from_dict(RdSapSchema21_0_0, data) + return _clear_basement_flag_when_system_built( + EpcPropertyDataMapper.from_rdsap_schema_21_0_0( + from_dict(RdSapSchema21_0_0, data) + ) ) raise ValueError(f"Unsupported EPC schema: {schema!r}") @@ -1933,6 +1948,68 @@ class EpcPropertyDataMapper: # --------------------------------------------------------------------------- +def _clear_basement_flag_when_system_built( + epc: EpcPropertyData, +) -> EpcPropertyData: + """When the dwelling is system-built, a `wall_construction == 6` wall + is WALL_SYSTEM_BUILT, not a basement — so the gov-EPC API code-6 + basement heuristic must not fire for it. The API path can't tell the + two apart at the per-wall level (both lodge code 6), so once the + cert-level `system_build` flag is known we clear the basement signal + on every code-6 wall that hasn't been explicitly determined + (`wall_is_basement` / `is_basement` still None). No-op unless the + dwelling is system-built, so genuine basements (system_build absent / + False) keep the code-6 heuristic. Returns the same object when + nothing changes. + + The Elmhurst path sets the per-wall flag directly from the distinct + "SY"/"B" labels, so it never reaches here (it routes through + `from_elmhurst_site_notes`, not `from_api_response`). + + Keyed on the RAW cert `addendum.system_build` signal rather than the + derived `epc.system_build` property — the property reads the wall + type AFTER this clears the basement flag, so using it here would be + circular.""" + if epc.addendum is None or epc.addendum.system_build is not True: + return epc + + def _clear_alt(alt: Optional[SapAlternativeWall]) -> Optional[SapAlternativeWall]: + if ( + alt is not None + and alt.is_basement is None + and alt.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE + ): + return replace(alt, is_basement=False) + return alt + + new_parts: List[SapBuildingPart] = [] + changed = False + for part in epc.sap_building_parts: + new_alt_1 = _clear_alt(part.sap_alternative_wall_1) + new_alt_2 = _clear_alt(part.sap_alternative_wall_2) + clear_main = ( + part.wall_is_basement is None + and part.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE + ) + if clear_main or new_alt_1 is not part.sap_alternative_wall_1 or ( + new_alt_2 is not part.sap_alternative_wall_2 + ): + changed = True + new_parts.append( + replace( + part, + wall_is_basement=False if clear_main else part.wall_is_basement, + sap_alternative_wall_1=new_alt_1, + sap_alternative_wall_2=new_alt_2, + ) + ) + else: + new_parts.append(part) + if not changed: + return epc + return replace(epc, sap_building_parts=new_parts) + + def _measurement_value(field: Any) -> float: """SAP measurements arrive as a `Measurement` (with `.value`), a raw dict {'value': N, 'quantity': '...'} when `from_dict` didn't coerce, or a plain @@ -2246,6 +2323,40 @@ def _elmhurst_dwelling_type( return f"{position} flat" +# Elmhurst roof-type codes → SAP10 roof_construction integer, matching the +# gov-EPC API codes in `_API_ROOF_CONSTRUCTION_TO_STR` so the two +# front-ends populate the same field. Harvested from the committed +# Elmhurst Summary fixtures (corpus + cohort): F/PN/PA/PS/S/A. Vaulted (5) +# and thatched (6) are omitted until a fixture surfaces their Elmhurst +# codes. NR ("Non-residential space above") is intentionally left +# unmapped — the gov enum's code 7 is specifically "dwelling above". +_ELMHURST_ROOF_CODE_TO_SAP10: Dict[str, int] = { + "F": 1, # Flat + "PN": 3, # Pitched (slates/tiles), no access (to loft) + "PA": 4, # Pitched (slates/tiles), access to loft + "PS": 8, # Pitched, sloping ceiling + "S": 7, # Same dwelling above + "A": 7, # Another dwelling above +} + + +def _elmhurst_roof_construction_int(coded: Optional[str]) -> Optional[int]: + """Map an Elmhurst roof_type string ('PA Pitched (slates/tiles), + access to loft') to the SAP10 `roof_construction` integer the gov-EPC + API lodges (4), so the site-notes and API front-ends populate the + same field (cross-mapper structural parity). + + Returns None for an absent or unmapped code — and, unlike + `_elmhurst_wall_construction_int`, does NOT raise. `roof_construction` + (int) is not read by the SAP cascade (which reads the string + `roof_construction_type`, populated on both paths), so an unmapped + roof code stays None — the pre-existing Elmhurst behaviour — rather + than blocking the cert.""" + if not coded: + return None + return _ELMHURST_ROOF_CODE_TO_SAP10.get(_leading_code(coded)) + + def _elmhurst_wall_construction_int(coded: str) -> Optional[int]: """Map an Elmhurst wall_type string ('CA Cavity') to the SAP10 integer code (4). Returns None when the lodging is absent (empty @@ -3426,6 +3537,7 @@ def _map_elmhurst_building_part( if walls.insulation_thickness_mm is not None else None ), + roof_construction=_elmhurst_roof_construction_int(roof.roof_type), roof_construction_type=_strip_code(roof.roof_type), roof_insulation_location=_strip_code(roof.insulation), roof_insulation_thickness=_resolve_sloping_ceiling_thickness( @@ -4967,6 +5079,36 @@ _ELMHURST_GLAZING_LABEL_TO_SAP10: Dict[str, int] = { # solar_transmittance=0.72 overrides per worksheet-pinned value. "Triple between 2002 and 2021": 9, "Secondary": 7, + # Elmhurst §11 lodges the full "Secondary glazing" phrase as well as + # the bare "Secondary" form — both are SAP 10.2 Table 6b "window with + # secondary glazing" (cascade code 7, g_L=0.80, g⊥=0.76). The + # RdSAP-21 schema splits secondary glazing by pane emissivity; the + # "Normal emissivity" variant is its own code 11 (g_L=0.80, identical + # cascade output to 7, kept distinct for the strict-raise audit + # trail). Surfaced on the double_glazing/before recommendation + # fixture (Summary_001431 §11 Window 9). + "Secondary glazing": 7, + "Secondary glazing - Normal emissivity": 11, + # RdSAP-21 glazed_type 12 "secondary glazing, low emissivity" — the + # low-E sibling of code 11. Cascade code 12 carries g_L=0.80 / g⊥=0.76 + # (identical daylight/solar bucket to 7 and 11; the lodged U/g drive + # §3/§6). Mapped symmetrically with the "Normal emissivity" variant so + # the whole secondary-glazing family is covered. + "Secondary glazing - Low emissivity": 12, + # RdSAP-21 row "triple glazing, installed pre-2002" (cascade code 10, + # g_L=0.70, g⊥=0.68 — same triple-glazed daylight/solar bucket as the + # other triple variants {6, 8, 9, 14}). The lodged manufacturer + # U-value / solar_transmittance drive §6; the code only sets g_L. + "Triple pre 2002": 10, + # Triple glazing of unknown install date → the generic SAP 10.2 + # Table 6b "triple glazed" row (cascade code 6, g_L=0.70). No + # dedicated "triple, unknown date" enum exists; 6 is the correct + # triple-glazed bucket. + "Triple with unknown install date": 6, + # RdSAP-21 row "single glazing, known data" (cascade code 15, + # g_L=0.90, g⊥=0.85 — same as plain single glazing). Manufacturer + # U/g lodged on WindowTransmissionDetails drive §6. + "Single glazing, known data": 15, } _ELMHURST_GLAZING_LABEL_NOISE_PREFIX_RE: Final[re.Pattern[str]] = re.compile( diff --git a/datatypes/epc/domain/tests/test_from_rdsap_schema.py b/datatypes/epc/domain/tests/test_from_rdsap_schema.py index 5a4796a5..c1ef3ad3 100644 --- a/datatypes/epc/domain/tests/test_from_rdsap_schema.py +++ b/datatypes/epc/domain/tests/test_from_rdsap_schema.py @@ -717,11 +717,16 @@ class TestApiResolveWallInsulationThickness: _api_resolve_wall_insulation_thickness, ) + lodged_thickness = "measured" + measured_value_mm = 100 + # Act - resolved = _api_resolve_wall_insulation_thickness("measured", 100) + resolved = _api_resolve_wall_insulation_thickness( + lodged_thickness, measured_value_mm + ) # Assert - assert resolved == 100 + assert resolved == measured_value_mm def test_non_measured_lodgement_passes_through_unchanged(self) -> None: # Arrange @@ -729,12 +734,17 @@ class TestApiResolveWallInsulationThickness: _api_resolve_wall_insulation_thickness, ) + ni_lodgement = "NI" + measured_value_mm = 100 + # Act - ni: object = _api_resolve_wall_insulation_thickness("NI", 100) + ni: object = _api_resolve_wall_insulation_thickness( + ni_lodgement, measured_value_mm + ) none_thk: object = _api_resolve_wall_insulation_thickness(None, None) # Assert - assert ni == "NI" + assert ni == ni_lodgement assert none_thk is None def test_measured_without_value_passes_through(self) -> None: @@ -743,11 +753,16 @@ class TestApiResolveWallInsulationThickness: _api_resolve_wall_insulation_thickness, ) + lodged_thickness = "measured" + measured_value_mm = None + # Act - resolved: object = _api_resolve_wall_insulation_thickness("measured", None) + resolved: object = _api_resolve_wall_insulation_thickness( + lodged_thickness, measured_value_mm + ) # Assert - assert resolved == "measured" + assert resolved == lodged_thickness # --------------------------------------------------------------------------- diff --git a/datatypes/epc/schema/rdsap_schema_21_0_0.py b/datatypes/epc/schema/rdsap_schema_21_0_0.py index dee7002d..6db6fa50 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_0.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py @@ -133,6 +133,7 @@ class SapEnergySource: wind_turbines_terrain_type: int electricity_smart_meter_present: str pv_batteries: Optional[PvBatteries] = None + pv_diverter: Optional[str] = None @dataclass diff --git a/datatypes/epc/schema/rdsap_schema_21_0_1.py b/datatypes/epc/schema/rdsap_schema_21_0_1.py index 87cbf91e..e508c161 100644 --- a/datatypes/epc/schema/rdsap_schema_21_0_1.py +++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py @@ -161,6 +161,7 @@ class SapEnergySource: wind_turbines_terrain_type: int electricity_smart_meter_present: str pv_battery_count: Optional[int] = None + pv_diverter: Optional[str] = None wind_turbine_details: Optional[WindTurbineDetails] = None pv_batteries: Optional[Union[PvBatteries, List[PvBatteries]]] = None diff --git a/datatypes/epc/surveys/elmhurst_site_notes.py b/datatypes/epc/surveys/elmhurst_site_notes.py index f524ac79..2fa55acc 100644 --- a/datatypes/epc/surveys/elmhurst_site_notes.py +++ b/datatypes/epc/surveys/elmhurst_site_notes.py @@ -418,6 +418,11 @@ class Renewables: solar_hw_collector_orientation: Optional[str] = None solar_hw_collector_pitch_deg: Optional[int] = None solar_hw_overshading: Optional[str] = None + # Summary §19.0 "Diverter present" — a PV diverter routes surplus PV + # generation to an immersion heater in the hot-water cylinder + # (SAP 10.2 Appendix G4). Drives worksheet (63b)m. Defaults False + # when the cert lodges no PV or "Diverter present = No". + pv_diverter_present: bool = False @dataclass diff --git a/deployment/terraform/lambda/bulkUploadFinaliser/main.tf b/deployment/terraform/lambda/bulkUploadFinaliser/main.tf new file mode 100644 index 00000000..0d6685d0 --- /dev/null +++ b/deployment/terraform/lambda/bulkUploadFinaliser/main.tf @@ -0,0 +1,49 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + +module "lambda" { + source = "../../modules/lambda_with_sqs" + + name = "bulk-upload-finaliser" + stage = var.stage + + image_uri = local.image_uri + + # The finaliser reads the combiner CSV and does one bulk INSERT — IO-light, but + # a property list can be ~40,000 rows, so 300s leaves ample headroom under the + # queue visibility timeout. batch_size = 1 keeps one upload per invocation so a + # bad record can't redrive its siblings; maximum_concurrency caps DB write + # fan-out. + timeout = 300 + batch_size = 1 + maximum_concurrency = 2 + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + POSTGRES_USERNAME = local.db_credentials.db_assessment_model_username + POSTGRES_PASSWORD = local.db_credentials.db_assessment_model_password + }, + ) +} + +# Attach S3 read policy so the handler can read the combiner output CSV. +resource "aws_iam_role_policy_attachment" "bulk_upload_finaliser_s3_read" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.bulk_upload_finaliser_s3_read_arn +} diff --git a/deployment/terraform/lambda/bulkUploadFinaliser/outputs.tf b/deployment/terraform/lambda/bulkUploadFinaliser/outputs.tf new file mode 100644 index 00000000..505d902a --- /dev/null +++ b/deployment/terraform/lambda/bulkUploadFinaliser/outputs.tf @@ -0,0 +1,9 @@ +output "bulk_upload_finaliser_queue_url" { + value = module.lambda.queue_url + description = "URL of the Bulk Upload Finaliser SQS queue (wire into the FastAPI FINALISER_SQS_URL)" +} + +output "bulk_upload_finaliser_queue_arn" { + value = module.lambda.queue_arn + description = "ARN of the Bulk Upload Finaliser SQS queue" +} diff --git a/deployment/terraform/lambda/bulkUploadFinaliser/provider.tf b/deployment/terraform/lambda/bulkUploadFinaliser/provider.tf new file mode 100644 index 00000000..98a588f1 --- /dev/null +++ b/deployment/terraform/lambda/bulkUploadFinaliser/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = ">= 5.0" + } + } + + backend "s3" { + bucket = "bulk-upload-finaliser-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} diff --git a/deployment/terraform/lambda/bulkUploadFinaliser/variables.tf b/deployment/terraform/lambda/bulkUploadFinaliser/variables.tf new file mode 100644 index 00000000..5d45b312 --- /dev/null +++ b/deployment/terraform/lambda/bulkUploadFinaliser/variables.tf @@ -0,0 +1,27 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. bulkUploadFinaliser)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} + +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/deployment/terraform/lambda/fast-api/main.tf b/deployment/terraform/lambda/fast-api/main.tf index 3a2b5a5f..dea9b7d9 100644 --- a/deployment/terraform/lambda/fast-api/main.tf +++ b/deployment/terraform/lambda/fast-api/main.tf @@ -46,6 +46,15 @@ data "terraform_remote_state" "bulk_address2uprn_combiner" { } } +data "terraform_remote_state" "bulk_upload_finaliser" { + backend = "s3" + config = { + bucket = "bulk-upload-finaliser-terraform-state", + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + ############################################ # Load Credentials ############################################ @@ -105,6 +114,7 @@ module "fastapi" { CATEGORISATION_SQS_URL = data.terraform_remote_state.categorisation.outputs.categorisation_queue_url POSTCODE_SPLITTER_SQS_URL = data.terraform_remote_state.postcode_splitter.outputs.postcode_splitter_queue_url COMBINER_SQS_URL = data.terraform_remote_state.bulk_address2uprn_combiner.outputs.bulk_address2uprn_combiner_queue_url + FINALISER_SQS_URL = data.terraform_remote_state.bulk_upload_finaliser.outputs.bulk_upload_finaliser_queue_url } } @@ -126,7 +136,8 @@ module "fastapi_sqs_policy" { data.terraform_remote_state.engine.outputs.ara_engine_queue_arn, data.terraform_remote_state.categorisation.outputs.categorisation_queue_arn, data.terraform_remote_state.postcode_splitter.outputs.postcode_splitter_queue_arn, - data.terraform_remote_state.bulk_address2uprn_combiner.outputs.bulk_address2uprn_combiner_queue_arn + data.terraform_remote_state.bulk_address2uprn_combiner.outputs.bulk_address2uprn_combiner_queue_arn, + data.terraform_remote_state.bulk_upload_finaliser.outputs.bulk_upload_finaliser_queue_arn ] conditions = null diff --git a/deployment/terraform/lambda/pashub_to_ara/main.tf b/deployment/terraform/lambda/pashub_to_ara/main.tf index eba9c874..85a2a893 100644 --- a/deployment/terraform/lambda/pashub_to_ara/main.tf +++ b/deployment/terraform/lambda/pashub_to_ara/main.tf @@ -22,6 +22,7 @@ module "lambda" { stage = var.stage image_uri = local.image_uri + timeout = var.timeout # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) maximum_concurrency = var.maximum_concurrency @@ -47,6 +48,7 @@ module "lambda" { OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id + ECO_SHAREPOINT_ID = var.eco_sharepoint_id PASHUB_EMAIL = var.pashub_email PASHUB_PASSWORD = var.pashub_password PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email diff --git a/deployment/terraform/lambda/pashub_to_ara/variables.tf b/deployment/terraform/lambda/pashub_to_ara/variables.tf index cdeff256..6673908b 100644 --- a/deployment/terraform/lambda/pashub_to_ara/variables.tf +++ b/deployment/terraform/lambda/pashub_to_ara/variables.tf @@ -17,6 +17,12 @@ variable "image_digest" { description = "Image digest (sha256:...)" } +variable "timeout" { + type = number + default = 300 + description = "Lambda timeout in seconds." +} + variable "maximum_concurrency" { type = number default = null @@ -92,6 +98,11 @@ variable "social_housing_wave_3_sharepoint_id" { sensitive = true } +variable "eco_sharepoint_id" { + type = string + sensitive = true +} + variable "pashub_email" { type = string sensitive = true diff --git a/deployment/terraform/shared/dev.tfvars b/deployment/terraform/shared/dev.tfvars index 53ca6d9e..702efbce 100644 --- a/deployment/terraform/shared/dev.tfvars +++ b/deployment/terraform/shared/dev.tfvars @@ -7,7 +7,7 @@ domain_name = "dev.hestia.homes" api_url_prefix = "api" # Database -allocated_storage = 20 +allocated_storage = 50 instance_class = "db.t4g.medium" database_name = "DevAssessmentModelDB" diff --git a/deployment/terraform/shared/main.tf b/deployment/terraform/shared/main.tf index 82a3820a..804082fc 100644 --- a/deployment/terraform/shared/main.tf +++ b/deployment/terraform/shared/main.tf @@ -558,6 +558,36 @@ output "bulk_address2uprn_combiner_s3_arn" { value = module.bulk_address2uprn_combiner_s3.policy_arn } +################################################ +# Bulk Upload Finaliser – Lambda ECR (ADR-0013) +################################################ +module "bulk_upload_finaliser_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "bulk-upload-finaliser-terraform-state" +} + +module "bulk_upload_finaliser_registry" { + source = "../modules/container_registry" + name = "bulk_upload_finaliser" + stage = var.stage +} + +# The finaliser only reads the combiner output (bulk_final_outputs) to insert +# property rows; it writes to Postgres, not S3. +module "bulk_upload_finaliser_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "BulkUploadFinaliserReadS3" + policy_description = "Allow bulk_upload_finaliser Lambda to read combiner output from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject", "s3:ListBucket"] + resource_paths = ["/bulk_final_outputs/*"] +} + +output "bulk_upload_finaliser_s3_read_arn" { + value = module.bulk_upload_finaliser_s3_read.policy_arn +} + ################################################ # Categorisation – Lambda ECR ################################################ diff --git a/docs/PR_NOTE_system_built_basement_1177.md b/docs/PR_NOTE_system_built_basement_1177.md new file mode 100644 index 00000000..6315010f --- /dev/null +++ b/docs/PR_NOTE_system_built_basement_1177.md @@ -0,0 +1,62 @@ +# PR note — SY system-built vs B basement wall (issue #1177) + +For the reviewer / the `feature/bill-derivation` session. Fold the relevant +parts into the PR description; delete this file before/at merge. + +## What this branch changed (commit `bd25a3c7`) + +`wall_construction == 6` (`WALL_SYSTEM_BUILT`) is the canonical **wall type** +for system-built — and it stays there. The basement signal, which had hijacked +code 6, is moved onto a dedicated flag: + +- `SapBuildingPart.wall_is_basement: Optional[bool]` and + `SapAlternativeWall.is_basement: Optional[bool]`. +- `main_wall_is_basement` / `is_basement_wall` honour the flag when set, else + fall back to the legacy `wall_construction == 6` heuristic (so untouched API + basements and the cert 000565 "B" cohort are unchanged). +- Elmhurst: `_elmhurst_wall_is_basement` sets the flag from the distinct + "SY"/"B" labels (SY→False, B→True, other→None). +- gov-EPC API: `from_api_response` post-processes via + `_clear_basement_flag_when_system_built` — when the cert `addendum.system_build` + is True, the code-6 basement heuristic is cleared. +- `EpcPropertyData.system_build` is a **derived `@property`** (not a stored + field): the MAIN wall is system-built iff `wall_construction == 6` and it is + not flagged basement. (Per the call: "system build is the wall type and + should be on `wall_construction`.") + +Acceptance verified: system-built main wall → `wall_construction == 6`, +`main_wall_is_basement is False`, `system_build is True`; genuine basement main +wall → `main_wall_is_basement is True`, `system_build is False`. Full §4 suite +green (2404 passed), zero new pyright errors. + +## ⚠️ Merge collision: `system_build` field (yours) vs property (this branch) + +The `#1177` prompt referenced `EpcPropertyData.system_build` as an existing +**field** on `feature/bill-derivation`. This branch adds `system_build` as a +**`@property`**. They share the name but live in different regions of the class, +so: + +- **Git will likely merge them silently** (no textual conflict). +- **Python will NOT raise at import** — the class defines fine. +- It raises `AttributeError: property 'system_build' ... has no setter` at the + **first `EpcPropertyData(...)` instantiation** — i.e. the first mapper call, + so the test suite fails immediately. (Reproduced.) + +So the collision is caught fast but is a landmine, not a clean signal. **Resolve +it deliberately at merge** — pick ONE representation: + +- **Recommended (matches the agreed model):** drop the stored field; keep the + derived property (system-built is the wall type). If your code currently + *assigns* `epc.system_build = …`, replace those writes with setting the + underlying wall type / basement flag, or add a setter. +- **Or** keep your stored field and delete this branch's property — but then + populate the field consistently with the wall type on BOTH paths (API addendum + *and* Elmhurst "SY"), so `system_build` and `wall_construction`/the basement + flag never disagree. + +## Tripwire you own + +`tests/domain/modelling/test_elmhurst_cascade_pins.py::test_system_built_generator_offers_ewi_and_iwi_each_pinning_its_after` +is a strict-xfail on your branch (fixtures `system_built_{ewi,iwi}_001431_{before,after}.pdf` +are committed there, not here). With this fix the behaviour it guards is +satisfied, so it should flip xfail→xpass — delete the marker when it does. diff --git a/docs/adr/0013-bulk-upload-finaliser-writes-properties.md b/docs/adr/0013-bulk-upload-finaliser-writes-properties.md new file mode 100644 index 00000000..f9a61d20 --- /dev/null +++ b/docs/adr/0013-bulk-upload-finaliser-writes-properties.md @@ -0,0 +1,125 @@ +# ADR-0013: The `bulk_upload_finaliser` Lambda writes properties and the terminal status + +**Status:** Accepted +**Date:** 2026-06-04 + +> Companion to the `assessment-model` (frontend) repo's +> [ADR-0005](https://github.com/Hestia-Homes/assessment-model/blob/main/docs/adr/0005-async-bulk-upload-finaliser.md), +> which owns the state-machine change and the Drizzle schema. This ADR owns the +> **backend write path**. It applies the direct-write pattern, transaction-boundary +> rule, and port/adapter/row-mirror file layout established by +> [ADR-0003](0003-python-writes-landlord-overrides-directly.md). + +## Context + +Finalising a BulkUpload inserts one `property` row per source row — up to ~40,000. +Today a synchronous Next.js route does it; frontend ADR-0005 moves it to a dispatched +Lambda for the same reasons ADR-0003 moved the classifier write into Python: the work +is internal (a Lambda computes the rows, the same Lambda persists them), the Lambda +already sits next to a Postgres connection, and routing 40k inserts back through an +HTTP hop buys nothing. + +`PropertyRow` in this repo +([`infrastructure/postgres/property_table.py`](../../infrastructure/postgres/property_table.py)) +is currently a **defensive read-only view** — its docstring states the backend never +inserts properties. Finalise changes that. + +## Decision + +1. **New application `applications/bulk_upload_finaliser/`** wraps a handler in + `@subtask_handler` (auto-injected `TaskOrchestrator`; `run_subtask` owns the + subtask start/complete/fail and the Task cascade), exactly like + `applications/landlord_description_overrides/handler.py`. The trigger body: + + ```python + class BulkUploadFinaliserTriggerBody(SubtaskTriggerBody): # task_id, sub_task_id + s3_uri: str # combiner output (combined_output_s3_uri) + portfolio_id: int + bulk_upload_id: UUID + ``` + + Dispatched via a new `POST /v1/bulk-uploads/trigger-finaliser` FastAPI endpoint + (auth `validate_token`) that enqueues to a new SQS queue — mirroring + `trigger-postcode-splitter` / `trigger-landlord-overrides`. + +2. **`PropertyRow` drops its "backend never inserts" invariant** and gains the + insertable columns finalise needs — the **exact nine** the frontend route writes + today: `portfolio_id`, `creation_status` (`'READY'`), `uprn`, + `landlord_property_id` (← `Internal Reference`), `address` (matched ?? user + input), `postcode`, `user_inputted_address`, `user_inputted_postcode`, + `lexiscore`. Drizzle remains the schema source of truth (ADR-0003); this is a + mirror change only. + +3. **Insert policy lives in SQL, idempotent:** + + ```sql + INSERT INTO property (portfolio_id, creation_status, uprn, landlord_property_id, + address, postcode, user_inputted_address, + user_inputted_postcode, lexiscore) + VALUES … + ON CONFLICT (portfolio_id, uprn) WHERE uprn IS NOT NULL + DO NOTHING; + ``` + + This reproduces the frontend's `onConflictDoNothing` exactly — existing properties + are not churned on a re-run. + +4. **The Lambda writes the terminal BulkUpload status directly — DDD, on its own + session.** On success it sets `status='complete'`; on failure `status='failed'`. + Rather than the combiner's `backend/app/db/functions` (which open the legacy + `backend/` connection on the separate `DB_*` config), the finaliser stays + **PostgresConfig-only like the landlord classifier Lambda**: it writes status + through a DDD `BulkUploadStatusWriter` port (`repositories/bulk_upload/`) backed + by a minimal `BulkAddressUploadRow` mirror (`infrastructure/postgres/`), on the + *same* session. So a single DB config (`POSTGRES_*`) drives the run and the + image needs no `backend/`. The `complete` flip happens **in the same transaction + as the property insert** (atomic finalise — either both land or neither); + `failed` is written on a fresh session in the error path. Next.js no longer + writes `complete`; it owns only the `awaiting_review → finalising` + compare-and-swap at dispatch (frontend ADR-0005). + +5. **DDD layering (ADR-0003 + the landlord precedent).** The + `orchestration/bulk_upload_finaliser_orchestrator.py` owns the whole Finalise + domain flow via a single `finalise(rows, portfolio_id, task_id)` that depends + **only on the two repository ports** — it resolves the combiner rows, inserts + the properties, and marks the upload `complete`, with no engine/session/DB of + its own. So it is unit-testable with two in-memory fakes + (`tests/orchestration/test_bulk_upload_finaliser_orchestrator.py`). The Lambda + `applications/bulk_upload_finaliser/handler.py` is the composition root: parse + trigger, read S3, build the engine/session and the concrete adapters, open the + transaction around `finalise`, and handle the failure path. New seams: + - Property insert: `insert_all` (+ the `PropertyIdentityInsert` DTO) is added to + the existing `PropertyRepository` port and its `PropertyPostgresRepository` + adapter — **one repository per aggregate**, reads and writes together, writing + the amended `infrastructure/postgres/property_table.py` mirror. `epc_repo` is + optional: the write path creates identity rows and never hydrates, so the + finaliser constructs the repo with a session alone. + - Status: port `repositories/bulk_upload/bulk_upload_status_writer.py`, adapter + `…_postgres.py`, writing the `infrastructure/postgres/bulk_address_upload_table.py` + mirror (a separate table → a separate repository). + - Transaction boundary stays in the `infrastructure/postgres/engine` helper + (`commit_scope`); the handler opens the context (once, around insert+complete), + never calls `.commit()`; orchestrator and repositories never commit. + +6. **`property_overrides` is out of scope (v2).** The table exists (frontend + migration 0221) but this Lambda does not populate it in v1. When v2 lands it adds + a `PropertyOverrideRow` mirror + `infrastructure/property/property_overrides_postgres_repository.py` + using `onConflictDoUpdate` per frontend ADR-0005. + +## Consequences + +**Positive.** +- 40k inserts run in a Lambda with a single `sub_task` row to observe, not behind a + synchronous HTTP request. +- Reuses the `@subtask_handler` / SQS / direct-Postgres machinery — reviewers have + the classifier handler and the combiner status-writers as precedents. + +**Negative.** +- **`PropertyRow` is no longer read-only.** The defensive invariant that made + accidental backend property writes impossible is gone; correctness now rests on the + finaliser being the only inserting caller. +- **Terminal-status ownership crosses the repo boundary.** The backend now writes a + BulkUpload status (`complete`/`failed`) that Next.js used to own — coordinated only + through the status column (frontend ADR-0005's "two writers, extended"). +- The Drizzle/SQLModel lockstep risk from ADR-0003 now extends to `property`'s + insertable columns. diff --git a/docs/migrations/epc-property-round-trip-fidelity.md b/docs/migrations/epc-property-round-trip-fidelity.md index d9ed6557..53590a2f 100644 --- a/docs/migrations/epc-property-round-trip-fidelity.md +++ b/docs/migrations/epc-property-round-trip-fidelity.md @@ -32,8 +32,9 @@ and **still require the matching DB migration** wherever the physical tables liv `heating_secondary_heating_type`, `heating_shower_outlet_type`, `energy_pv_connection`; `epc_main_heating_detail`: `main_fuel_type`, `heat_emitter_type`, `emitter_temperature`, `main_heating_control`; `epc_building_part`: `wall_construction`, `wall_insulation_type`, - `party_wall_construction`, `flat_roof_insulation_thickness`, `roof_insulation_location`, - `roof_insulation_thickness`; `epc_window`: `glazing_gap`, `orientation`, `window_type`, + `party_wall_construction`, `wall_insulation_thickness`, `flat_roof_insulation_thickness`, + `roof_insulation_location`, `roof_insulation_thickness`; `epc_window`: `glazing_gap`, + `orientation`, `window_type`, `glazing_type`, `window_location`, `window_wall_type`, `draught_proofed`, `permanent_shutters_present`, `transmission_data_source`). - **New scalar columns** — `epc_property`: `heating_number_baths`, `heating_number_baths_wwhrs`, @@ -68,7 +69,7 @@ in the forward mapper so the Python type round-trips exactly (JSON scalars prese |---|---| | `epc_property` | `heating_cylinder_size`, `heating_immersion_heating_type`, `heating_cylinder_insulation_type`, `heating_secondary_heating_type`, `heating_shower_outlet_type`, `energy_pv_connection` | | `epc_main_heating_detail` | `main_fuel_type`, `heat_emitter_type`, `emitter_temperature`, `main_heating_control` | -| `epc_building_part` | `wall_construction`, `wall_insulation_type`, `party_wall_construction`, `flat_roof_insulation_thickness`, `roof_insulation_location`, `roof_insulation_thickness` | +| `epc_building_part` | `wall_construction`, `wall_insulation_type`, `party_wall_construction`, `wall_insulation_thickness`, `flat_roof_insulation_thickness`, `roof_insulation_location`, `roof_insulation_thickness` | | `epc_window` | `glazing_gap`, `orientation`, `window_type`, `glazing_type`, `window_location`, `window_wall_type`, `draught_proofed`, `permanent_shutters_present` | (`energy_meter_type` and `energy_wind_turbines_terrain_type` are `str` in the domain — leave as diff --git a/domain/sap10_calculator/docs/HANDOVER_API_SAMPLE_ACCURACY.md b/domain/sap10_calculator/docs/HANDOVER_API_SAMPLE_ACCURACY.md index 2aa3ff68..ffd15274 100644 --- a/domain/sap10_calculator/docs/HANDOVER_API_SAMPLE_ACCURACY.md +++ b/domain/sap10_calculator/docs/HANDOVER_API_SAMPLE_ACCURACY.md @@ -4,10 +4,82 @@ Point-in-time note. Start from [`AGENT_GUIDE.md`](AGENT_GUIDE.md) for methodolog 1e-4 bar, the per-line debugging loop, the section helpers, and the suite command. - **Branch:** `feature/per-cert-mapper-validation` -- **HEAD:** `0f6b4023` (S0380.229). Next slice: **S0380.230**. +- **HEAD:** `9521d524`. Next SAP slice: **S0380.235**. - **Baseline (§4 suite):** `tests/domain/sap10_calculator/ backend/documents_parser/tests/` - → green (2397 passed, 1 skipped). Pre-existing out-of-scope failures unchanged + → green (2412 passed, 1 skipped). Pre-existing out-of-scope failures unchanged (stone-§5.6 in `domain/sap10_ml/tests/`; `test_from_rdsap_schema.py::...test_total_floor_area`). + +## Shipped this session (S0380.232-234 — the case-19 PV closure) + +The PV diverter (the prior handover's S0380.232 ask) needed two prerequisite +spec bugs fixed first; all three landed: + +| slice | commit | spec | what | +|---|---|---|---| +| **S0380.232** | `212b0c92` | App M1 §3a (p.93, l.5470-5476) | D_PV excludes the LOW-rate portion of an off-peak electric main: `(211)` is only PV-eligible where its §10a code ∈ {30,32,34,35,38}. Storage heaters on 7-hr charge wholly at low rate → fraction 0.0 → excluded. β_Jan 0.894→0.792 (ws 0.791). New `_main_space_heating_high_rate_fraction`. | +| **S0380.233** | `d4a8c02b` | App M1 §6 (p.94, l.5510-5513) | PV-used-in-dwelling credited at the Table 12a ALL_OTHER_USES **weighted** rate (7-hr 14.311 p/kWh), not the bare low rate (5.50). Was under-crediting onsite PV on every off-peak PV cert. Delegates to `_other_fuel_cost_gbp_per_kwh`; STANDARD unchanged. | +| **S0380.234** | `9521d524` | Appendix G4 (p.72-73) | The PV diverter. 3 layers: extractor `Diverter present` + schema `pv_diverter` → `pv_diverter_present` flag (Elmhurst + API mappers) → `_pv_diverter_monthly_kwh` (SPV = export×0.8×0.9, clamp ≤ (62)+(63a), → (63b)m); `cert_to_inputs` recomputes (219) + PV export, β fixed pre-diverter. | + +**Case 19 now: SAP cont 50.33 → 51.34** (ws 51.2221; both round to lodged **51**), +cost (255) 1847.5→1812.3 (ws 1816.6), CO2 3331→3120 (ws 3126), (233a) dwelling +1280.6 (ws **1280.4** — the β fix pins it). The diverter formula is **exact in +summer** (Jun SPV 186.07 = export×0.72, matches ws (63b)). + +**The remaining +0.11 SAP on case 19 = two separate, still-open causes:** +1. **Winter Appendix-M monthly EPV shape.** Our annual EPV (2684.17) matches the + worksheet exactly and Jun-Sep match per-month exactly, but Jan-May/Oct-Dec our + EPV is ~9-11% LOW (worksheet Jan 68.2 vs ours 62.5). Back-solve: ws EPV_m = + |(233a)_m| + |(63b)_m|/0.72. This under-diverts in winter → export (233b) 280.7 + vs ws 184.2, and (219) 3322 vs ws 3188. **A two-array PV apportionment issue + (case 19 has SE + NW arrays with different overshading) — chase in §M / Appendix U + monthly radiation, NOT the diverter (which is validated).** +2. **Fabric (33) +1.0 W/K** (ours 305.04 vs ws 304.04) — a single element off by + exactly 1.0; floor=25.000 is suspiciously round. Walk the per-element §3 breakdown. + +The **eval headline is flat** (42.9→43.0% <0.5; cat-7 5.25→4.93) — expected: the +diverter is rare and the β/price effects are small on the rounded SAP. The value +was pinning the worksheet-validated case 19 + fixing three real spec bugs that the +curated cohort masked. + +## Headline now (1,000-cert 2026 API sample, HEAD `f326e4eb`) + +| metric | value | was (handover baseline `9c0a373f`) | +|---|---|---| +| computed | 882 / 1000 | 882 | +| **% \|err\| < 0.5** | **42.9%** | 41.8% | +| % < 1 / < 2 / < 5 | 56.7% / 74.6% / 90.1% | 54.9 / 71.9 / 87.8 | +| median / mean \|err\| | 0.73 / **2.04** | 0.79 / ~2.4 | +| mean signed | −0.41 | +0.2 | + +**Error by heating cluster** (the load-bearing cut — re-run `analyse_api_sap_clusters.py`): + +| cluster | n | mean \|err\| | %<0.5 | note | +|---|---|---|---|---| +| cat 2 gas boiler + PCDB | 639 | 1.27 | 49.6% | well-trodden | +| cat 2 gas, NO PCDB idx | 91 | 3.18 | 35.2% | non-PCDB Table-4b boilers | +| cat 6 community | 45 | 2.59 | 31.1% | known-hard | +| cat 7 electric storage | 40 | **5.25** | 10.0% | was 7.33 → S0380.227-229 | +| cat 10 electric room heaters | 48 | **5.26** | 16.7% | was 9.49 → S0380.230-231 (bias gone) | +| cat 4 HP + PCDB | 8 | 6.11 | 12.5% | small n, APM | +| Flats (any) | 282 | 2.57 | 30.5% | geometry / communal | +| real PV | 45 | 3.90 | 26.7% | Appendix M | + +**Worst individual offenders** (the long tail — `eval` TOP 40): `2100-5421-0922-1622-3463` +(−60.8, our SAP **negative** −24.8 vs lodged 36 — a flat, 2 bps, cat-2; the single worst, likely +a geometry/communal blow-up — START a per-cert dig here), `2958-8008` (+32, age 6=tiny), +`9836-5829` (−29.5, cat-10 tail), several cat-7/cat-10 in the −20s. + +## Work shipped (this session — S0380.227-231 + 3 mapper commits) + +| commit | what | +|---|---| +| **S0380.227** | dedicated DHW-only system (WHS 911) is NOT separately timed → no Table 2b ×0.9; TF (53) 0.54→0.60, (59) h=3→5 | +| **S0380.228** | electric SECONDARY on off-peak bills at Table 12a `OTHER_DIRECT_ACTING_ELECTRIC` (1.00 high-frac), not 100% low | +| **S0380.229** | dedicated water boiler/circulator (WHC 911-931) feeds cylinder via primary loop → Table 3 primary loss applies | +| **S0380.230** | electric room heaters (cat 10) on off-peak → `OTHER_DIRECT_ACTING_ELECTRIC` (mirror of .228 for the MAIN). cat-10 9.49→7.11 | +| **S0380.231** | Dual-meter electric room heaters → 10-hour tariff (RdSAP §12 Rule 3; codes 691-694,699). cat-10 7.11→5.26, bias +5.08→−0.86 | +| `bd25a3c7` | SY system-built vs B basement: code 6 stays system-built; basement → explicit `wall_is_basement`/`is_basement` flag. `system_build` is a derived property (wall type). API path post-processes via addendum. (issue #1177 — see `docs/PR_NOTE_system_built_basement_1177.md`: field-vs-property merge landmine) | +| `f326e4eb` | Elmhurst path now populates `roof_construction` (int) via `_elmhurst_roof_construction_int` for cross-mapper parity (API set it, Elmhurst didn't) | - **Toolkit (committed):** `scripts/fetch_2026_epc_sample.py`, `scripts/eval_api_sap_accuracy.py`, `scripts/analyse_api_sap_clusters.py`. The 1,000 cached JSONs live in `/tmp/epc_2026_sample/` (gitignored scratch — re-fetch with the sampler; @@ -103,7 +175,7 @@ PDF — only in the P960 header). | **S0380.228** | cost (255) | electric SECONDARY on off-peak bills at Table 12a `OTHER_DIRECT_ACTING_ELECTRIC` (7-hr high-frac **1.00** = £0.1529), not the flat off-peak low (£0.0550). Worksheet (242): "1.00*15.29 + 0.00*5.50". THE primary cost driver (−340). | 60.1→**50.67** | | **S0380.229** | (62) 2493.30→**3169.98** | dedicated water-heating boiler/circulator (WHC 911-931) feeds the cylinder via a primary loop → Table 3 row 1 primary loss applies (keyed off `water_heating_code`, since `_water_heating_main` returns the electric SPACE main). Restored the missing (59)=676.68 kWh/yr. | 50.67→50.33 | -**The ONE remaining case-19 cause — the PV diverter (63b) — is S0380.230 (next).** Worksheet +**The ONE remaining case-19 cause — the PV diverter (63b) — is S0380.232.** Worksheet header line 124 "Diverter = Yes"; Summary §19 "Diverter present: Yes". Per **SAP 10.2 Appendix G4 (PDF p.72-73)** surplus PV is diverted to the cylinder immersion: `S_PV,diverter,m = EPV,m × (1 − βm) × 0.8 × 0.9`, clamped to ≤ (62)m + (63a)m, entered as a @@ -134,15 +206,30 @@ pages → ElmhurstSiteNotesExtractor(...).extract() → from_elmhurst_site_notes ## Remaining work, prioritised -### A. Accuracy clusters (highest value — 80+ certs, mean err 7–10) -1. **Electric storage heaters (cat 7, 39 certs).** Distinct cascade — off-peak tariff split, - charge control (2401/2402), 7-hr/24-hr charge, Table 4a efficiency, responsiveness. **No - worksheet currently validates this path.** Errs both directions (−27..+16). -2. **Electric room heaters (cat 10, 43 certs).** Likewise (controls 2601/2602/2603). Worst - cluster by mean (10.26). -3. **Flats (242, 29% <0.5)** and **PV (40, 28%)** — secondary. +### A. Accuracy clusters (highest value) +1. **PV diverter (S0380.232)** — closes case 19 to 1e-4 AND helps the real-PV cluster (45 certs, + mean 3.90). Fully spec'd in the case-19 section above (Appendix G4). **Has a worksheet** → + 1e-4 bar. Do this first: it's the one open cause on a validated worksheet. +2. **Electric storage heaters (cat 7, 40 certs, mean 5.25).** S0380.227-229 took it 7.33→5.25; + the case-19 PV diverter will help further. Beyond that the tail is per-cert — a **dedicated + cat-7 worksheet** (no PV, no diverter) would let you pin charge-control / responsiveness at + 1e-4 instead of the ±0.5 lodged fallback. +3. **Electric room heaters (cat 10, 48 certs, mean 5.26).** S0380.230-231 fixed the systematic + tariff bias (mean 9.49→5.26, signed +5.08→−0.86); the residual is now scattered per-cert + (e.g. `9836-5829` −29.5, an under-rater). A **cat-10 worksheet** pins the tail at 1e-4. +4. **Non-PCDB gas boilers (cat 2, no idx, 91 certs, mean 3.18)** and **Flats (282, mean 2.57)** — + the next volume levers once the electric clusters are worksheet-pinned. Flats = geometry / + communal; start with the worst (`2100-5421` negative SAP). + - **`2100-5421-0922-1622-3463` diagnosed (S0380.234 session):** NOT a flat — `property_type 0`, + a **352 m² 2-storey uninsulated solid-wall** dwelling (wall_constr 3 / wall_ins 4 as-built; + roof_type 4, no roof insulation). Our space-heating demand is **71,084 kWh/yr** → (37)=995.93 + W/K → SAP −24.8 (lodged 36), cost £14,045. This is the **`as-built insulated-assumed`** + U-value front ([[project_as_built_insulated_assumed_bug]]; S0380.209 fixed walls, "roof next"): + the uninsulated-roof / as-built U over-estimates demand on big old dwellings. API-only (no + worksheet → ±0.5 lodged fallback only); needs a generated worksheet or a roof-U spec audit to + pin. It is one outlier, not a cluster-wide flats bug. -### B. Remaining raises (18 certs — all U-value / heat-loss-sensitive, NOT enum guesses) +### B. Remaining raises (16 certs — all U-value / heat-loss-sensitive, NOT enum guesses) - **`gable_wall_type` 2 & 3 (14 certs).** RdSAP 10 **Table 4** RR walls: 0=Party (U=0.25), 1=Exposed (U=common wall), 2/3 = **Sheltered (U=external×R0.5)** + **Adjacent-to-heated (U=0)**, code↔type order unconfirmed (schema says "not yet seen"). Needs (i) a worksheet to @@ -161,16 +248,24 @@ is what the strict-raise guard exists to prevent. --- -## ★ Additional worksheets that would help most +## ★ Additional worksheets that would help most (the user will generate these on request) -Case 19 (above) already covers electric storage heaters + loose-jacket cylinder + RR. The two -that would add the most NEW coverage: -1. **An electric ROOM-heater dwelling** (SAP code ~691, control 2601/2602) — the **cat-10 - cluster (43 certs, worst by mean error 10.26)**, which case 19 does not touch. -2. **A room-in-roof with a SHELTERED gable and an ADJACENT-TO-HEATED gable** (Table 4 types +The two electric clusters are now systematic-bias-free (S0380.227-231) but their TAILS sit at +the ±0.5-vs-lodged fallback bar because **no worksheet validates them at 1e-4**. The three +highest-value worksheets to ask the user for: +1. **An electric ROOM-heater dwelling** (SAP code ~691, control 2601/2602/2603, Dual meter) — + pins the cat-10 tail (48 certs, mean 5.26) at 1e-4. Make it PV-free + cylinder-free to + isolate the space-heat path from the diverter/HW. +2. **An electric STORAGE-heater dwelling distinct from case 19** (no PV, no WHS-911) — pins the + cat-7 tail (40 certs, mean 5.25): charge control (2401/2402), 7-hr vs 24-hr, responsiveness. +3. **A room-in-roof with a SHELTERED gable and an ADJACENT-TO-HEATED gable** (Table 4 types beyond Party/Exposed) — closes the `gable_wall_type` 2/3 raise (14 certs) and pins the Sheltered (U=ext×R0.5) / Adjacent (U=0) U-values the calculator must add. +Per worksheet send BOTH the **Summary PDF** (input) and the **P960/dr87 worksheet PDF** (the +`(1)..(286)` ground truth). Drop them in `sap worksheets/golden fixture debugging//` and +run the case-19 debug recipe. + The original "design one property" guidance (kept below for reference) is what case 19 was built from. diff --git a/domain/sap10_calculator/docs/HANDOVER_GLAZING_WINDOW_EXTRACTION.md b/domain/sap10_calculator/docs/HANDOVER_GLAZING_WINDOW_EXTRACTION.md new file mode 100644 index 00000000..8f652381 --- /dev/null +++ b/domain/sap10_calculator/docs/HANDOVER_GLAZING_WINDOW_EXTRACTION.md @@ -0,0 +1,94 @@ +# Handover — double_glazing fixture: glazing done, window-extraction open + +Point-in-time note for the agent owning the Elmhurst window extractor / +mapper. Start from [`AGENT_GUIDE.md`](AGENT_GUIDE.md) for the 1e-4 +worksheet-pin methodology and the cascade pipeline. + +- **Branch:** `feature/per-cert-mapper-validation`. HEAD `8133521c`. +- **Fixture:** the double_glazing "before" recommendation pair — + `sap worksheets/Recommendations Elmhurst Files/double_glazing/before/` + (`Summary_001431 (1).pdf` + `P960-0001-001431 - 2026-06-02T115533.961.pdf`). + The Summary is also committed as a test fixture: + `backend/documents_parser/tests/fixtures/Summary_001431_double_glazing.pdf`. +- **Worksheet block to pin** (rating = block 1, region 0; demand = block 2, + postcode): SAP cont **57.2415**, cost (255) **1423.0955**, fabric (33) + **158.4548** / (37) **197.8463**; demand CO2 (272) **3486.0799**, PE (286) + **16796.5617**. (Blocks 3+ add PV/diverter — ignore for the "before" pin.) + +## Done this session (S0380.235-237) — DON'T redo + +| slice | what | +|---|---| +| **S0380.235** `3e45b7fa` | 5 missing Elmhurst §11 glazing labels → SAP10 Table 6b (`Secondary glazing`→7, `…- Normal emissivity`→11, `Triple pre 2002`→10, `Triple with unknown install date`→6, `Single glazing, known data`→15). | +| **S0380.236** `ea35bed2` | Extension party-wall type read independently of "As Main Wall" (`_extract_extensions`): Main `CU`→0.5, Ext `U Unable to determine`→0.25. Worksheet **(32) party heat loss now exact** (32.573 vs 32.5725). | +| **S0380.237** `8133521c` | `Secondary glazing - Low emissivity`→12. Double {1,2,3,7,13} + secondary {4,11,12} families now fully mapped. | + +**Confirmed already correct — do not touch:** +- The calculator's window **U-adjustment `1/(1/U + 0.04)`** (SAP §3.2 curtain + correction) is exact: lodged 4.80→4.0268, 3.10→2.7580, 1.40→1.3258, all + match the worksheet to 1e-4. Our 14 extracted windows sum to **exactly + 56.090**. The 1e-4 gap is NOT in the calculator. +- Glazing label→code mapping (g_L is the only cascade effect; lodged U/g + drive §3/§6 via `_g_perpendicular` preferring the lodged value). + +## Open — current residual **SAP +1.13** (ours 58.37 vs ws 57.24), all in WINDOWS + +The Summary §11 lodges **17 physical window rows**; we end up with **14** +`sap_windows`. Three windows are lost, in two distinct ways: + +### Cause 1 (HARD — read before touching `_is_elmhurst_roof_window`) +The mapper routes the two `Double pre 2002` windows (lodged U 3.1 / 3.4) to +**roof** windows via the `U > 3.0` backstop in +`_is_elmhurst_roof_window` (`datatypes/epc/domain/mapper.py`, the final +`return w.u_value > _ELMHURST_ROOF_WINDOW_U_THRESHOLD`). This fixture's +worksheet bills them as **wall** windows (27). + +**The trap:** cohort cert **000516** has a window that is *byte-identical* +in every extractable Summary field — `Double pre 2002`, U=3.1, +`location="External wall"`, bp `Main`, orient `North-East` — and *its* +worksheet bills it as a **roof** window (27a). Verified: gating the U>3 +rule on `location == "External wall"` makes this fixture pass but +**breaks both 000516 pins** (`test_summary_000516_full_chain_…` and +`test_from_elmhurst_site_notes_matches_hand_built_000516`). + +So identical Summary inputs are classified oppositely by the two +worksheets. **No rule keyed on the fields we currently extract can satisfy +both.** Resolving this needs a NEW disambiguating signal — likely a +roof/wall or rooflight field Elmhurst lodges in §11 (or the BP roof +structure) that the extractor doesn't yet capture. Do NOT flip the U>3 +heuristic to fix this fixture; it silently regresses 000516. + +### Cause 2 (tractable — a plain parsing miss) +The extractor produces **16 windows from 17 §11 rows** — it drops the +`Double glazing, known data` row (BFRC, lodged U=1.00 → adjusted 0.9615, +1st Extension, area 1.00; worksheet "Windows 12" on Ext1). The label maps +fine (→3); the physical row just isn't extracted. Fixing this alone won't +pin the fixture (Cause 1 still blocks) but it's a real, isolatable +extractor bug. + +## Tracer recipe (rebuild — the throwaway lived in /tmp) +```python +# from repo root, PYTHONPATH=/workspaces/model +import re, subprocess; from pathlib import Path +from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor +from datatypes.epc.domain.mapper import EpcPropertyDataMapper +from domain.sap10_calculator.rdsap.cert_to_inputs import ( + SAP_10_2_SPEC_PRICES, cert_to_inputs, cert_to_demand_inputs, + heat_transmission_section_from_cert) +from domain.sap10_calculator.calculator import calculate_sap_from_inputs +def pages(pdf): + n=int(re.search(r"Pages:\s+(\d+)",subprocess.run(["pdfinfo",str(pdf)], + capture_output=True,text=True).stdout).group(1)); out=[] + for i in range(1,n+1): + L=subprocess.run(["pdftotext","-layout","-f",str(i),"-l",str(i),str(pdf),"-"], + capture_output=True,text=True).stdout + out.append("\n".join(tok for ln in L.splitlines() + for tok in re.split(r"\s{2,}",ln.strip()) if tok)) + return out +D=Path("sap worksheets/Recommendations Elmhurst Files/double_glazing/before") +sn=ElmhurstSiteNotesExtractor(pages(D/"Summary_001431 (1).pdf")).extract() +epc=EpcPropertyDataMapper.from_elmhurst_site_notes(sn) +# len(sn.windows)==16 (should be 17); len(epc.sap_windows)==14 (2 → roof, 1 dropped) +``` +Per-window A×U on the worksheet uses the ADJUSTED U `1/(1/U_lodged+0.04)`; +sum the §3 `(27)` lines to 60.5577 (we get 56.090 from 14 windows). diff --git a/domain/sap10_calculator/rdsap/cert_to_inputs.py b/domain/sap10_calculator/rdsap/cert_to_inputs.py index 433f3270..95fb2d75 100644 --- a/domain/sap10_calculator/rdsap/cert_to_inputs.py +++ b/domain/sap10_calculator/rdsap/cert_to_inputs.py @@ -164,7 +164,10 @@ from domain.sap10_calculator.worksheet.energy_requirements import ( from domain.sap10_calculator.worksheet.fabric_energy_efficiency import ( fabric_energy_efficiency_kwh_per_m2_yr, ) -from domain.sap10_calculator.worksheet.photovoltaic import pv_split_monthly +from domain.sap10_calculator.worksheet.photovoltaic import ( + PhotovoltaicSplit, + pv_split_monthly, +) from domain.sap10_calculator.worksheet.space_cooling import ( SpaceCoolingResult, space_cooling_monthly_kwh, @@ -2082,23 +2085,6 @@ def _off_peak_low_rate_gbp_per_kwh(tariff: Tariff) -> float: return low * _PENCE_TO_GBP -def _off_peak_low_rate_gbp_per_kwh_via_meter_heuristic(meter_type: object) -> float: - """Off-peak low-rate £/kWh for callsites that detect off-peak via the - `_is_off_peak_meter` heuristic (RdSAP meter code 3 = Unknown is - treated as off-peak for electric end-uses; see _is_off_peak_meter - docstring). When the meter resolves to a known off-peak tariff - (codes 1/4/5), bills at that tariff's Table 32 low rate; when the - meter resolves to STANDARD (codes 2 = Single, 3 = Unknown), falls - back to the SEVEN_HOUR rate (5.50, Table 32 code 31). Codifies the - heuristic that pre-S0380.138 was baked into the literal - `prices.e7_low_rate_p_per_kwh` constant.""" - tariff = tariff_from_meter_type(meter_type) - if tariff is Tariff.STANDARD: - _high, low = _tariff_high_low_rates_p_per_kwh(Tariff.SEVEN_HOUR) - return low * _PENCE_TO_GBP - return _off_peak_low_rate_gbp_per_kwh(tariff) - - # Tariff → (high_rate_fuel_code, low_rate_fuel_code) for the SAP 10.2 # Table 12d (CO2) / Table 12e (PE) monthly factors. Mirror of the # Table 32 cost-rates dict above: 7-hour and 10-hour tariffs split into @@ -2138,6 +2124,16 @@ def _table_12a_system_for_main( main.main_heating_index_number is not None and heat_pump_record(main.main_heating_index_number) is not None ) + # Electric room heaters (RdSAP main_heating_category 10) are direct- + # acting electric → SAP 10.2 Table 12a Grid 1 (PDF p.191) "Other + # systems including direct-acting electric" row (7-hour high-rate + # fraction 1.00, 10-hour 0.50). Distinct from electric STORAGE + # heaters (category 7), which charge off-peak and correctly fall + # through to None here (→ 100% low rate). Gated on `_is_electric_main` + # so a non-electric room heater (gas / solid-fuel cat 10) is excluded; + # all callers already pre-gate on electric, this is belt-and-braces. + if main.main_heating_category == 10 and _is_electric_main(main): + return Table12aSystem.OTHER_DIRECT_ACTING_ELECTRIC # ASHP — Table 4a rows 211-217 (earlier generations) + 221-227 # (2013+) cover the air-source space. Warm-air ASHPs are 521-524. if code is not None and ( @@ -2173,6 +2169,41 @@ def _space_heating_fuel_cost_gbp_per_kwh( return blended * _PENCE_TO_GBP +def _main_space_heating_high_rate_fraction( + main: Optional[MainHeatingDetail], + tariff: Tariff, +) -> float: + """SAP 10.2 Appendix M1 §3a (PDF p.93) — the fraction of the main + space-heating fuel that is billed at the HIGH rate in Section 10a, + i.e. carries an "electricity not at the low-rate" fuel code (30, 32, + 34, 35 or 38). Only this high-rate portion of E_space,m may enter the + PV-eligible demand D_PV,m; the low-rate portion (code 31/33/36/37/39) + is excluded. + + Mirrors `_space_heating_fuel_cost_gbp_per_kwh`'s rate split exactly so + the D_PV inclusion and the §10a billing stay consistent: + - non-electric main, or STANDARD tariff → 1.0 (no off-peak split; + the eligible-code gate in `_pv_eligible_demand_monthly_kwh` + already excludes non-electric fuels, and a STANDARD-tariff + electric main bills 100% at code 30). + - electric main on an off-peak tariff whose Table 12a Grid 1 SH row + is wired → the published high-rate fraction. Electric STORAGE + heaters (Table 12a `_table_12a_system_for_main` → None, charged + wholly off-peak) and any system whose Grid 1 SH row is not yet + wired bill 100% at the low rate → fraction 0.0, so E_space,m is + excluded from D_PV entirely (worksheet (240) high-rate cost = 0). + """ + if not _is_electric_main(main) or tariff is Tariff.STANDARD: + return 1.0 + system = _table_12a_system_for_main(main) + if system is None: + return 0.0 + try: + return space_heating_high_rate_fraction(system, tariff) + except NotImplementedError: + return 0.0 + + def _hot_water_fuel_cost_gbp_per_kwh( water_heating_fuel: Optional[int], main: Optional[MainHeatingDetail], @@ -2464,6 +2495,7 @@ def _pv_eligible_demand_monthly_kwh( main_fuel_code_table_12: Optional[int], secondary_fuel_code_table_12: Optional[int], water_heating_fuel_code_table_12: Optional[int], + main_space_high_rate_fraction: float = 1.0, ) -> tuple[float, ...]: """SAP 10.2 Appendix M1 §3a (p.93) — monthly PV-eligible demand D_PV,m. Always includes lighting + appliances + cooking + electric @@ -2472,6 +2504,18 @@ def _pv_eligible_demand_monthly_kwh( (codes 30, 32, 34, 35, 38 per spec). Includes E_water,m only when the water heating fuel code is 30 (standard electricity) per spec. + `main_space_high_rate_fraction` scales the main-heating contribution + by the portion billed at the HIGH rate (code 30) in Section 10a. + Per the §3a inclusion rule "(211) should be included only where the + fuel code applied to it in Section 10a is 30, 32, 34, 35 or 38 (i.e. + electricity not at the low-rate)", off-peak electric mains (e.g. + storage heaters charged wholly at the low rate, fraction 0.0) must + NOT add their (211) to D_PV. Defaults to 1.0 → unchanged for + STANDARD-tariff electric mains and the gas-main / electric-secondary + cohort. Without this, off-peak storage-heater dwellings over-counted + D_PV by the full (211) in winter, inflating R_PV,m → β → the onsite + PV split (case 19: β_Jan 0.894 → 0.792, matching worksheet 0.791). + Secondary space heating is included on the same footing as main: Appendix M1 §3a counts E_space,m as the dwelling's total electric space-heating demand, which for a gas-main / electric-secondary @@ -2481,9 +2525,13 @@ def _pv_eligible_demand_monthly_kwh( worksheet (233a) gap localised on the cohort-2 gas+PV certs: cert 3136 onsite 726.9 → 790.3 vs worksheet 792.1). - The off-peak immersion × (243) Ewater branch and the Appendix G4 - PV diverter adjustment are deferred — current cohort fixtures - don't exercise them.""" + The off-peak immersion × (243) Ewater branch is deferred. The + Appendix G4 PV-diverter saving is intentionally NOT reflected here: + per the §3a note (PDF p.93, lines 5485-5486) "If there is a PV + diverter, then for the purposes of this β factor calculation (219)m + should not include the diverter savings" — so D_PV uses the + pre-diverter (219), and the diverter (63b)m is applied afterwards in + `_pv_diverter_monthly_kwh`.""" include_main_space = ( main_fuel_code_table_12 is not None and main_fuel_code_table_12 in _PV_ELIGIBLE_SPACE_HEATING_FUEL_CODES @@ -2506,7 +2554,7 @@ def _pv_eligible_demand_monthly_kwh( + pumps_fans_monthly_kwh[m] ) if include_main_space: - d += main_1_fuel_monthly_kwh[m] + d += main_space_high_rate_fraction * main_1_fuel_monthly_kwh[m] if include_secondary_space: d += secondary_fuel_monthly_kwh[m] if include_water: @@ -2515,6 +2563,70 @@ def _pv_eligible_demand_monthly_kwh( return tuple(monthly) +# SAP 10.2 Appendix G4 step 4 (PDF p.73) — correction factors applied to +# the surplus PV available to the diverter: 0.8 for the cylinder's +# ability to accept the heat, and fPV,diverter,storageloss = 0.9 for the +# increased cylinder losses from storing water at a higher temperature. +_PV_DIVERTER_CYLINDER_ACCEPTANCE_FACTOR: Final[float] = 0.8 +_PV_DIVERTER_STORAGE_LOSS_FACTOR: Final[float] = 0.9 + + +def _pv_diverter_monthly_kwh( + *, + epc: EpcPropertyData, + pv_export_monthly_kwh: tuple[float, ...], + water_demand_monthly_kwh: tuple[float, ...], + avg_daily_hot_water_l: float, + battery_capacity_kwh: float, + pv_generation_kwh: float, +) -> Optional[tuple[float, ...]]: + """SAP 10.2 Appendix G4 (PDF p.72-73) — monthly PV-diverter water- + heating input SPV,diverter,m (positive kWh), entered as the negative + worksheet (63b)m. + + `pv_export_monthly_kwh` is the pre-diverter surplus EPV,m × (1 − βm) + — the portion of PV generation not consumed by the dwelling's + instantaneous demand, which would otherwise be exported. Per G4 step + 4: + + SPV,diverter,m = EPV,m × (1 − βm) × 0.8 × fPV,diverter,storageloss + + clamped to ≤ (62)m + (63a)m (`water_demand_monthly_kwh`; (63a) the + WWHRS reduction, 0 here) so the diverter never supplies more than the + water-heating demand. + + Returns None — diverter disregarded by software (G4 step 1) — unless + ALL four inclusion conditions hold: + a. a PV system connected to the dwelling supply (EPV > 0); + b. a cylinder whose volume exceeds (43) the average daily hot-water + use; + c. no solar water heating present; + d. no battery storage present. + `pv_diverter_present` (Summary §19 / API `pv_diverter`) gates the + whole calculation: an absent diverter returns None immediately. + """ + if not epc.sap_energy_source.pv_diverter_present: + return None + # a. PV connected to the dwelling (case "a" Appendix M1 step 2). + if pv_generation_kwh <= 0.0: + return None + # b. Cylinder volume (litres) must exceed (43) average daily HW use. + cylinder_volume_l = _hot_water_cylinder_volume_l(epc) + if cylinder_volume_l is None or cylinder_volume_l <= avg_daily_hot_water_l: + return None + # c. No solar water heating. d. No battery storage. + if epc.solar_water_heating or battery_capacity_kwh > 0.0: + return None + correction = ( + _PV_DIVERTER_CYLINDER_ACCEPTANCE_FACTOR + * _PV_DIVERTER_STORAGE_LOSS_FACTOR + ) + return tuple( + min(pv_export_monthly_kwh[m] * correction, water_demand_monthly_kwh[m]) + for m in range(12) + ) + + # RdSAP 10 §11.1 b): when the kWp is not lodged but the cert lodges a # "% of roof area" PV figure, derive the PV peak power as # `0.12 × PV area`, with PV area being the dwelling's roof area for @@ -2602,30 +2714,25 @@ def _pv_export_credit_gbp_per_kwh() -> float: def _pv_dwelling_import_price_gbp_per_kwh( - meter_type: object, prices: PriceTable + tariff: Tariff, prices: PriceTable ) -> float: """PV dwelling-consumption price per kWh per SAP 10.2 Appendix M1 §6 - (p.94): "apply the normal import electricity price to PV energy used - within the dwelling". Onsite-consumed PV displaces grid IMPORTS, so - it bills at the standard electricity import tariff (Table 32 code 30 - under the RdSAP10 amendment per ADR-0010 §10 = 13.19 p/kWh — the - same rate `_fuel_cost`'s `other_uses_p_per_kwh` already pays for - lighting/pumps/fans, and crucially the same rate Table 32 code 60 - pays for the EXPORT credit. In Table 32 these collapse to a single - 13.19 p value, so the IMPORT/EXPORT split is mathematically - equivalent to the legacy single-rate-EXPORT credit — but the - distinction matters when an off-peak tariff lands: §6 then directs - a weighted Table 12a high/low rate, deferred until the first off- - peak cost cert ships.""" - if _is_off_peak_meter(meter_type, fuel_is_electric=True): - # Off-peak weighted Table 12a rate (deferred — `_fuel_cost` - # short-circuits Tariff != STANDARD before reaching this path). - # Routes through the meter-heuristic helper so an Unknown-meter - # cert (code 3 = "treat as off-peak for electric end-uses" per - # _is_off_peak_meter) falls back to the SEVEN_HOUR low rate - # rather than raising on STANDARD. - return _off_peak_low_rate_gbp_per_kwh_via_meter_heuristic(meter_type) - return table_32_unit_price_p_per_kwh(30) * _PENCE_TO_GBP + (PDF p.94, lines 5510-5513): "apply the normal import electricity + price to PV energy used within the dwelling … In the case of the + former, use a weighted average of high and low rates (Table 12a)." + + Onsite-consumed PV displaces the dwelling's "all other uses" + electricity (lighting / appliances / pumps), so it bills at the same + Table 12a Grid 2 ALL_OTHER_USES rate `_other_fuel_cost_gbp_per_kwh` + derives — a STANDARD-tariff dwelling pays the flat Table 32 code 30 + 13.19 p/kWh (unchanged from the legacy single-rate path), while an + off-peak dwelling pays the weighted high/low blend (7-hour: + 0.90 × 15.29 + 0.10 × 5.50 = 14.311 p/kWh, matching worksheet + (252)/(269) "PV used in dwelling" on case 19). + + Pre-S0380.233 the off-peak branch returned the bare low rate + (5.50 p/kWh), under-crediting onsite PV on every off-peak cert.""" + return _other_fuel_cost_gbp_per_kwh(tariff, prices) def _other_fuel_cost_gbp_per_kwh( @@ -6079,9 +6186,7 @@ def _fuel_cost( pv_dwelling_kwh_per_yr=pv_dwelling_kwh_per_yr, pv_exported_kwh_per_yr=pv_exported_kwh_per_yr, pv_dwelling_import_price_gbp_per_kwh=( - _pv_dwelling_import_price_gbp_per_kwh( - epc.sap_energy_source.meter_type, prices - ) + _pv_dwelling_import_price_gbp_per_kwh(_rdsap_tariff(epc), prices) ), ) @@ -6537,6 +6642,7 @@ def cert_to_inputs( # the scalar `water_eff` (Table 4a/4b boilers, legacy fallback). # Q_space (kWh/month) per spec = (98c)m × (204) = (98c)m × (1 − # sec_frac) for single-main fixtures. + space_heating_monthly_useful_kwh: tuple[float, ...] = (0.0,) * 12 if wh_result is not None: # Eq D1 Q_space is the DHW boiler's OWN space-heating load — its # (204)/(205) share of total — not the dwelling total (202). See @@ -6711,6 +6817,12 @@ def cert_to_inputs( ) if epc.sap_heating.water_heating_fuel is not None else None ), + # SAP 10.2 Appendix M1 §3a — exclude the low-rate portion of an + # off-peak electric main from D_PV (the §10a high/low split that + # `_space_heating_fuel_cost_gbp_per_kwh` already bills). + main_space_high_rate_fraction=_main_space_heating_high_rate_fraction( + main, _rdsap_tariff(epc), + ), ) pv_split = pv_split_monthly( epv_monthly_kwh=pv_monthly_kwh, @@ -6718,17 +6830,70 @@ def cert_to_inputs( battery_capacity_kwh=_pv_battery_capacity_kwh(epc), ) + # SAP 10.2 Appendix G4 (PDF p.72-73) — PV diverter. The β factor above + # is computed on the PRE-diverter (219) per the §3a note; now apply + # the diverter saving. SPV,diverter,m diverts the surplus PV (the + # would-be export EPV,m × (1 − βm)) into the cylinder immersion: + # - (63b)m = −SPV,diverter,m reduces the §4 output (64)m → less main- + # system water-heating fuel (219); + # - the export drops to EPV,ex,m = EPV,m(1 − βm) + (63b)m / 0.9 (the + # diverted energy is no longer exported); the onsite dwelling + # portion EPV,dw,m = EPV,m × βm is unchanged (the β is fixed). + hw_output_monthly_for_factors = ( + wh_result.output_monthly_kwh if wh_result is not None else (0.0,) * 12 + ) + pv_diverter_monthly_kwh = _pv_diverter_monthly_kwh( + epc=epc, + pv_export_monthly_kwh=pv_split.epv_exported_monthly_kwh, + water_demand_monthly_kwh=( + wh_result.total_demand_monthly_kwh if wh_result is not None + else (0.0,) * 12 + ), + avg_daily_hot_water_l=( + wh_result.annual_avg_hot_water_l_per_day if wh_result is not None + else 0.0 + ), + battery_capacity_kwh=_pv_battery_capacity_kwh(epc), + pv_generation_kwh=sum(pv_monthly_kwh), + ) + if pv_diverter_monthly_kwh is not None and wh_result is not None: + pv63b_monthly_kwh = tuple(-s for s in pv_diverter_monthly_kwh) + # (64)m = (62)m + (63a)m + (63b)m — reduce the §4 output by the + # diverter input, then recompute (219) from the reduced output. + hw_output_monthly_for_factors = tuple( + max(0.0, wh_result.output_monthly_kwh[m] + pv63b_monthly_kwh[m]) + for m in range(12) + ) + if section_12_4_4_blend is None: + hw_kwh = _apply_water_efficiency( + wh_output_monthly_kwh=hw_output_monthly_for_factors, + wh_output_annual_kwh=sum(hw_output_monthly_for_factors), + water_efficiency_pct=water_eff, + eq_d1_winter_summer_pct=eq_d1_winter_summer_pct, + space_heating_monthly_useful_kwh=space_heating_monthly_useful_kwh, + interlock_penalty_pp=eq_d1_interlock_penalty_pp, + ) + # EPV,ex,m = EPV,m(1 − βm) + (63b)m / fPV,diverter,storageloss. + adjusted_export_monthly_kwh = tuple( + pv_split.epv_exported_monthly_kwh[m] + + pv63b_monthly_kwh[m] / _PV_DIVERTER_STORAGE_LOSS_FACTOR + for m in range(12) + ) + pv_split = PhotovoltaicSplit( + beta_monthly=pv_split.beta_monthly, + epv_dwelling_monthly_kwh=pv_split.epv_dwelling_monthly_kwh, + epv_exported_monthly_kwh=adjusted_export_monthly_kwh, + ) + # SAP 10.2 §12.4.4 overrides — when summer immersion applies (back- # boiler combo + cylinder + WHC from main heating), the HW cost / # CO2 / PE factors are kWh-weighted blends of the winter boiler fuel # + summer electric immersion. The standing-charges line adds the # off-peak electric standing because the cylinder is heated by an # off-peak immersion Jun-Sep. When the rule does NOT apply, the - # locals fall back to the existing single-fuel HW helpers. - hw_monthly_kwh_for_factors = ( - wh_result.output_monthly_kwh if wh_result is not None - else (0.0,) * 12 - ) + # locals fall back to the existing single-fuel HW helpers. The HW + # factors weight by the diverter-adjusted (64)m output. + hw_monthly_kwh_for_factors = hw_output_monthly_for_factors if section_12_4_4_blend is not None: ( _hw_total_unused, @@ -6931,7 +7096,7 @@ def cert_to_inputs( pv_generation_kwh_per_yr=_pv_generation_kwh_per_yr(epc, climate), pv_export_credit_gbp_per_kwh=_pv_export_credit_gbp_per_kwh(), pv_dwelling_import_price_gbp_per_kwh=_pv_dwelling_import_price_gbp_per_kwh( - epc.sap_energy_source.meter_type, prices + _rdsap_tariff(epc), prices ), # SAP 10.2 Appendix M1 §3-4 PV split — the cascade applies # IMPORT PEF (Table 12) to the onsite portion and EXPORT PEF diff --git a/domain/sap10_calculator/tables/table_12a.py b/domain/sap10_calculator/tables/table_12a.py index f98e1c27..2ee7c331 100644 --- a/domain/sap10_calculator/tables/table_12a.py +++ b/domain/sap10_calculator/tables/table_12a.py @@ -250,13 +250,16 @@ _RULE_2_STORAGE_CODES: Final[frozenset[int]] = frozenset( # Rule 3: direct-acting electric + heat pumps + electric room heaters # → 10-hour. §12 lists "heat pump (211 to 224, 521 to 524, or # database)" — the "database" branch fires when the cert lodges a -# PCDB Table 362 heat-pump index regardless of SAP code. +# PCDB Table 362 heat-pump index regardless of SAP code. §12 also +# names "electric room heaters" verbatim (RdSAP 10 PDF p.62) — Table 4a +# electric room-heater codes 691 (panel/convector/radiant), 692 (fan), +# 693 (portable), 694 (water-/oil-filled), 699 (assumed). Without these +# a Dual-meter room-heater cert fell through to Rule 4 (7-hour default). _RULE_3_TEN_HOUR_CODES: Final[frozenset[int]] = frozenset( [191] # direct-acting electric boiler + list(range(211, 225)) # heat pumps 211-224 + list(range(521, 525)) # warm-air heat pumps 521-524 - # TODO: electric room heater codes (SAP Table 4a row 6xx for - # electric panel / radiant heaters) when a fixture surfaces them. + + [691, 692, 693, 694, 699] # electric room heaters (Table 4a) ) diff --git a/domain/sap10_calculator/worksheet/heat_transmission.py b/domain/sap10_calculator/worksheet/heat_transmission.py index dff61a08..a961d60f 100644 --- a/domain/sap10_calculator/worksheet/heat_transmission.py +++ b/domain/sap10_calculator/worksheet/heat_transmission.py @@ -980,21 +980,28 @@ def heat_transmission_from_cert( # U_main_wall per spec page 23 ("Common wall U-value is inferred # from the U-value of the main wall in the building part below"; # gables fall under the same Table 4 rule). - rr_a_rr = geom["rr_simplified_a_rr_m2"] - rr_common = geom["rr_common_wall_area_m2"] - rr_gable = geom["rr_gable_area_m2"] + # `rr_roof_area` is the worksheet's simplified A_RR (the notional + # room-in-roof roof area, RdSAP 10 §3.10.1). Its perimeter common + # walls + gables are billed to walls; the leftover residual is the + # roof-going area that takes the roof U-value. + rr_roof_area = geom["rr_simplified_a_rr_m2"] + rr_common_wall_area = geom["rr_common_wall_area_m2"] + rr_gable_area = geom["rr_gable_area_m2"] rr_detailed_area = 0.0 - if rr_a_rr > 0: + if rr_roof_area > 0: rir = part.sap_room_in_roof - assert rir is not None # rr_a_rr > 0 ⇒ rir present per _part_geometry - walls += uw * (rr_common + rr_gable) + assert rir is not None # rr_roof_area > 0 ⇒ rir present per _part_geometry + walls += uw * (rr_common_wall_area + rr_gable_area) # Deduct any "Roof of Room" rooflights piercing the RR shell # (see `rw_area_on_rr` rationale at the gross-roof block). - a_rr_final = max(0.0, rr_a_rr - rr_common - rr_gable - rw_area_on_rr) + rr_residual_roof_area = max( + 0.0, + rr_roof_area - rr_common_wall_area - rr_gable_area - rw_area_on_rr, + ) u_rr = u_rr_default_all_elements( country=country, age_band=rir.construction_age_band, ) - roof += u_rr * a_rr_final + roof += u_rr * rr_residual_roof_area elif part.sap_room_in_roof is not None and part.sap_room_in_roof.detailed_surfaces: # RdSAP10 §3.10 Detailed RR — iterate per-surface lodgement. # Slope / flat_ceiling / stud_wall route to roof (worksheet @@ -1007,7 +1014,8 @@ def heat_transmission_from_cert( # band". Wall-going RIR surfaces (gable_wall, gable_wall_ # external, common_wall) deduct from the simplified A_RR # to leave the residual area, mirroring the Simplified - # branch's `a_rr_final = rr_a_rr - rr_common - rr_gable`. + # branch's `rr_residual_roof_area = rr_roof_area - + # rr_common_wall_area - rr_gable_area`. # Roof-going surfaces (slope / flat_ceiling / stud_wall) # do NOT deduct — they sit inside the RR shell rather than # forming its perimeter walls. @@ -1179,7 +1187,7 @@ def heat_transmission_from_cert( main_wall_area + (alt_walls_total_area - alt_window_area) + roof_area + floor_area_total - + w_area + d_area + rw_area_part + rr_a_rr + rr_detailed_area + + w_area + d_area + rw_area_part + rr_roof_area + rr_detailed_area + cantilever_area ) total_external_area += part_external_area diff --git a/etl/hubspot/__init__.py b/etl/hubspot/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index 176e9b15..589e526b 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -67,7 +67,9 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}" ) - _trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id) + _trigger_magicplan_fetcher( + sqs_client, hubspot_deal, listing, hubspot_deal_id + ) else: # Deal already in db, check whether anything has changed logger.info( @@ -119,13 +121,18 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}" ) - _trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id) + _trigger_magicplan_fetcher( + sqs_client, hubspot_deal, listing, hubspot_deal_id + ) print("done") def _trigger_magicplan_fetcher( - sqs_client: Any, hubspot_deal: Dict[str, str], listing: Optional[dict[str, str]], hubspot_deal_id: str + sqs_client: Any, + hubspot_deal: Dict[str, str], + listing: Optional[dict[str, str]], + hubspot_deal_id: str, ) -> None: message_body = { "address": hubspot_deal.get("dealname"), @@ -136,9 +143,7 @@ def _trigger_magicplan_fetcher( QueueUrl=get_settings().MAGICPLAN_SQS_URL, MessageBody=json.dumps(message_body), ) - logger.info( - f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}" - ) + logger.info(f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}") def _trigger_pashub_fetcher( @@ -148,7 +153,7 @@ def _trigger_pashub_fetcher( "pashub_link": hubspot_deal["pashub_link"], "address": None, # potentially available from Listing, leave as None for now "hubspot_deal_id": deal_id, - "sharepoint_link": hubspot_deal.get("sharepoint_link", None), + # "sharepoint_link": hubspot_deal.get("sharepoint_link", None), # Don't send sharepoint link for now as they are inconsistent "uprn": hubspot_deal.get("national_uprn", None), "landlord_property_id": hubspot_deal.get("owner_property_id", None), "deal_stage": hubspot_deal.get("deal_stage", None), diff --git a/infrastructure/postgres/bulk_address_upload_table.py b/infrastructure/postgres/bulk_address_upload_table.py new file mode 100644 index 00000000..e43d7e84 --- /dev/null +++ b/infrastructure/postgres/bulk_address_upload_table.py @@ -0,0 +1,29 @@ +from __future__ import annotations + +from datetime import datetime, timezone +from typing import ClassVar, Optional +from uuid import UUID + +from sqlmodel import Field, SQLModel + + +class BulkAddressUploadRow(SQLModel, table=True): + """Minimal mirror of the FE-owned ``bulk_address_uploads`` table. + + The schema source of truth is the Next.js Drizzle repo + (``src/app/db/schema/bulk_address_uploads.ts``); the backend's fuller mirror + lives at ``backend/app/db/models/bulk_address_uploads.py``. This DDD-side + mirror declares only the columns the finaliser writes — the terminal status — + so the finaliser can flip status on its own ``PostgresConfig`` session without + pulling in the legacy ``backend/`` connection (ADR-0013). Keep the column names + in step with the Drizzle source. + """ + + __tablename__: ClassVar[str] = "bulk_address_uploads" # pyright: ignore[reportIncompatibleVariableOverride] + + id: UUID = Field(primary_key=True) + task_id: Optional[UUID] = Field(default=None, index=True) + status: str + updated_at: datetime = Field( + default_factory=lambda: datetime.now(timezone.utc) + ) diff --git a/infrastructure/postgres/epc_property_table.py b/infrastructure/postgres/epc_property_table.py index 539628bd..4b235d34 100644 --- a/infrastructure/postgres/epc_property_table.py +++ b/infrastructure/postgres/epc_property_table.py @@ -549,7 +549,15 @@ class EpcBuildingPartModel(SQLModel, table=True): building_part_number: Optional[int] = Field(default=None) wall_dry_lined: Optional[bool] = Field(default=None) wall_thickness_mm: Optional[int] = Field(default=None) - wall_insulation_thickness: Optional[str] = Field(default=None) + # Union[str, int] — int mm when the API lodges + # `wall_insulation_thickness == "measured"` (resolved by + # `_api_resolve_wall_insulation_thickness`), else the lodged string + # ("NI", a numeric string, ...). JSONB to preserve int vs str on + # round-trip, exactly like the sibling `roof_insulation_thickness` / + # `flat_roof_insulation_thickness`. + wall_insulation_thickness: Optional[Union[str, int]] = Field( + default=None, sa_column=Column(JSONB, nullable=True) + ) floor_heat_loss: Optional[int] = Field(default=None) floor_insulation_thickness: Optional[str] = Field(default=None) flat_roof_insulation_thickness: Optional[Union[str, int]] = Field( diff --git a/infrastructure/postgres/property_table.py b/infrastructure/postgres/property_table.py index 6bd2d644..c333cad4 100644 --- a/infrastructure/postgres/property_table.py +++ b/infrastructure/postgres/property_table.py @@ -2,24 +2,49 @@ from __future__ import annotations from typing import ClassVar, Optional +from sqlalchemy import Column +from sqlalchemy import Enum as SAEnum from sqlmodel import Field, SQLModel +# Mirror of the FE-owned `creation_status` pgEnum (property.ts: +# propertyCreationStatusEnum = {LOADING, READY, ERROR}). A single SAEnum instance +# so test-schema create_all emits one CREATE TYPE; prod owns the type via Drizzle. +property_creation_status_sa_enum = SAEnum( + "LOADING", "READY", "ERROR", name="creation_status" +) + class PropertyRow(SQLModel, table=True): - """Defensive view of the FE-owned ``property`` table. + """Mirror of the FE-owned ``property`` table. - The schema and migrations for ``property`` are owned by the front-end - Next.js repo; this declares only the identity columns the modelling backend - reads/writes, so FE-owned migrations to other columns don't ripple into us. + The schema and migrations for ``property`` are owned by the front-end Next.js + repo (``src/app/db/schema/property.ts``); this declares the identity columns + the modelling backend reads, plus the subset the ``bulk_upload_finaliser`` + Lambda **inserts** at Finalise (ADR-0013). It is no longer read-only — the + finaliser is the one backend caller that inserts. Columns not declared here + are still owned by FE migrations and don't ripple into us. """ __tablename__: ClassVar[str] = "property" # pyright: ignore[reportIncompatibleVariableOverride] - # Non-Optional: this is a read-only defensive view of the FE-owned ``property`` - # table — the backend never inserts rows, so every row read carries an id. - id: int = Field(primary_key=True) + # bigserial in the FE schema — DB-assigned on insert, so Optional/None on the + # way in and always populated on the way out. + id: Optional[int] = Field(default=None, primary_key=True) portfolio_id: int - postcode: str - address: str + # Nullable in the FE schema. The finaliser writes `matched ?? user-inputted`, + # which is absent for fully-unmatched rows. + postcode: Optional[str] = Field(default=None) + address: Optional[str] = Field(default=None) uprn: Optional[int] = Field(default=None) landlord_property_id: Optional[str] = Field(default=None) + + # Insertable columns the finaliser writes (ADR-0013). All nullable in the FE + # schema except `creation_status` (NOT NULL); the finaliser always sets it to + # 'READY', so a nullable mirror is safe — the real column enforces NOT NULL. + creation_status: Optional[str] = Field( + default=None, + sa_column=Column(property_creation_status_sa_enum, nullable=True), + ) + user_inputted_address: Optional[str] = Field(default=None) + user_inputted_postcode: Optional[str] = Field(default=None) + lexiscore: Optional[float] = Field(default=None) diff --git a/orchestration/bulk_upload_finaliser_orchestrator.py b/orchestration/bulk_upload_finaliser_orchestrator.py new file mode 100644 index 00000000..46ad5b12 --- /dev/null +++ b/orchestration/bulk_upload_finaliser_orchestrator.py @@ -0,0 +1,146 @@ +"""Finalises a BulkUpload into ``property`` rows (ADR-0013). + +The domain logic of Finalise: turn the combiner output rows into property identity +rows (the same resolution the old Next.js ``/finalize`` route did) and persist them +through the injected writer. Like every orchestrator it never commits — the caller +owns the transaction boundary (see the Lambda handler). +""" + +from __future__ import annotations + +import re +from typing import Any, Optional + +from uuid import UUID + +from repositories.bulk_upload.bulk_upload_status_writer import BulkUploadStatusWriter +from repositories.property.property_repository import ( + PropertyIdentityInsert, + PropertyRepository, +) + +# Combiner-output columns — identical to the old frontend /finalize route and the +# backend combined-results reader (router.py). +ADDRESS_COLS = ("Address 1", "Address 2", "Address 3") +POSTCODE_COL = "postcode" +INTERNAL_REF_COL = "Internal Reference" +UPRN_COL = "address2uprn_uprn" +MATCHED_ADDRESS_COL = "address2uprn_address" +LEXISCORE_COL = "address2uprn_lexiscore" +MISSING_SENTINEL = "invalid postcode" +UK_POSTCODE_RE = re.compile(r"[A-Z]{1,2}\d[A-Z\d]?\s*\d[A-Z]{2}", re.IGNORECASE) + + +def _normalize(value: Any) -> str: + if value is None: + return "" + return str(value).strip() + + +def _is_missing(value: str) -> bool: + return value == "" or value.lower() == MISSING_SENTINEL + + +def _parse_uprn(raw: Any) -> Optional[int]: + val = _normalize(raw) + if _is_missing(val): + return None + try: + return int(val) + except ValueError: + return None + + +def _parse_lexiscore(raw: Any) -> Optional[float]: + val = _normalize(raw) + if _is_missing(val): + return None + try: + return float(val) + except ValueError: + return None + + +def _extract_postcode(matched: Optional[str], fallback: str) -> Optional[str]: + if matched: + m = UK_POSTCODE_RE.search(matched) + if m: + return m.group(0).upper() + return fallback or None + + +class BulkUploadFinaliserOrchestrator: + """Owns the domain flow of Finalise, depending only on repository ports. + + Both collaborators are ports (``PropertyRepository``, + ``BulkUploadStatusWriter``); the concrete Postgres adapters are wired by the + Lambda handler (the composition root). So a unit test constructs this with two + fakes and exercises ``finalise`` end-to-end — no engine, session, or DB. The + orchestrator never commits: the caller opens the transaction around + ``finalise`` so the insert and the ``complete`` flip land atomically. + """ + + def __init__( + self, + property_repo: PropertyRepository, + status_writer: BulkUploadStatusWriter, + ) -> None: + self._property_repo = property_repo + self._status_writer = status_writer + + def finalise( + self, rows: list[dict[str, str]], portfolio_id: int, task_id: UUID + ) -> int: + """Resolve the combiner rows, insert the properties, and mark the upload + ``complete`` — all via the injected repositories, no DB connection of its + own. Returns the number of properties inserted. Does not commit.""" + inserts = self.to_property_rows(rows, portfolio_id) + inserted = self._property_repo.insert_all(inserts) + self._status_writer.set_status(task_id, "complete") + return inserted + + def to_property_rows( + self, rows: list[dict[str, str]], portfolio_id: int + ) -> list[PropertyIdentityInsert]: + """Resolve combiner rows into property identity inserts. + + Pure (no DB / IO) and independently testable. Reproduces the old + ``/finalize`` route's resolution exactly: matched address falls back to the + user-inputted one; postcode is extracted from the matched address or falls + back to the user-inputted postcode. + """ + return [self._row_to_insert(raw, portfolio_id) for raw in rows] + + @staticmethod + def _row_to_insert( + raw: dict[str, str], portfolio_id: int + ) -> PropertyIdentityInsert: + user_inputted_address = ( + ", ".join(p for p in (_normalize(raw.get(c)) for c in ADDRESS_COLS) if p) + or None + ) + user_inputted_postcode = _normalize(raw.get(POSTCODE_COL)) or None + + uprn = _parse_uprn(raw.get(UPRN_COL)) + + matched_address_raw = _normalize(raw.get(MATCHED_ADDRESS_COL)) + matched_address = ( + None if _is_missing(matched_address_raw) else matched_address_raw + ) + + address = matched_address or user_inputted_address + postcode = _extract_postcode(matched_address, user_inputted_postcode or "") + internal_ref = _normalize(raw.get(INTERNAL_REF_COL)) or None + lexiscore = _parse_lexiscore(raw.get(LEXISCORE_COL)) + + return PropertyIdentityInsert( + portfolio_id=portfolio_id, + uprn=uprn, + landlord_property_id=internal_ref, + address=address, + postcode=postcode, + user_inputted_address=user_inputted_address, + user_inputted_postcode=user_inputted_postcode, + lexiscore=lexiscore, + creation_status="READY", + ) diff --git a/pytest.ini b/pytest.ini index 47b40c17..1303dcb4 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,6 +7,7 @@ testpaths = recommendations/tests backend/tests backend/address2UPRN/tests + backend/ordnanceSurvey/tests backend/app/db/functions/tests backend/categorisation/tests backend/condition/tests @@ -25,7 +26,5 @@ testpaths = etl/epc_clean/tests etl/hubspot/tests etl/spatial/tests - domain/sap10_ml/tests - tests/ markers = integration: mark a test as an integration test diff --git a/repositories/bulk_upload/bulk_upload_status_writer.py b/repositories/bulk_upload/bulk_upload_status_writer.py new file mode 100644 index 00000000..70fdb1b2 --- /dev/null +++ b/repositories/bulk_upload/bulk_upload_status_writer.py @@ -0,0 +1,24 @@ +from __future__ import annotations + +from abc import ABC, abstractmethod +from uuid import UUID + + +class BulkUploadStatusWriter(ABC): + """Port: writes the terminal BulkUpload status at Finalise (ADR-0013). + + The finaliser owns the terminal write — Next.js no longer writes ``complete``; + it only flips ``awaiting_review → finalising`` at dispatch (ADR-0005). This is + the backend half of that "two writers" split, alongside the combiner's + ``combining``/``awaiting_review`` writes. + """ + + @abstractmethod + def set_status(self, task_id: UUID, status: str) -> None: + """Set the status of the bulk_address_uploads row for ``task_id``. + + Does not commit — the caller owns the transaction boundary, so the status + flip can share the same transaction as the property insert (atomic + finalise) or run in its own session for the failure path. + """ + ... diff --git a/repositories/bulk_upload/bulk_upload_status_writer_postgres.py b/repositories/bulk_upload/bulk_upload_status_writer_postgres.py new file mode 100644 index 00000000..ba83c4d7 --- /dev/null +++ b/repositories/bulk_upload/bulk_upload_status_writer_postgres.py @@ -0,0 +1,33 @@ +"""Postgres adapter for ``BulkUploadStatusWriter`` (ADR-0013). + +Flips the ``bulk_address_uploads`` status on the caller's session — so the +finaliser can mark ``complete`` in the *same* transaction as the property insert +(atomic finalise), and mark ``failed`` in a fresh session on the error path. +""" + +from __future__ import annotations + +from datetime import datetime, timezone +from uuid import UUID + +from sqlmodel import Session, select + +from infrastructure.postgres.bulk_address_upload_table import BulkAddressUploadRow +from repositories.bulk_upload.bulk_upload_status_writer import BulkUploadStatusWriter + + +class BulkUploadStatusWriterPostgresRepository(BulkUploadStatusWriter): + def __init__(self, session: Session) -> None: + self._session = session + + def set_status(self, task_id: UUID, status: str) -> None: + row = self._session.exec( + select(BulkAddressUploadRow).where( + BulkAddressUploadRow.task_id == task_id + ) + ).first() + if row is None: + raise ValueError(f"No bulk_address_uploads row for task_id {task_id}") + row.status = status + row.updated_at = datetime.now(timezone.utc) + self._session.add(row) diff --git a/repositories/property/property_postgres_repository.py b/repositories/property/property_postgres_repository.py index 9c39c91c..c7bb1364 100644 --- a/repositories/property/property_postgres_repository.py +++ b/repositories/property/property_postgres_repository.py @@ -1,7 +1,9 @@ from __future__ import annotations -from typing import Optional +from typing import Optional, cast +from sqlalchemy import Table +from sqlalchemy.dialects.postgresql import insert as pg_insert from sqlmodel import Session, col, select from domain.geospatial.planning_restrictions import PlanningRestrictions @@ -9,36 +11,57 @@ from domain.property.properties import Properties from domain.property.property import Property, PropertyIdentity from infrastructure.postgres.property_table import PropertyRow from repositories.epc.epc_repository import EpcRepository -from repositories.property.property_repository import PropertyRepository +from repositories.property.property_repository import ( + PropertyIdentityInsert, + PropertyRepository, +) from repositories.spatial.spatial_repository import SpatialRepository class PropertyPostgresRepository(PropertyRepository): - """Hydrates the Property aggregate from the FE-owned ``property`` row plus the - EPC slice (via an injected `EpcRepository`) and the planning protections - (via an injected `SpatialRepository`, keyed by UPRN — ADR-0020). Reads only - from repos — no external IO — so a hydrated Property is a pure function of - repository state (ADR-0003). + """Postgres adapter for the ``property`` table — reads and writes (ADR-0003). + + Reads hydrate the Property aggregate from the FE-owned row plus the EPC slice + (via an injected `EpcRepository`) and the planning protections (via an + injected `SpatialRepository`, keyed by UPRN — ADR-0020), so a hydrated + Property is a pure function of repository state. ``epc_repo`` / ``spatial_repo`` + are optional: the Finalise write path (``insert_all``) creates new identity + rows and never hydrates, so callers that only insert construct this with a + session alone. """ def __init__( self, session: Session, - epc_repo: EpcRepository, - spatial_repo: SpatialRepository, + epc_repo: Optional[EpcRepository] = None, + spatial_repo: Optional[SpatialRepository] = None, ) -> None: self._session = session self._epc_repo = epc_repo self._spatial_repo = spatial_repo + # ``__table__`` is injected at runtime on table=True classes but the stubs + # don't expose it; pin to ``Table`` so the dialect insert is typed. + self._table: Table = cast(Table, getattr(PropertyRow, "__table__")) + + def _epc(self) -> EpcRepository: + if self._epc_repo is None: + raise ValueError( + "PropertyPostgresRepository needs an EpcRepository to read; it was " + "constructed for the write-only Finalise path." + ) + return self._epc_repo def get(self, property_id: int) -> Property: row = self._session.get(PropertyRow, property_id) if row is None: raise ValueError(f"property {property_id} not found") identity = PropertyIdentity( + # `postcode`/`address` are nullable in the FE schema (the finaliser may + # insert a row with neither); coerce the degenerate null to "" so the + # identity type stays a plain str. portfolio_id=row.portfolio_id, - postcode=row.postcode, - address=row.address, + postcode=row.postcode or "", + address=row.address or "", uprn=row.uprn, landlord_property_id=row.landlord_property_id, ) @@ -47,7 +70,7 @@ class PropertyPostgresRepository(PropertyRepository): ) return Property( identity=identity, - epc=self._epc_repo.get_for_property(property_id), + epc=self._epc().get_for_property(property_id), planning_restrictions=_restrictions_of(row.uprn, restrictions), ) @@ -58,7 +81,7 @@ class PropertyPostgresRepository(PropertyRepository): select(PropertyRow).where(col(PropertyRow.id).in_(property_ids)) ).all() row_by_id = {row.id: row for row in rows} - epcs = self._epc_repo.get_for_properties(property_ids) + epcs = self._epc().get_for_properties(property_ids) restrictions: dict[int, PlanningRestrictions] = self._restrictions_for( [row.uprn for row in rows if row.uprn is not None] ) @@ -71,8 +94,8 @@ class PropertyPostgresRepository(PropertyRepository): Property( identity=PropertyIdentity( portfolio_id=row.portfolio_id, - postcode=row.postcode, - address=row.address, + postcode=row.postcode or "", + address=row.address or "", uprn=row.uprn, landlord_property_id=row.landlord_property_id, ), @@ -82,13 +105,47 @@ class PropertyPostgresRepository(PropertyRepository): ) return Properties(items) - def _restrictions_for( - self, uprns: list[int] - ) -> dict[int, PlanningRestrictions]: - if not uprns: + def _restrictions_for(self, uprns: list[int]) -> dict[int, PlanningRestrictions]: + # No spatial repo (the write-only Finalise path) → no cached protections; + # `_restrictions_of` then defaults every UPRN to unrestricted. + if not uprns or self._spatial_repo is None: return {} return self._spatial_repo.get_for_uprns(uprns) + def insert_all(self, rows: list[PropertyIdentityInsert]) -> int: + if not rows: + return 0 + + values = [ + { + "portfolio_id": r.portfolio_id, + "creation_status": r.creation_status, + "uprn": r.uprn, + "landlord_property_id": r.landlord_property_id, + "address": r.address, + "postcode": r.postcode, + "user_inputted_address": r.user_inputted_address, + "user_inputted_postcode": r.user_inputted_postcode, + "lexiscore": r.lexiscore, + } + for r in rows + ] + + stmt = pg_insert(self._table).values(values) + # Matches `uq_property_portfolio_uprn` (partial: WHERE uprn IS NOT NULL), + # reproducing today's Next.js onConflictDoNothing — a re-run leaves existing + # properties untouched (contrast property_overrides, which recalculates). + stmt = stmt.on_conflict_do_nothing( + index_elements=["portfolio_id", "uprn"], + index_where=self._table.c.uprn.isnot(None), + ) + + # SQLModel re-exports SQLAlchemy's Session.execute; one overload is marked + # deprecated in the stubs, and they resolve the INSERT to a bare + # ``Result`` (no ``rowcount``) — both are stub limitations, not real. + result = self._session.execute(stmt) # pyright: ignore[reportDeprecated] + return cast(int, result.rowcount) # pyright: ignore[reportUnknownMemberType, reportAttributeAccessIssue] + def _restrictions_of( uprn: Optional[int], by_uprn: dict[int, PlanningRestrictions] diff --git a/repositories/property/property_repository.py b/repositories/property/property_repository.py index 1f3df1da..1773b530 100644 --- a/repositories/property/property_repository.py +++ b/repositories/property/property_repository.py @@ -1,17 +1,40 @@ from __future__ import annotations from abc import ABC, abstractmethod +from dataclasses import dataclass +from typing import Optional from domain.property.properties import Properties from domain.property.property import Property -class PropertyRepository(ABC): - """Loads and saves the Property aggregate. +@dataclass(frozen=True) +class PropertyIdentityInsert: + """One row inserted into the FE-owned ``property`` table at Finalise (ADR-0013). - Composes the aggregate whole from the FE-owned ``property`` identity row plus - its source-data slices (EPC today; Site Notes / enrichments as later slices - land). Aggregates load whole — never half a Property (ADR-0002). + Mirrors the exact column set today's Next.js ``/finalize`` writes: nine fields + plus ``creation_status='READY'``. ``address``/``postcode`` are the resolved + (matched ?? user-inputted) values and may be ``None``. + """ + + portfolio_id: int + uprn: Optional[int] + landlord_property_id: Optional[str] + address: Optional[str] + postcode: Optional[str] + user_inputted_address: Optional[str] + user_inputted_postcode: Optional[str] + lexiscore: Optional[float] + creation_status: str = "READY" + + +class PropertyRepository(ABC): + """Reads and writes the FE-owned ``property`` table. + + Reads hydrate the Property aggregate whole — never half a Property — from the + identity row plus its source-data slices (EPC today; Site Notes / enrichments + as later slices land) (ADR-0002, ADR-0012). Writes bulk-insert identity rows at + Finalise (ADR-0013). One repository per aggregate. """ @abstractmethod @@ -23,3 +46,11 @@ class PropertyRepository(ABC): rather than one round-trip per property (ADR-0012). Order follows the input ids.""" ... + + @abstractmethod + def insert_all(self, rows: list[PropertyIdentityInsert]) -> int: + """Bulk-insert identity rows, skipping any whose ``(portfolio_id, uprn)`` + already exists (the FE partial unique index, ``WHERE uprn IS NOT NULL``). + Rows with no UPRN are always inserted. Returns the number actually + inserted; an empty list is a no-op returning 0 (ADR-0013).""" + ... diff --git a/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py b/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py index fc95112c..f99afce9 100644 --- a/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py +++ b/tests/domain/sap10_calculator/rdsap/test_cert_to_inputs.py @@ -59,7 +59,11 @@ from domain.sap10_calculator.rdsap.cert_to_inputs import ( _is_electric_water, # pyright: ignore[reportPrivateUsage] _is_off_peak_meter, # pyright: ignore[reportPrivateUsage] _main_floor_u_value, # pyright: ignore[reportPrivateUsage] + _main_space_heating_high_rate_fraction, # pyright: ignore[reportPrivateUsage] _other_fuel_cost_gbp_per_kwh, # pyright: ignore[reportPrivateUsage] + _pv_diverter_monthly_kwh, # pyright: ignore[reportPrivateUsage] + _pv_dwelling_import_price_gbp_per_kwh, # pyright: ignore[reportPrivateUsage] + _pv_eligible_demand_monthly_kwh, # pyright: ignore[reportPrivateUsage] _primary_loss_applies, # pyright: ignore[reportPrivateUsage] _rdsap_extract_fans_default, # pyright: ignore[reportPrivateUsage] _pv_overshading_factor, # pyright: ignore[reportPrivateUsage] @@ -1766,6 +1770,209 @@ def test_other_fuel_cost_for_18_hour_tariff_uses_18_hour_high_rate() -> None: ) +def test_main_space_high_rate_fraction_zero_for_off_peak_storage_heaters() -> None: + # Arrange — SAP 10.2 Appendix M1 §3a (PDF p.93): E_space,m (211) is + # included in D_PV "only where the fuel code applied to it in Section + # 10a is 30, 32, 34, 35 or 38 (i.e. electricity not at the low-rate)". + # Electric STORAGE heaters (code 402) on a 7-hour off-peak tariff are + # charged wholly at the low rate (Table 12a Grid 1 SH fraction 0.00 / + # `_table_12a_system_for_main` → None) — worksheet (240) high-rate + # cost = 0 — so none of (211) may enter D_PV. + from domain.sap10_calculator.tables.table_12a import Tariff + + storage_main = MainHeatingDetail( + has_fghrs=False, + main_fuel_type=29, # electricity + heat_emitter_type=1, + emitter_temperature=1, + main_heating_control=2402, + main_heating_category=7, + sap_main_heating_code=402, + ) + + # Act + off_peak = _main_space_heating_high_rate_fraction( + storage_main, Tariff.SEVEN_HOUR + ) + standard = _main_space_heating_high_rate_fraction( + storage_main, Tariff.STANDARD + ) + gas = _main_space_heating_high_rate_fraction( + _gas_boiler_detail(sap_main_heating_code=102), Tariff.SEVEN_HOUR + ) + + # Assert + assert abs(off_peak - 0.0) <= 1e-9 + # STANDARD tariff has no high/low split → 100% high rate. + assert abs(standard - 1.0) <= 1e-9 + # Non-electric main never carries an off-peak split. + assert abs(gas - 1.0) <= 1e-9 + + +def test_pv_eligible_demand_excludes_low_rate_main_space_heating() -> None: + # Arrange — SAP 10.2 Appendix M1 §3a (PDF p.93). A main billed wholly + # at the low rate (high-rate fraction 0.0) must contribute zero to + # D_PV even though its Table-12 code (30) is in the eligible set; the + # secondary (also code 30) at its full high-rate fraction stays in. + main_1 = tuple(float(100 + m) for m in range(12)) + secondary = tuple(float(10 + m) for m in range(12)) + base = tuple(float(5) for _ in range(12)) # lighting et al. + + # Act + excluded = _pv_eligible_demand_monthly_kwh( + lighting_monthly_kwh=base, + appliances_monthly_kwh=(0.0,) * 12, + cooking_monthly_kwh=(0.0,) * 12, + electric_shower_monthly_kwh=(0.0,) * 12, + pumps_fans_monthly_kwh=(0.0,) * 12, + main_1_fuel_monthly_kwh=main_1, + secondary_fuel_monthly_kwh=secondary, + hot_water_monthly_kwh=(0.0,) * 12, + main_fuel_code_table_12=30, + secondary_fuel_code_table_12=30, + water_heating_fuel_code_table_12=26, # gas → no E_water + main_space_high_rate_fraction=0.0, + ) + included = _pv_eligible_demand_monthly_kwh( + lighting_monthly_kwh=base, + appliances_monthly_kwh=(0.0,) * 12, + cooking_monthly_kwh=(0.0,) * 12, + electric_shower_monthly_kwh=(0.0,) * 12, + pumps_fans_monthly_kwh=(0.0,) * 12, + main_1_fuel_monthly_kwh=main_1, + secondary_fuel_monthly_kwh=secondary, + hot_water_monthly_kwh=(0.0,) * 12, + main_fuel_code_table_12=30, + secondary_fuel_code_table_12=30, + water_heating_fuel_code_table_12=26, + main_space_high_rate_fraction=1.0, + ) + + # Assert — excluded drops the full (211); secondary stays in both. + for m in range(12): + assert abs(excluded[m] - (base[m] + secondary[m])) <= 1e-9 + assert abs(included[m] - (base[m] + secondary[m] + main_1[m])) <= 1e-9 + + +def test_pv_dwelling_import_price_blends_high_low_on_off_peak() -> None: + # Arrange — SAP 10.2 Appendix M1 §6 (PDF p.94, lines 5510-5513): PV + # used in the dwelling is credited at "a weighted average of high and + # low rates (Table 12a)". On a 7-hour tariff the ALL_OTHER_USES blend + # is 0.90 × 15.29 + 0.10 × 5.50 = 14.311 p/kWh (worksheet case 19 + # (252) "PV used in dwelling" = 14.3110). STANDARD tariff has no + # split → flat Table 32 code 30 = 13.19 p/kWh (unchanged). + from domain.sap10_calculator.tables.table_12a import Tariff + + # Act + off_peak = _pv_dwelling_import_price_gbp_per_kwh( + Tariff.SEVEN_HOUR, SAP_10_2_SPEC_PRICES + ) + standard = _pv_dwelling_import_price_gbp_per_kwh( + Tariff.STANDARD, SAP_10_2_SPEC_PRICES + ) + + # Assert + assert abs(off_peak - 0.14311) <= 1e-6 + assert abs(standard - 0.1319) <= 1e-6 + + +def _pv_diverter_epc(): + """A minimal dwelling that satisfies every Appendix G4 inclusion + condition: a 210 L cylinder (code 4), no solar HW, no battery, with + `pv_diverter_present` set on the energy source.""" + from dataclasses import replace + + epc = make_minimal_sap10_epc( + total_floor_area_m2=90.0, + habitable_rooms_count=4, + country_code="ENG", + has_hot_water_cylinder=True, + solar_water_heating=False, + sap_heating=make_sap_heating( + main_heating_details=[_gas_boiler_detail(sap_main_heating_code=102)], + cylinder_size=4, # RdSAP Table 28 code 4 → 210 L + ), + ) + return replace( + epc, + sap_energy_source=replace( + epc.sap_energy_source, pv_diverter_present=True + ), + ) + + +def test_pv_diverter_monthly_applies_g4_correction_and_clamp() -> None: + # Arrange — SAP 10.2 Appendix G4 step 4 (PDF p.73): SPV,diverter,m = + # EPV,m(1 − βm) × 0.8 × 0.9, clamped to ≤ (62)m + (63a)m. With a + # 100-kWh monthly surplus the uncapped diverter input is 72 kWh; a + # month whose water demand is only 50 kWh clamps it to 50. + epc = _pv_diverter_epc() + export = tuple(100.0 for _ in range(12)) + demand = tuple(50.0 if m < 6 else 1000.0 for m in range(12)) + + # Act + out = _pv_diverter_monthly_kwh( + epc=epc, + pv_export_monthly_kwh=export, + water_demand_monthly_kwh=demand, + avg_daily_hot_water_l=120.0, # < 210 L cylinder + battery_capacity_kwh=0.0, + pv_generation_kwh=1200.0, + ) + + # Assert + assert out is not None + for m in range(12): + expected = min(100.0 * 0.8 * 0.9, demand[m]) + assert abs(out[m] - expected) <= 1e-9 + + +def test_pv_diverter_disregarded_when_any_g4_condition_fails() -> None: + # Arrange — SAP 10.2 Appendix G4 step 1: if a PV system / large-enough + # cylinder / no-solar-HW / no-battery condition is not met, software + # disregards the diverter (returns None). + from dataclasses import replace + + epc = _pv_diverter_epc() + export: tuple[float, ...] = (100.0,) * 12 + demand: tuple[float, ...] = (1000.0,) * 12 + + def divert( + e: object, avg_l: float = 120.0, battery: float = 0.0, pv_gen: float = 1200.0 + ) -> Optional[tuple[float, ...]]: + return _pv_diverter_monthly_kwh( + epc=e, # pyright: ignore[reportArgumentType] + pv_export_monthly_kwh=export, + water_demand_monthly_kwh=demand, + avg_daily_hot_water_l=avg_l, + battery_capacity_kwh=battery, + pv_generation_kwh=pv_gen, + ) + + # Act / Assert — sanity: all conditions met → not None. + assert divert(epc) is not None + # Diverter not present. + assert ( + divert( + replace( + epc, + sap_energy_source=replace( + epc.sap_energy_source, pv_diverter_present=False + ), + ) + ) + is None + ) + # No PV generation (condition a). + assert divert(epc, pv_gen=0.0) is None + # Cylinder not larger than (43) average daily HW use (condition b). + assert divert(epc, avg_l=9999.0) is None + # Battery present (condition d). + assert divert(epc, battery=5.0) is None + # Solar water heating present (condition c). + assert divert(replace(epc, solar_water_heating=True)) is None + + def test_is_off_peak_meter_recognises_bare_18_hour_lodging() -> None: # Arrange — RdSAP 10 §17 page 85 row 10-2 lodges 18-hour meter # as the bare "18-hour" or "18 Hour" form (Elmhurst Summary §14.2 @@ -2938,6 +3145,55 @@ def test_space_heating_off_peak_fallback_uses_actual_tariff_low_rate_not_e7() -> assert abs(cost_eighteen_hour - 0.0741) <= 1e-6 +def test_space_heating_electric_room_heater_off_peak_bills_at_direct_acting_high_rate() -> None: + # Arrange — an ELECTRIC room heater (RdSAP main_heating_category 10, + # e.g. SAP code 691) is direct-acting electric, so SAP 10.2 Table 12a + # Grid 1 (PDF p.191) puts it on the "Other systems including direct- + # acting electric" row: 7-hour high-rate fraction 1.00, 10-hour 0.50. + # Unlike STORAGE heaters (category 7), which charge off-peak and so + # correctly bill 100% at the low rate, a room heater runs on demand — + # mostly at the HIGH rate. `_table_12a_system_for_main` only mapped + # ASHP, so a room heater fell through to the "100% low-rate" fallback + # (5.50 p, £0.0550), under-charging space heating by ~9.79 p/kWh and + # systematically OVER-rating the cat-10 cluster (1,000-cert API sample: + # 48 certs, mean |err| 9.49, signed +5.08). The fix maps electric + # cat-10 mains to OTHER_DIRECT_ACTING_ELECTRIC. Mirror of S0380.228 + # (which fixed the same fallback for electric SECONDARY heating). + from domain.sap10_calculator.tables.table_12a import Tariff + electric_room_heater_main = MainHeatingDetail( + has_fghrs=False, + main_fuel_type=30, # standard electricity + heat_emitter_type=2, + emitter_temperature=1, + main_heating_control=2602, + main_heating_category=10, # electric room heaters + sap_main_heating_code=691, + ) + gas_room_heater_main = MainHeatingDetail( + has_fghrs=False, + main_fuel_type=1, # mains gas — NOT electric + heat_emitter_type=2, + emitter_temperature=1, + main_heating_control=2602, + main_heating_category=10, # gas room heater (also cat 10) + sap_main_heating_code=631, + ) + + # Act — 7-hour off-peak tariff. + electric_rate = _space_heating_fuel_cost_gbp_per_kwh( + electric_room_heater_main, Tariff.SEVEN_HOUR, prices=SAP_10_2_SPEC_PRICES, + ) + gas_rate = _space_heating_fuel_cost_gbp_per_kwh( + gas_room_heater_main, Tariff.SEVEN_HOUR, prices=SAP_10_2_SPEC_PRICES, + ) + + # Assert — electric room heater: 1.00 × 15.29 p = £0.1529 (high rate). + # Gas room heater is unaffected (non-electric → single Table 32 rate, + # not the off-peak electric split). + assert abs(electric_rate - 0.1529) <= 1e-6 + assert abs(gas_rate - 0.0550) > 1e-6 + + def test_heat_network_dlf_full_table_12c_age_band_coverage() -> None: # Arrange — SAP 10.2 Table 12c (page 193) heat-network Distribution # Loss Factor by dwelling age band A..M. None → K-or-newer diff --git a/tests/domain/sap10_calculator/test_table_12a.py b/tests/domain/sap10_calculator/test_table_12a.py index 2b294ca2..a15cb2ed 100644 --- a/tests/domain/sap10_calculator/test_table_12a.py +++ b/tests/domain/sap10_calculator/test_table_12a.py @@ -17,12 +17,35 @@ from domain.sap10_calculator.tables.table_12a import ( Table12aSystem, Tariff, other_use_high_rate_fraction, + rdsap_tariff_for_cert, space_heating_high_rate_fraction, tariff_from_meter_type, water_heating_high_rate_fraction, ) +def test_dual_meter_electric_room_heater_resolves_to_ten_hour_tariff() -> None: + # Arrange — RdSAP 10 §12 (PDF p.62) Dual-meter tariff dispatch: for a + # Dual meter the choice between 7-hour and 10-hour is made by the main + # heating type. Rule 3 verbatim: "if the main system ... is a direct- + # acting electric boiler (191), or electric room heaters ... it is + # 10-hour tariff." The electric room-heater codes are Table 4a 691 + # (panel/convector/radiant), 692 (fan), 693 (portable), 694 (water-/ + # oil-filled), 699 (assumed). Pre-slice these fell through to Rule 4 + # (7-hour default), so a Dual-meter room-heater cert was billed at the + # 7-hour rates with Table 12a high-rate fraction 1.00 instead of the + # 10-hour rates with fraction 0.50 — over-charging direct-acting heat + # once S0380.230 routed it to OTHER_DIRECT_ACTING_ELECTRIC. + + # Act / Assert — every electric room-heater code on a Dual meter → 10-hour. + for code in (691, 692, 693, 694, 699): + assert rdsap_tariff_for_cert(1, main_1_sap_code=code) is Tariff.TEN_HOUR + # Storage heaters (Rule 2) stay 7-hour; a gas room heater (non-Rule-3 + # code) keeps the Rule 4 Dual default of 7-hour. + assert rdsap_tariff_for_cert(1, main_1_sap_code=401) is Tariff.SEVEN_HOUR + assert rdsap_tariff_for_cert(1, main_1_sap_code=601) is Tariff.SEVEN_HOUR + + def test_tariff_enum_has_five_members() -> None: """Table 12a columns: standard (no off-peak split), 7-hour, 10-hour, 18-hour, 24-hour. Worksheet-shape fidelity: TEN_HOUR is included for diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000474.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000474.py index 6a3a68a7..bb0998c5 100644 --- a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000474.py +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000474.py @@ -66,6 +66,8 @@ def build_epc() -> EpcPropertyData: """ main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, @@ -99,6 +101,8 @@ def build_epc() -> EpcPropertyData: ) extension_1 = SapBuildingPart( identifier=BuildingPartIdentifier.EXTENSION_1, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, @@ -131,6 +135,8 @@ def build_epc() -> EpcPropertyData: ) extension_2 = SapBuildingPart( identifier=BuildingPartIdentifier.EXTENSION_2, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=3, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000477.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000477.py index 737a3f82..e1f63d0c 100644 --- a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000477.py +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000477.py @@ -64,6 +64,8 @@ def build_epc() -> EpcPropertyData: """ main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000480.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000480.py index bc73529c..af392b42 100644 --- a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000480.py +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000480.py @@ -65,6 +65,8 @@ def build_epc() -> EpcPropertyData: """ main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, @@ -134,6 +136,8 @@ def build_epc() -> EpcPropertyData: ) extension = SapBuildingPart( identifier=BuildingPartIdentifier.EXTENSION_1, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000487.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000487.py index 1f545fb4..104fa0cb 100644 --- a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000487.py +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000487.py @@ -61,6 +61,8 @@ def build_epc() -> EpcPropertyData: """ main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, # "A As Built" @@ -131,6 +133,8 @@ def build_epc() -> EpcPropertyData: ) extension = SapBuildingPart( identifier=BuildingPartIdentifier.EXTENSION_1, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000490.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000490.py index e77a121e..9bf66262 100644 --- a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000490.py +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000490.py @@ -66,6 +66,8 @@ def build_epc() -> EpcPropertyData: """ main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, @@ -98,6 +100,8 @@ def build_epc() -> EpcPropertyData: ) extension = SapBuildingPart( identifier=BuildingPartIdentifier.EXTENSION_1, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=4, construction_age_band="B", wall_construction=_WC_CAVITY, wall_insulation_type=4, diff --git a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000516.py b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000516.py index d6014167..d161dd9b 100644 --- a/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000516.py +++ b/tests/domain/sap10_calculator/worksheet/_elmhurst_worksheet_000516.py @@ -70,6 +70,8 @@ def build_epc() -> EpcPropertyData: """ main = SapBuildingPart( identifier=BuildingPartIdentifier.MAIN, + # API parity: roof_construction int mirrors the gov-EPC mapper + roof_construction=3, construction_age_band="A", wall_construction=_WC_CAVITY, wall_insulation_type=4, diff --git a/tests/domain/sap10_calculator/worksheet/test_heat_transmission.py b/tests/domain/sap10_calculator/worksheet/test_heat_transmission.py index 1757bdd6..ad7ad30d 100644 --- a/tests/domain/sap10_calculator/worksheet/test_heat_transmission.py +++ b/tests/domain/sap10_calculator/worksheet/test_heat_transmission.py @@ -1274,6 +1274,86 @@ def test_explicit_wall_is_basement_flag_disambiguates_system_built_from_basement assert alt_api_code_6.is_basement_wall is True +def test_system_build_property_derives_from_main_wall_construction_type() -> None: + # Arrange — system-built is a WALL TYPE: RdSAP10 WALL_SYSTEM_BUILT=6 + # on the MAIN wall. It shares the integer with basement, so a code-6 + # main wall is system-built only when it is NOT flagged basement. The + # `system_build` property reads the wall type (wall_construction) + the + # dedicated basement flag — it does not need a separate dwelling-level + # field. + from dataclasses import replace + base_main = make_building_part( + identifier=BuildingPartIdentifier.MAIN, + construction_age_band="G", + wall_construction=6, wall_insulation_type=4, + party_wall_construction=1, roof_construction=4, + floor_dimensions=[ + make_floor_dimension( + total_floor_area_m2=80.0, room_height_m=2.5, + party_wall_length_m=0.0, heat_loss_perimeter_m=35.0, floor=0, + ), + ], + ) + system_built = make_minimal_sap10_epc( + sap_building_parts=[replace(base_main, wall_is_basement=False)], + ) + basement = make_minimal_sap10_epc( + sap_building_parts=[replace(base_main, wall_is_basement=True)], + ) + cavity = make_minimal_sap10_epc( + sap_building_parts=[replace(base_main, wall_construction=4)], + ) + no_main = make_minimal_sap10_epc(sap_building_parts=[]) + + # Act / Assert — code 6 + not basement → system-built; code 6 + basement + # → not system-built; a non-6 wall type → not system-built; no main → None. + assert system_built.system_build is True + assert basement.system_build is False + assert cavity.system_build is False + assert no_main.system_build is None + + +def test_system_built_addendum_clears_basement_on_code_6_walls_api_path() -> None: + # Arrange — gov-EPC API system-built cert: the per-wall code 6 can't be + # told from a basement at lodging time, so once the cert addendum marks + # the dwelling system-built, `from_api_response` clears the code-6 + # basement heuristic. A genuine basement (no addendum signal) keeps it. + from dataclasses import replace + + from datatypes.epc.domain.epc_property_data import Addendum + from datatypes.epc.domain.mapper import _clear_basement_flag_when_system_built # pyright: ignore[reportPrivateUsage] + + code_6_main = make_building_part( + identifier=BuildingPartIdentifier.MAIN, + construction_age_band="G", + wall_construction=6, wall_insulation_type=4, + party_wall_construction=1, roof_construction=4, + floor_dimensions=[ + make_floor_dimension( + total_floor_area_m2=80.0, room_height_m=2.5, + party_wall_length_m=0.0, heat_loss_perimeter_m=35.0, floor=0, + ), + ], + ) + system_built_cert = replace( + make_minimal_sap10_epc(sap_building_parts=[code_6_main]), + addendum=Addendum(system_build=True), + ) + genuine_basement_cert = make_minimal_sap10_epc(sap_building_parts=[code_6_main]) + + # Act + cleared = _clear_basement_flag_when_system_built(system_built_cert) + untouched = _clear_basement_flag_when_system_built(genuine_basement_cert) + + # Assert — system-built cert: code-6 main wall is no longer basement, + # and the wall-type-derived system_build reads True. Genuine basement + # (no addendum) is unchanged → still basement. + assert cleared.sap_building_parts[0].main_wall_is_basement is False + assert cleared.system_build is True + assert untouched.sap_building_parts[0].main_wall_is_basement is True + assert untouched.system_build is False + + def test_basement_alt_wall_uses_table_23_u_value_not_cascade() -> None: """RdSAP §5.17 / Table 23 governs basement-wall U-values: 0.7 for age A-F, 0.6 for G-H, 0.45 for I, 0.35 for J, ..., 0.26 for M. The diff --git a/tests/orchestration/fakes.py b/tests/orchestration/fakes.py index b0abd38c..3efacd29 100644 --- a/tests/orchestration/fakes.py +++ b/tests/orchestration/fakes.py @@ -21,7 +21,10 @@ from repositories.plan.plan_repository import PlanRepository from repositories.product.product_repository import ProductRepository from repositories.property_baseline.property_baseline_repository import PropertyBaselineRepository from repositories.epc.epc_repository import EpcRepository -from repositories.property.property_repository import PropertyRepository +from repositories.property.property_repository import ( + PropertyIdentityInsert, + PropertyRepository, +) from repositories.scenario.scenario_repository import ScenarioRepository from repositories.solar.solar_repository import SolarRepository from repositories.spatial.spatial_repository import SpatialRepository @@ -60,6 +63,10 @@ class FakePropertyRepo(PropertyRepository): def get_many(self, property_ids: list[int]) -> Properties: return Properties([self._hydrate(property_id) for property_id in property_ids]) + def insert_all(self, rows: list[PropertyIdentityInsert]) -> int: + self.inserted: list[PropertyIdentityInsert] = list(rows) + return len(rows) + class FakeEpcRepo(EpcRepository): def __init__(self, by_property: Optional[dict[int, EpcPropertyData]] = None) -> None: diff --git a/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py b/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py new file mode 100644 index 00000000..da6de277 --- /dev/null +++ b/tests/orchestration/test_bulk_upload_finaliser_orchestrator.py @@ -0,0 +1,105 @@ +"""The finaliser orchestrator runs end-to-end against fake writers — no DB. + +This is the payoff of injecting the repository ports (ADR-0013): the whole +Finalise domain flow (resolve combiner rows -> insert properties -> mark complete) +is unit-testable with two in-memory fakes, no engine/session/Postgres. +""" + +from __future__ import annotations + +from uuid import UUID, uuid4 + +from domain.property.properties import Properties +from domain.property.property import Property +from orchestration.bulk_upload_finaliser_orchestrator import ( + BulkUploadFinaliserOrchestrator, +) +from repositories.bulk_upload.bulk_upload_status_writer import BulkUploadStatusWriter +from repositories.property.property_repository import ( + PropertyIdentityInsert, + PropertyRepository, +) + + +class FakePropertyRepository(PropertyRepository): + """Records inserts; the read half is unused on the Finalise path.""" + + def __init__(self) -> None: + self.inserted: list[PropertyIdentityInsert] = [] + + def insert_all(self, rows: list[PropertyIdentityInsert]) -> int: + self.inserted = list(rows) + return len(rows) + + def get(self, property_id: int) -> Property: # pragma: no cover + raise NotImplementedError + + def get_many(self, property_ids: list[int]) -> Properties: # pragma: no cover + raise NotImplementedError + + +class FakeStatusWriter(BulkUploadStatusWriter): + def __init__(self) -> None: + self.calls: list[tuple[UUID, str]] = [] + + def set_status(self, task_id: UUID, status: str) -> None: + self.calls.append((task_id, status)) + + +def _orchestrator() -> tuple[ + BulkUploadFinaliserOrchestrator, FakePropertyRepository, FakeStatusWriter +]: + prop = FakePropertyRepository() + status = FakeStatusWriter() + return BulkUploadFinaliserOrchestrator(prop, status), prop, status + + +def test_finalise_inserts_resolved_rows_and_marks_complete() -> None: + orchestrator, prop, status = _orchestrator() + task_id = uuid4() + rows = [ + { + "Address 1": "1 Some Street", + "postcode": "A0 0AA", + "Internal Reference": "REF-1", + "address2uprn_uprn": "100023", + "address2uprn_address": "1 SOME STREET, TOWN, SW1A 1AA", + "address2uprn_lexiscore": "0.95", + }, + ] + + inserted = orchestrator.finalise(rows, portfolio_id=7, task_id=task_id) + + assert inserted == 1 + (row,) = prop.inserted + assert row.portfolio_id == 7 + assert row.uprn == 100023 + assert row.landlord_property_id == "REF-1" + assert row.address == "1 SOME STREET, TOWN, SW1A 1AA" # matched wins + assert row.postcode == "SW1A 1AA" # extracted from the matched address + assert row.user_inputted_address == "1 Some Street" + assert row.lexiscore == 0.95 + assert row.creation_status == "READY" + # The upload is marked complete via the injected status writer. + assert status.calls == [(task_id, "complete")] + + +def test_finalise_falls_back_to_user_input_when_unmatched() -> None: + orchestrator, prop, _status = _orchestrator() + rows = [ + { + "Address 1": "2 Other Road", + "postcode": "B1 1BB", + "address2uprn_uprn": "invalid postcode", + "address2uprn_address": "invalid postcode", + "address2uprn_lexiscore": "", + }, + ] + + orchestrator.finalise(rows, portfolio_id=9, task_id=uuid4()) + + (row,) = prop.inserted + assert row.uprn is None # missing sentinel -> None + assert row.address == "2 Other Road" # falls back to user input + assert row.postcode == "B1 1BB" # falls back to user-inputted postcode + assert row.lexiscore is None diff --git a/tests/repositories/epc/test_epc_round_trip.py b/tests/repositories/epc/test_epc_round_trip.py index 192027f7..8233bda3 100644 --- a/tests/repositories/epc/test_epc_round_trip.py +++ b/tests/repositories/epc/test_epc_round_trip.py @@ -48,3 +48,37 @@ def test_epc_property_data_round_trips(schema_dir: str, db_engine: Engine) -> No # Assert assert reloaded == original + + +def test_building_part_wall_insulation_thickness_preserves_int( + db_engine: Engine, +) -> None: + # SAP 10.2 §5.7/Table 8: when the API lodges + # `wall_insulation_thickness == "measured"`, the mapper resolves the + # value to an int mm. The `epc_building_part.wall_insulation_thickness` + # column must therefore preserve int vs str on round-trip (JSONB), like + # its `roof_insulation_thickness` sibling — a plain str column would + # round-trip the int 100 back as "100" and corrupt the Table 8 lookup. + from dataclasses import replace + + # Arrange — take a green fixture and force the measured-int case. + original = _load_epc("RdSAP-Schema-21.0.0") + assert original.sap_building_parts, "fixture must have a building part" + bp0 = replace(original.sap_building_parts[0], wall_insulation_thickness=100) + original = replace( + original, + sap_building_parts=[bp0, *original.sap_building_parts[1:]], + ) + + # Act + with Session(db_engine) as session: + epc_property_id = EpcPostgresRepository(session).save(original) + session.commit() + with Session(db_engine) as session: + reloaded = EpcPostgresRepository(session).get(epc_property_id) + + # Assert — the int survives as an int, not the string "100". + assert reloaded is not None + value = reloaded.sap_building_parts[0].wall_insulation_thickness + assert value == 100 + assert isinstance(value, int) diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py index 3e9168ba..c36cb418 100644 --- a/utils/sharepoint/domna_sharepoint_client.py +++ b/utils/sharepoint/domna_sharepoint_client.py @@ -37,7 +37,7 @@ class DomnaSharepointClient: site_id=self.sharepoint_drive.value, ) - return sharepoint_client.list_folder_contents(path) + return sharepoint_client.list_folder_contents(path, page_size=500) def does_folder_exists_at(self, file_name: str, file_path: str): folders: Dict[str, Any] = self.get_folders_in_path(file_path) diff --git a/utils/sharepoint/domna_sites.py b/utils/sharepoint/domna_sites.py index e5efb82c..ce579af6 100644 --- a/utils/sharepoint/domna_sites.py +++ b/utils/sharepoint/domna_sites.py @@ -9,3 +9,4 @@ class DomnaSites(Enum): OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID") PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID") SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID") + ECO = os.getenv("ECO_SHAREPOINT_ID")