fixed merge conflicts from main

deleted scaffolding packages folders
deleted scaffolding services folder
2026-06-30 13:10:47 +00:00 · 2026-05-26 11:21:09 +00:00 · 2026-05-26 10:43:16 +00:00 · 2026-05-26 10:41:00 +00:00 · 2026-05-26 10:36:12 +00:00 · 2026-05-26 10:32:18 +00:00
413 changed files with 97161 additions and 768 deletions
--- a/.devcontainer/backend/devcontainer.json
+++ b/.devcontainer/backend/devcontainer.json
@ -5,7 +5,7 @@
  "remoteUser": "vscode",
  "workspaceFolder": "/workspaces/model",
  "initializeCommand": "docker network create shared-dev 2>/dev/null || true; test -d \"$HOME/.config/gh\" || test -n \"$GITHUB_TOKEN\" || { echo >&2 'error: no GitHub auth found. Run `gh auth login && gh auth setup-git` on the host, or export GITHUB_TOKEN, then retry.'; exit 1; }",
-  "postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.5 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
+  "postCreateCommand": "gh repo clone Hestia-Homes/agentic-toolkit /tmp/agentic-toolkit -- --branch 0.0.7 --depth 1 && bash /tmp/agentic-toolkit/setup.sh",
  "postStartCommand": "bash .devcontainer/backend/post-install.sh",
  "mounts": [
    "source=${localEnv:HOME},target=/workspaces/home,type=bind",
--- a/.dockerignore
+++ b/.dockerignore
@ -6,7 +6,7 @@ backend/.idea/*
 backend/.env
 recommendations/tests/*
 model_data/tests/*
-infrastructure/*
+deployment/*
 data_collection/*
 node_modules/*
 conservation_areas/*
--- a/.github/workflows/_build_image.yml
+++ b/.github/workflows/_build_image.yml
@ -40,6 +40,8 @@ on:
        required: false
      EPC_AUTH_TOKEN:
        required: false
+      OPEN_EPC_API_TOKEN:
+        required: false

 jobs:
  build:
@ -50,6 +52,7 @@ jobs:
      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
      EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}
+      OPEN_EPC_API_TOKEN: ${{ secrets.OPEN_EPC_API_TOKEN }}

    outputs:
      image_digest: ${{ steps.digest.outputs.image_digest }}
--- a/.github/workflows/_deploy_lambda.yml
+++ b/.github/workflows/_deploy_lambda.yml
@ -80,6 +80,10 @@ on:
        required: false
      TF_VAR_pashub_password:
        required: false
+      TF_VAR_pashub_coordination_email:
+        required: false
+      TF_VAR_pashub_coordination_password:
+        required: false
      TF_VAR_hubspot_api_key:
        required: false

@ -154,6 +158,8 @@ jobs:
          TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
          TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
          TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
+          TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
+          TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }}
          TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
          TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
          TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
@ -202,6 +208,8 @@ jobs:
          TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
          TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
          TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
+          TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
+          TF_VAR_pashub_coordination_password: ${{ secrets.TF_VAR_pashub_coordination_password }}
          TF_VAR_hubspot_api_key: ${{ secrets.TF_VAR_hubspot_api_key }}
          TF_VAR_magicplan_customer_id: ${{ secrets.TF_VAR_magicplan_customer_id }}
          TF_VAR_magicplan_api_key: ${{ secrets.TF_VAR_magicplan_api_key }}
--- a/.github/workflows/_smoke_test_lambda.yml
+++ b/.github/workflows/_smoke_test_lambda.yml
@ -0,0 +1,97 @@
+name: Lambda smoke test
+
+# Reusable workflow: builds a Lambda container image, starts it behind the AWS
+# Lambda Runtime Interface Emulator (RIE), invokes the handler once and fails
+# if the response reports an import error.
+on:
+  workflow_call:
+    inputs:
+      dockerfile_path:
+        required: true
+        type: string
+      build_context:
+        required: false
+        default: "."
+        type: string
+      service_name:
+        required: true
+        type: string
+
+jobs:
+  smoke-test:
+    runs-on: ubuntu-latest
+    # Inputs are exposed to the scripts as environment variables rather than
+    # being expanded into the script text with ${{ }}, so a value containing
+    # spaces or shell metacharacters cannot alter the commands that run.
+    env:
+      DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
+      BUILD_CONTEXT: ${{ inputs.build_context }}
+      SMOKE_IMAGE: ${{ inputs.service_name }}-smoke-test:latest
+      SMOKE_CONTAINER: ${{ inputs.service_name }}-smoke-test
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Download AWS Lambda RIE
+        run: |
+          mkdir -p ~/.aws-lambda-rie
+          curl -fsSL -o ~/.aws-lambda-rie/aws-lambda-rie \
+            https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie
+          chmod +x ~/.aws-lambda-rie/aws-lambda-rie
+
+      - name: Build Lambda image
+        run: |
+          docker build \
+            --platform linux/amd64 \
+            -f "$DOCKERFILE_PATH" \
+            -t "$SMOKE_IMAGE" \
+            "$BUILD_CONTEXT"
+
+      - name: Start Lambda container
+        run: |
+          ENTRY=$(docker inspect --format='{{range .Config.Entrypoint}}{{.}} {{end}}' "$SMOKE_IMAGE")
+          CMD_ARGS=$(docker inspect --format='{{range .Config.Cmd}}{{.}} {{end}}' "$SMOKE_IMAGE")
+
+          if echo "$ENTRY" | grep -q "lambda-entrypoint.sh"; then
+            # AWS base image — RIE is bundled
+            docker run -d --name "$SMOKE_CONTAINER" \
+              -p 9000:8080 \
+              "$SMOKE_IMAGE"
+          else
+            # Custom base — mount RIE from runner and re-wire entrypoint.
+            # $ENTRY and $CMD_ARGS are left unquoted so the shell splits the
+            # space-joined strings back into separate arguments.
+            docker run -d --name "$SMOKE_CONTAINER" \
+              -v "$HOME/.aws-lambda-rie:/aws-lambda-rie" \
+              -p 9000:8080 \
+              --entrypoint /aws-lambda-rie/aws-lambda-rie \
+              "$SMOKE_IMAGE" \
+              $ENTRY $CMD_ARGS
+          fi
+
+      - name: Invoke Lambda and check for import errors
+        run: |
+          response=$(curl -s --retry-connrefused --retry 15 --retry-delay 1 \
+            -X POST \
+            http://localhost:9000/2015-03-31/functions/function/invocations \
+            -H "Content-Type: application/json" \
+            -d '{"Records":[{"body":"{}"}]}')
+
+          echo "Response: $response"
+
+          if [ -z "$response" ]; then
+            echo "No response from Lambda RIE"
+            exit 1
+          fi
+
+          if echo "$response" | grep -qE 'ImportModuleError|ModuleNotFoundError|ImportError'; then
+            echo "Import error detected in handler"
+            exit 1
+          fi
+
+      - name: Dump container logs
+        if: always()
+        run: docker logs "$SMOKE_CONTAINER"
+
+      - name: Tear down container
+        if: always()
+        run: docker rm -f "$SMOKE_CONTAINER"
--- a/.github/workflows/deploy_terraform.yml
+++ b/.github/workflows/deploy_terraform.yml
@ -62,20 +62,20 @@ jobs:
      - uses: hashicorp/setup-terraform@v3

      - name: Terraform Init
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
        run: terraform init -reconfigure

      - name: Terraform Workspace
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
        run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}

      - name: Terraform Plan
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
        run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan

      - name: Terraform Apply
        if: env.TERRAFORM_APPLY == 'true'
-        working-directory: infrastructure/terraform/shared
+        working-directory: deployment/terraform/shared
        run: terraform apply -auto-approve tfplan

  # ============================================================
@ -101,7 +101,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: ara_engine
-      lambda_path: infrastructure/terraform/lambda/engine
+      lambda_path: deployment/terraform/lambda/engine
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: engine-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.ara_engine_image.outputs.image_digest }}
@ -133,6 +133,7 @@ jobs:
        DEV_DB_PORT=$DEV_DB_PORT
        DEV_DB_NAME=$DEV_DB_NAME
        EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
+        OPEN_EPC_API_TOKEN=$OPEN_EPC_API_TOKEN
    secrets:
      AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
      AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@ -141,6 +142,7 @@ jobs:
      DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
      DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
      EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
+      OPEN_EPC_API_TOKEN: ${{ secrets.DEV_OPEN_EPC_API_TOKEN }}

  # ============================================================
  # Deploy Address 2 UPRN Lambda
@ -150,7 +152,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: address2uprn
-      lambda_path: infrastructure/terraform/lambda/address2UPRN
+      lambda_path: deployment/terraform/lambda/address2UPRN
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
@ -169,7 +171,7 @@ jobs:
    uses: ./.github/workflows/_build_image.yml
    with:
      ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
-      dockerfile_path: backend/postcode_splitter/handler/Dockerfile
+      dockerfile_path: applications/postcode_splitter/Dockerfile
      build_context: .
      build_args: |
        DEV_DB_HOST=$DEV_DB_HOST
@ -191,7 +193,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: postcodeSplitter
-      lambda_path: infrastructure/terraform/lambda/postcodeSplitter
+      lambda_path: deployment/terraform/lambda/postcodeSplitter
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
@ -231,7 +233,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: bulk_address2uprn_combiner
-      lambda_path: infrastructure/terraform/lambda/bulk_address2uprn_combiner
+      lambda_path: deployment/terraform/lambda/bulk_address2uprn_combiner
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.bulk_address2uprn_combiner_image.outputs.image_digest }}
@ -271,7 +273,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: condition-etl
-      lambda_path: infrastructure/terraform/lambda/condition-etl
+      lambda_path: deployment/terraform/lambda/condition-etl
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
@ -311,7 +313,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: categorisation
-      lambda_path: infrastructure/terraform/lambda/categorisation
+      lambda_path: deployment/terraform/lambda/categorisation
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.categorisation_image.outputs.image_digest }}
@ -351,7 +353,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: ordnanceSurvey
-      lambda_path: infrastructure/terraform/lambda/ordnanceSurvey
+      lambda_path: deployment/terraform/lambda/ordnanceSurvey
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: ordnance-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.ordnanceSurvey_image.outputs.image_digest }}
@ -386,7 +388,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: pashub_to_ara
-      lambda_path: infrastructure/terraform/lambda/pashub_to_ara
+      lambda_path: deployment/terraform/lambda/pashub_to_ara
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.pashub_to_ara_image.outputs.image_digest }}
@ -407,6 +409,8 @@ jobs:
      TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }}
      TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }}
      TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }}
+      TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }}
+      TF_VAR_pashub_coordination_password: ${{ secrets.PASHUB_COORDINATION_PASSWORD }}


  # ============================================================
@ -417,7 +421,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: ara_fast_api
-      lambda_path: infrastructure/terraform/lambda/fast-api
+      lambda_path: deployment/terraform/lambda/fast-api
      stage: ${{ needs.determine_stage.outputs.stage }}
      terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
    secrets:
@ -456,17 +460,17 @@ jobs:
      - uses: hashicorp/setup-terraform@v3

      - name: Terraform Init
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
        run: terraform init -reconfigure

      - name: Terraform Workspace
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
        run: |
          terraform workspace select $STAGE \
            || terraform workspace new $STAGE

      - name: Terraform Plan
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
        run: |
          terraform plan \
            -var="stage=${STAGE}" \
@ -474,7 +478,7 @@ jobs:

      - name: Terraform Apply
        if: env.TERRAFORM_APPLY == 'true'
-        working-directory: infrastructure/terraform/cdn_certificate
+        working-directory: deployment/terraform/cdn_certificate
        run: terraform apply -auto-approve tfplan


@ -501,17 +505,17 @@ jobs:
      - uses: hashicorp/setup-terraform@v3

      - name: Terraform Init
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
        run: terraform init -reconfigure

      - name: Terraform Workspace
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
        run: |
          terraform workspace select $STAGE \
            || terraform workspace new $STAGE

      - name: Terraform Plan
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
        run: |
          terraform plan \
            -var="stage=${STAGE}" \
@ -519,7 +523,7 @@ jobs:

      - name: Terraform Apply
        if: env.TERRAFORM_APPLY == 'true'
-        working-directory: infrastructure/terraform/cdn
+        working-directory: deployment/terraform/cdn
        run: terraform apply -auto-approve tfplan

  # ============================================================
@ -560,7 +564,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: magic_plan
-      lambda_path: infrastructure/terraform/lambda/magic_plan
+      lambda_path: deployment/terraform/lambda/magic_plan
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: magic-plan-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.magic_plan_image.outputs.image_digest }}
@ -583,7 +587,7 @@ jobs:
    uses: ./.github/workflows/_deploy_lambda.yml
    with:
      lambda_name: hubspot-etl-to-ara
-      lambda_path: infrastructure/terraform/lambda/hubspot_deal_etl
+      lambda_path: deployment/terraform/lambda/hubspot_deal_etl
      stage: ${{ needs.determine_stage.outputs.stage }}
      ecr_repo: hubspot-etl-${{ needs.determine_stage.outputs.stage }}
      image_digest: ${{ needs.hubspot_etl_image.outputs.image_digest }}
--- a/.github/workflows/lambda_smoke_tests.yml
+++ b/.github/workflows/lambda_smoke_tests.yml
@ -0,0 +1,123 @@
+name: Lambda Smoke Tests
+
+on:
+  pull_request:
+    branches:
+      - main
+
+# The smoke tests only check out the repository, so keep the token read-only.
+permissions:
+  contents: read
+
+# A new push to the same PR supersedes any smoke-test run still in progress.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}
+  cancel-in-progress: true
+
+jobs:
+  # ============================================================
+  # Ara Engine
+  # ============================================================
+  ara_engine_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/docker/engine.Dockerfile
+      build_context: .
+      service_name: ara-engine
+
+  # ============================================================
+  # Address 2 UPRN
+  # ============================================================
+  address2uprn_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/address2UPRN/handler/Dockerfile
+      build_context: .
+      service_name: address2uprn
+
+  # ============================================================
+  # Postcode Splitter
+  # ============================================================
+  postcode_splitter_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/postcode_splitter/handler/Dockerfile
+      build_context: .
+      service_name: postcode-splitter
+
+  postcode_splitter_ddd_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: applications/postcode_splitter/Dockerfile
+      build_context: .
+      service_name: postcode-splitter-ddd
+
+  # ============================================================
+  # Bulk Address2UPRN Combiner
+  # ============================================================
+  bulk_address2uprn_combiner_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/bulk_address2uprn_combiner/handler/Dockerfile
+      build_context: .
+      service_name: bulk-address2uprn-combiner
+
+  # ============================================================
+  # Condition ETL
+  # ============================================================
+  condition_etl_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/condition/handler/Dockerfile
+      build_context: .
+      service_name: condition-etl
+
+  # ============================================================
+  # Categorisation
+  # ============================================================
+  categorisation_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/categorisation/handler/Dockerfile
+      build_context: .
+      service_name: categorisation
+
+  # ============================================================
+  # Ordnance Survey
+  # ============================================================
+  ordnance_survey_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/ordnanceSurvey/handler/Dockerfile
+      build_context: .
+      service_name: ordnance-survey
+
+  # ============================================================
+  # Pas Hub Fetcher
+  # ============================================================
+  pashub_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/pashub_fetcher/handler/Dockerfile
+      build_context: .
+      service_name: pashub
+
+  # ============================================================
+  # MagicPlan
+  # ============================================================
+  magic_plan_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: backend/magic_plan/handler/Dockerfile
+      build_context: .
+      service_name: magic-plan
+
+  # ============================================================
+  # HubSpot Scraper
+  # ============================================================
+  hubspot_scraper_smoke_test:
+    uses: ./.github/workflows/_smoke_test_lambda.yml
+    with:
+      dockerfile_path: etl/hubspot/scripts/scraper/handler/Dockerfile
+      build_context: .
+      service_name: hubspot-scraper
--- a/.github/workflows/unit_tests.yml
+++ b/.github/workflows/unit_tests.yml
@ -60,3 +60,15 @@ jobs:
            -e DB_PASSWORD=test \
            -e DB_PORT=5432 \
            model-test pytest -vv -m 'not integration'
+
+      # The DDD rewrite (tests/) defines SQLModel table classes that map to the
+      # same physical tables as the legacy backend models. Both sets share the
+      # one global SQLModel.metadata, so they cannot be imported into the same
+      # pytest process. It runs as a separate invocation until the legacy
+      # models are retired. Its DB is spawned in-process by pytest-postgresql,
+      # so no DB service or env is required.
+      - name: Run DDD tests
+        run: |
+          docker run --rm \
+            --network host \
+            model-test pytest -vv tests/
--- a/.gitignore
+++ b/.gitignore
@ -121,6 +121,7 @@ celerybeat.pid

 # Environments
 .env
+.env.local
 .venv
 env/
 venv/
@ -241,6 +242,7 @@ fabric.properties
 # Locally stored data
 local_data/*
 /local_data/*
+/data/ml_training/
 etl/epc/local_data/*
 /backend/condition/sample_data/lbwf/*
 /backend/condition/sample_data/peabody/*
@ -279,6 +281,8 @@ cache/
 *.png
 *.pptx
 *.csv
+# Tracked reference CSV: SAP enum codes (gov api /api/codes) co-located with EpcPropertyData.
+!datatypes/epc/domain/epc_codes.csv
 *.xlsx
 # *.pdf
 **/Chunks/
--- a/.idea/.name
+++ b/.idea/.name
@ -0,0 +1 @@
+AGENTS.md
--- a/.idea/webResources.xml
+++ b/.idea/webResources.xml
@ -0,0 +1,14 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="WebResourcesPaths">
+    <contentEntries>
+      <entry url="file://$PROJECT_DIR$">
+        <entryData>
+          <resourceRoots>
+            <path value="file://$PROJECT_DIR$" />
+          </resourceRoots>
+        </entryData>
+      </entry>
+    </contentEntries>
+  </component>
+</project>
--- a/AGENTS.md
+++ b/AGENTS.md
@ -1,29 +0,0 @@
-
-<!-- BACKLOG.MD MCP GUIDELINES START -->
-
-<CRITICAL_INSTRUCTION>
-
-## BACKLOG WORKFLOW INSTRUCTIONS
-
-This project uses Backlog.md MCP for all task and project management activities.
-
-**CRITICAL GUIDANCE**
-
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
-
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
-
-These guides cover:
- Decision framework for when to create tasks
- Search-first workflow to avoid duplicates
- Links to detailed guides for task creation, execution, and finalization
- MCP tools reference
-
-You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
-
-</CRITICAL_INSTRUCTION>
-
-<!-- BACKLOG.MD MCP GUIDELINES END -->
--- a/CLAUDE.md
+++ b/CLAUDE.md
@ -1,33 +1,4 @@

-<!-- BACKLOG.MD MCP GUIDELINES START -->
-
-<CRITICAL_INSTRUCTION>
-
-## BACKLOG WORKFLOW INSTRUCTIONS
-
-This project uses Backlog.md MCP for all task and project management activities.
-
-**CRITICAL GUIDANCE**
-
- If your client supports MCP resources, read `backlog://workflow/overview` to understand when and how to use Backlog for this project.
- If your client only supports tools or the above request fails, call `backlog.get_backlog_instructions()` to load the tool-oriented overview. Use the `instruction` selector when you need `task-creation`, `task-execution`, or `task-finalization`.
-
- **First time working here?** Read the overview resource IMMEDIATELY to learn the workflow
- **Already familiar?** You should have the overview cached ("## Backlog.md Overview (MCP)")
- **When to read it**: BEFORE creating tasks, or when you're unsure whether to track work
-
-These guides cover:
- Decision framework for when to create tasks
- Search-first workflow to avoid duplicates
- Links to detailed guides for task creation, execution, and finalization
- MCP tools reference
-
-You MUST read the overview resource to understand the complete workflow. The information is NOT summarized here.
-
-</CRITICAL_INSTRUCTION>
-
-<!-- BACKLOG.MD MCP GUIDELINES END -->
-
 ## Available Skills

 Five Claude Code skills are installed in this repo's dev container. Each maps to a phase of the feature lifecycle.
--- a/CONTEXT.md
+++ b/CONTEXT.md
@ -58,7 +58,7 @@ A UK postal code used to group nearby addresses; the primary search key for find
 _Avoid_: zip code, postal code

 **User Address**:
-A free-text address string provided by a user or imported from a customer dataset, before any normalisation or matching.
+A structured dataclass (`domain.addresses.user_address.UserAddress`) capturing a customer-supplied address: a free-text `user_address` line, a canonical `postcode` (sanitised on construction), and an optional `internal_reference`. The bare string sense — the raw free-text address line as it arrives from upstream ingestion, before being wrapped — remains valid when discussing CSV columns, API payloads, or other upstream contexts; in domain code, prefer the dataclass.
 _Avoid_: user input, raw address, user_inputed_address

 **Comparable Properties**:
@ -82,11 +82,11 @@ The EpcPropertyData scored by the modelling pipeline for a single Property, deri
 _Avoid_: modelling EPC, working EPC, resolved EPC, derived EPC

 **Rebaselining**:
-Re-predicting a Property's SAP, carbon emissions, and heat demand via ML so the modelling pipeline scores it against the current SAP10 methodology. Triggered when either (a) the Effective EPC was lodged under a pre-SAP10 schema (`sap_version < 10.0`), so the recorded scores reflect a superseded methodology, or (b) Site Notes / Landlord Overrides changed the physical state of the Property (walls / heating / windows / etc.) so the lodged scores no longer reflect what's installed. Both triggers may fire together. Produces Effective Performance; Lodged Performance is preserved unchanged. Does not include kWh — that is always derived deterministically by EPC Energy Derivation.
+Re-predicting a Property's SAP score, CO2 emissions, Primary Energy Intensity, space heating kWh, and hot water kWh via ML so the modelling pipeline scores it against the current SAP10 methodology. Triggered when either (a) the Effective EPC was lodged under a pre-SAP10 schema (`sap_version < 10.0`), so the recorded scores reflect a superseded methodology, or (b) Site Notes / Landlord Overrides changed the physical state of the Property (walls / heating / windows / etc.) so the lodged scores no longer reflect what's installed. Both triggers may fire together. Produces Effective Performance; Lodged Performance is preserved unchanged. Space heating kWh and hot water kWh are included as ML targets per ADR-0007 — see [[epc-ml-transform]].
 _Avoid_: re-scoring, re-prediction, performance recomputation, refresh (for cache-freshness)

 **Baseline Performance**:
-A Property's current performance aggregate, holding both Lodged Performance and Effective Performance plus annual kWh / fuel split / bills derived from the Effective EPC. Persisted as one row; surfaced as one block in the UI.
+A Property's current performance aggregate, holding both Lodged Performance and Effective Performance plus annual space heating kWh, hot water kWh, fuel split, and bills derived from the Effective EPC — kWh values come from the EPC's recorded fields for SAP10 baselines or from ML when Rebaselining fires; bills are derived deterministically from kWh × current Fuel Rates. Persisted as one row; surfaced as one block in the UI.
 _Avoid_: baseline predictions, predicted baseline, rebaselined values

 **Lodged Performance**:
@ -97,18 +97,60 @@ _Avoid_: original performance, raw EPC values, recorded baseline
 The SAP / EPC Band / carbon emissions / heat demand the modelling pipeline actually scored against — equal to Lodged Performance when no Rebaselining trigger fires, replaced by ML output when triggered. The half of Baseline Performance that says "what we modelled".
 _Avoid_: modelled performance, rebaselined performance (only correct when rebaselining ran), scored values

+**Calculated SAP10 Performance**:
+The SAP score, EPC Band, CO2 emissions, Primary Energy Intensity, space heating kWh, and hot water kWh produced by **SAP10 Calculation** from a Property's EpcPropertyData. Distinct from Effective Performance (ML output) and Lodged Performance (gov register) during the validation phase. Surfaced alongside Effective Performance in the UI; may supersede Effective Performance in a later ADR once parity is confirmed against the cert-reported SAP across ≥1000 sample certs lodged on the calculator's target spec version (see [[sap-spec-version]]). ADR-0009 (as amended by ADR-0010).
+_Avoid_: calculator output, computed performance, worksheet performance, SAP10 output
+
+**SAP10 Calculation**:
+The process that runs the deterministic SAP 10.2 (14-03-2025 amendment) worksheet over a Property's EpcPropertyData and emits **Calculated SAP10 Performance**. Implemented by the `Sap10Calculator` service class in `domain/sap/`. Reads cert fabric/heating/geometry fields, applies the RdSAP 10 (10-06-2025) cert→input mapping, executes the 12-month heat balance per SAP 10.2 §§1-14, looks up boiler/heat-pump performance in the **PCDB** when the cert lodges a product index, and returns a `SapResult` carrying the five Calculated SAP10 Performance quantities plus a monthly breakdown and worksheet-line audit trail. Distinct from **Rebaselining**, which is ML-based. ADR-0009 originally targeted SAP 10.3 (13-01-2026); ADR-0010 retargets to SAP 10.2 (14-03-2025) until the cert corpus migrates.
+_Avoid_: SAP calculation (ambiguous with the gov calculator), SAP scoring, calculator run, SAP 10.3 calculation (active target is 10.2 — see [[sap-spec-version]])
+
+**SAP Spec Version**:
+The dated revision of the SAP specification that produced a given SAP/PEUI/CO2 value. Domain-meaningful because the same EpcPropertyData yields different `sap_score` under different spec versions — fuel-price tables, CO2 factors, PCDB references, and rating-equation deflators all change between revisions. **Lodged Performance** carries the version current when the cert was lodged (mostly SAP 10.1 / SAP 10.2 pre- and post-14-03-2025 amendment in the corpus). **Calculated SAP10 Performance** is locked to SAP 10.2 (14-03-2025). A 1-to-1 Lodged-vs-Calculated comparison therefore only makes sense within a **Validation Cohort** of certs lodged on the same spec version.
+_Avoid_: SAP version (ambiguous with the `sap_version` field on the cert, which only carries the major version like 10.2 — not the amendment date), spec revision
+
+**Validation Cohort**:
+The subset of corpus certs used to validate **SAP10 Calculation** against **Lodged Performance**, filtered to certs lodged after the calculator's target **SAP Spec Version** rolled out in commercial assessor software — currently `inspection_date ≥ 2025-07-01` (a buffer past 14-03-2025 to allow vendor rollout). Smaller than the full corpus but each cert is comparable under the same spec, so probe MAE is a clean signal of calculator-vs-spec correctness rather than spec-version mixture noise. ADR-0010.
+_Avoid_: parity cohort, validation set, corpus sample
+
+**Measure Application**:
+The process that translates an Optimised Package into cert-field changes and produces the "ending state snapshot" EpcPropertyData that Plan Phase persists. Implemented by the `MeasureApplicator` service class in `domain/sap/` (or a sibling package). Each Measure Type's translation rules (e.g. `loft_insulation` → `roof_insulation_thickness_mm = 270mm`, `ashp` → `main_heating_details[0]` replacement) live here. Pure function — does not run SAP10 Calculation itself; the caller chains `MeasureApplicator.apply(epc, package) → Sap10Calculator.calculate(post_epc)`. ADR-0009.
+_Avoid_: measure overrides (rejected during ADR-0009 grill — phantom mid-layer), package applier, retrofit simulator
+
 **EPC Energy Derivation**:
-The deterministic process that derives a Property's annual kWh, fuel split across heating, hot water, lighting, appliances and cooking, and bills from the Effective EPC — applying a UCL Correction for known EPC over/under-prediction and deducing fuel type from the SAP heating fields. No ML.
-_Avoid_: kWh prediction, baseline kWh, energy estimation
+The process that derives a Property's fuel split and annual bills from its space heating kWh and hot water kWh values plus the heating fuel deduced from SAP fields. kWh values themselves come from the EPC's recorded fields (`renewable_heat_incentive.space_heating_existing_dwelling` and `.water_heating`) for SAP10 baselines, or from ML prediction when Rebaselining fires or when scoring a post-measure state. Bills are computed deterministically from delivered kWh × current Fuel Rates + standing charges + SEG credits. The UCL Correction is no longer applied at runtime — it is folded into ML training labels (see [[epc-ml-transform]] and ADR-0007).
+_Avoid_: kWh prediction (kWh is now an ML target — see Rebaselining), baseline kWh, energy estimation

 **UCL Correction**:
-The per-band linear correction (Few et al. 2023, _Energy & Buildings_ 288 113024) applied to EPC-modelled total primary energy use intensity to align it with metered consumption. Calibrated against gas-heated, non-PV homes in England and Wales rated under SAP 2012; the current implementation extrapolates it to all properties (open question §15.14).
+The per-band linear correction (Few et al. 2023, _Energy & Buildings_ 288 113024) that aligns EPC-modelled Primary Energy Intensity with metered consumption. Folded into ML training labels at fit time (per ADR-0007) rather than applied at runtime — the trained model emits metered-equivalent PEUI directly, avoiding the discontinuities at EPC band boundaries that arose when the per-band linear correction was applied post-prediction. Calibrated against gas-heated, non-PV homes in England and Wales rated under SAP 2012; the current implementation extrapolates it to all properties (open question §15.14).
 _Avoid_: UCL adjustment, energy correction, metered correction

 **EPC Anomaly Flag**:
 A per-field indicator that a Property's value for an EPC field differs significantly from Comparable Properties; advisory only — surfaces in the UI to prompt user review, does not block modelling.
 _Avoid_: outlier, mismatch, divergence flag

+### ML training
+
+**EPC ML Transform**:
+The versioned class at `packages/domain/src/domain/ml/transform.py` that maps an EpcPropertyData to a fixed-width row of features + targets. The single ML-data contract between this repo and the AutoGluon training repo. Owns the windows compression, building-parts compression, Top-N Code Taxonomy, and UCL folding decisions. Each version is tagged on the deployed scoring lambda; a mismatch is a deploy-time fail.
+_Avoid_: feature builder, ML mapper, EPC vectoriser
+
+**Feature Schema Version**:
+The semver version of the EPC ML Transform (e.g. `0.1.0`), included in the parquet output path and the deployed scoring lambda's tag. MAJOR bump when columns are removed or renamed; MINOR when optional columns are added; PATCH for non-behavioural fixes.
+_Avoid_: transform version, schema version (overloaded with the SAP RdSAP schema version on EPCs), model version
+
+**Primary Energy Intensity** (**PEUI**):
+A Property's total annual primary energy use per square metre of floor area (kWh/m²/yr), the SAP10 quantity recorded as `energy_consumption_current` on the EPC. Covers all end uses (heating, hot water, lighting, appliances, cooking) weighted by SAP primary energy factors per fuel. The quantity the UCL Correction aligns to metered consumption.
+_Avoid_: heat demand (which colloquially means the building's space heating thermal requirement — a distinct concept), energy demand, total energy use, kWh per square metre
+
+**PV Capacity Source**:
+A flag on the EPC ML Transform feature set indicating whether a Property's PV capacity is `measured` (from `sap_energy_source.photovoltaic_supply[].peak_power`), `estimated_from_roof_area` (the `percent_roof_area` fallback used when the surveyor could not confirm array configuration), or `none` (no PV present). Lets the model weight the correct capacity signal per property.
+_Avoid_: PV source, PV configuration type, solar source
+
+**Top-N Code Taxonomy**:
+The empirical top-N SAP code list (covering ~95% of mass on the training sample) committed by the EPC ML Transform for each list-aggregated categorical field (`wall_construction`, `glazing_type`, `frame_material`, etc.). Rare codes go into a per-field `_other` bucket. The taxonomy is locked at each Feature Schema Version; changes warrant a MINOR bump (adding) or MAJOR bump (removing codes).
+_Avoid_: code list, code dictionary, vocab
+
 ### Reference data

 **Fuel Rates**:
@ -214,8 +256,8 @@ _Avoid_: API key, auth token, secret
 - A **UPRN** identifies a physical dwelling permanently; it does not change when the property changes owner — but each portfolio gets its own **Property** keyed against it.
 - When a **Property** has both **Site Notes** and a public **EPC**, the newer of the two derives the **Effective EPC**. **Landlord Overrides** apply only when the **EPC** is the source — never when **Site Notes** are.
 - A Property's **Baseline Performance** holds two halves: **Lodged Performance** (the gov register's SAP / band / carbon / heat) and **Effective Performance** (what the modelling pipeline scored against). The two are equal unless **Rebaselining** fires.
- **Rebaselining** produces **Effective Performance** by ML re-prediction when either (a) the Effective EPC was lodged under a pre-SAP10 schema, or (b) the Effective EPC's physical state diverges from the lodged EPC. **Lodged Performance** is never overwritten.
- **EPC Energy Derivation** contributes the annual kWh, fuel split, and bills on every Property unconditionally, reading current **Fuel Rates** and **Carbon Factors** from their respective repos.
+- **Rebaselining** produces **Effective Performance** by ML re-prediction across SAP score, CO2 emissions, Primary Energy Intensity, space heating kWh, and hot water kWh, when either (a) the Effective EPC was lodged under a pre-SAP10 schema, or (b) the Effective EPC's physical state diverges from the lodged EPC. **Lodged Performance** is never overwritten.
+- **EPC Energy Derivation** derives **fuel split** and **bills** from kWh values (sourced from the EPC's `renewable_heat_incentive` fields for baseline SAP10 properties, or from ML when Rebaselining fires), reading current **Fuel Rates** and **Carbon Factors** from their respective repos.
 - The **EPC Prediction Service** uses **Comparable Properties** for both gap-filling and producing **EPC Anomaly Flags**.
 - A **Scenario** carries one or more ordered **Scenario Phases**. Triggering the model against N Scenarios produces N **Plans** per Property; each Plan carries an ordered list of **Plan Phases** matching the Scenario's shape.
 - Each **Plan Phase** holds its **Optimised Package**, the ending state snapshot, and any **Rolled-over Options** that flow as candidates into the next Plan Phase. A single-phase Scenario is one Scenario Phase with all measure types allowed; the same machinery handles it.
@ -227,7 +269,7 @@ _Avoid_: API key, auth token, secret

 > **Dev:** "A landlord uploads a corrected boiler for one of their properties. What happens?"
 >
-> **Domain expert:** "That's a **Landlord Override** on the heating fields. Save it against the **Property**. The **Effective EPC** has changed, so **Rebaselining** runs to re-predict SAP / carbon / heat, and **EPC Energy Derivation** re-runs to update kWh / bills based on the new fuel deduction. With fresh **Baseline Performance** we regenerate **Recommendations**."
+> **Domain expert:** "That's a **Landlord Override** on the heating fields. Save it against the **Property**. The **Effective EPC** has changed, so **Rebaselining** runs to re-predict SAP / carbon / PEUI / space heating kWh / hot water kWh, and **EPC Energy Derivation** re-runs to update the fuel split and bills based on the new kWh values and fuel deduction. With fresh **Baseline Performance** we regenerate **Recommendations**."

 > **Dev:** "What if the same Property also has Site Notes?"
 >
@ -255,7 +297,7 @@ _Avoid_: API key, auth token, secret
 - **"energy assessment"** in the existing codebase (`energy_assessment_functions`, `energy_assessments_by_uprn`) refers to what is now canonically called **Site Notes**. New code uses **Site Notes**.
 - **"patch"** / `patch_epc` in the existing codebase has been merged into **Landlord Overrides**; the original concept is deprecated.
 - **"already_installed measures"** in the existing codebase is likely subsumed by **Landlord Overrides** ("we have a heat pump now" → override the heating fields). Final call deferred to implementation.
- **"address"** appears as both the raw **User Address** (free-text) and a structured field on an **EPC Search Result** (normalised lines). Always qualify: "user address" vs "EPC address" or "address line 1".
+- **"address"** appears as both the raw **User Address** (free-text from customer data, or the structured `UserAddress` dataclass that wraps it) and a structured field on an **EPC Search Result** (normalised lines). Always qualify: "user address" vs "EPC address" or "address line 1". Within `domain/`, **User Address** specifically means the `UserAddress` dataclass; in upstream ingestion contexts (CSV columns, SQS payloads) it can still mean the raw free-text string.
 - **"score"** is used for `AddressMatch.score()` output, the `lexiscore` column, and informally. Prefer **Lexiscore** in domain discussions; reserve "score" for method-level code comments.
 - **"user_inputed_address"** in `backend/address2UPRN/main.py` is a misspelling and a synonym for **User Address** — the canonical term. New code should use `user_address`.
 - **"EPC"** is overloaded as both the document and the rating band letter. Use **EPC** for the document, **EPC Band** for the letter.
--- a/Dockerfile.test.dockerignore
+++ b/Dockerfile.test.dockerignore
@ -4,7 +4,7 @@ model_data/local_data/
 backend/node_modules/
 backend/.idea/
 backend/.env
-infrastructure/
+deployment/
 data_collection/
 node_modules/
 conservation_areas/
--- a/services/ara/tests/init.py
+++ b/services/ara/tests/init.py
--- a/applications/postcode_splitter/Dockerfile
+++ b/applications/postcode_splitter/Dockerfile
@ -0,0 +1,34 @@
+# Container image for the postcode_splitter Lambda. All COPY sources are
+# relative to the build context, which must therefore be the repository root.
+FROM public.ecr.aws/lambda/python:3.11
+
+# Postgres host/port/database are baked into the image at build time from
+# the deploy workflow's --build-arg values (GitHub Actions DEV_DB_* secrets),
+# mirroring backend/postcode_splitter/handler/Dockerfile. They map onto the
+# POSTGRES_* names PostgresConfig.from_env reads. Username/password are NOT
+# baked in -- Terraform injects those as Lambda env vars from Secrets Manager.
+# None of these ARGs declares a default, so a build that omits a --build-arg
+# leaves the matching POSTGRES_* variable set to an empty string.
+ARG DEV_DB_HOST
+ARG DEV_DB_PORT
+ARG DEV_DB_NAME
+
+ENV POSTGRES_HOST=${DEV_DB_HOST}
+ENV POSTGRES_PORT=${DEV_DB_PORT}
+ENV POSTGRES_DATABASE=${DEV_DB_NAME}
+
+WORKDIR /var/task
+
+# Dependencies are installed before any source is copied, so this layer is
+# only rebuilt when requirements.txt itself changes.
+COPY applications/postcode_splitter/requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# Copy the layered source the handler imports from. The new splitter pulls
+# only DDD-shaped packages — no pandas, no legacy backend/.
+COPY domain/ domain/
+COPY infrastructure/ infrastructure/
+COPY orchestration/ orchestration/
+COPY repositories/ repositories/
+COPY utilities/ utilities/
+COPY applications/ applications/
+
+# Place the handler at the Lambda task root so the runtime can resolve
+# ``main.handler`` without an extra package prefix.
+COPY applications/postcode_splitter/handler.py /var/task/main.py
+
+# <module>.<function>: the ``handler`` function in the main.py copied above.
+CMD ["main.handler"]
--- a/applications/postcode_splitter/init.py
+++ b/applications/postcode_splitter/init.py
--- a/applications/postcode_splitter/handler.py
+++ b/applications/postcode_splitter/handler.py
@ -0,0 +1,52 @@
+from __future__ import annotations
+
+import os
+from typing import Any
+
+import boto3
+
+from applications.postcode_splitter.postcode_splitter_trigger_body import (
+    PostcodeSplitterTriggerBody,
+)
+from infrastructure.address2uprn_queue_client import Address2UprnQueueClient
+from infrastructure.csv_s3_client import CsvS3Client
+from orchestration.postcode_splitter_orchestrator import PostcodeSplitterOrchestrator
+from orchestration.task_orchestrator import TaskOrchestrator
+from repositories.user_address.user_address_csv_s3_repository import (
+    UserAddressCsvS3Repository,
+)
+from utilities.aws_lambda.subtask_handler import subtask_handler
+
+
+@subtask_handler()
+def handler(
+    body: dict[str, Any], context: Any, task_orchestrator: TaskOrchestrator
+) -> dict[str, list[str]]:
+    trigger = PostcodeSplitterTriggerBody.model_validate(body)
+
+    bucket = os.environ["S3_BUCKET_NAME"]
+    queue_url = os.environ["ADDRESS2UPRN_QUEUE_URL"]
+
+    # boto3.client is overloaded per-service in the installed stubs; cast
+    # to Any so the strict-mode checker treats it as opaque.
+    boto3_client: Any = boto3.client  # pyright: ignore[reportUnknownMemberType, reportUnknownVariableType]
+    boto_s3: Any = boto3_client("s3")
+    boto_sqs: Any = boto3_client("sqs")
+
+    csv_client = CsvS3Client(boto_s3, bucket)
+    user_address_repo = UserAddressCsvS3Repository(csv_client, bucket)
+    queue_client = Address2UprnQueueClient(boto_sqs, queue_url)
+
+    splitter = PostcodeSplitterOrchestrator(
+        task_orchestrator=task_orchestrator,
+        user_address_repo=user_address_repo,
+        queue_client=queue_client,
+    )
+
+    child_ids = splitter.split_and_dispatch(
+        parent_task_id=trigger.task_id,
+        parent_subtask_id=trigger.sub_task_id,
+        input_s3_uri=trigger.s3_uri,
+    )
+
+    return {"child_subtask_ids": [str(cid) for cid in child_ids]}
--- a/applications/postcode_splitter/local_handler/.env.local.example
+++ b/applications/postcode_splitter/local_handler/.env.local.example
@ -0,0 +1,34 @@
+# Local-test environment for the postcode_splitter Lambda.
+#
+#   cp .env.local.example .env.local   then fill in the values below.
+#
+# .env.local is gitignored. The container hits REAL AWS and a REAL Postgres,
+# so every value here points at infrastructure that actually exists.
+#
+# NOTE: the new DDD code uses different env var names than the repo root
+# .env. The mapping (root .env name -> var here) is given per section.
+# Keep comments on their own lines — docker-compose's env_file parser folds a
+# trailing "# ..." into the value.
+
+# --- Postgres (orchestration/default_orchestrator -> PostgresConfig.from_env) ---
+# POSTGRES_HOST <- DB_HOST, PORT <- DB_PORT, USERNAME <- DB_USERNAME,
+# PASSWORD <- DB_PASSWORD, DATABASE <- DB_NAME.
+POSTGRES_HOST=
+POSTGRES_PORT=5432
+POSTGRES_USERNAME=
+POSTGRES_PASSWORD=
+POSTGRES_DATABASE=
+# POSTGRES_DRIVER=psycopg2   (optional; defaults to psycopg2)
+
+# --- Handler config (applications/postcode_splitter/handler.py) ---
+# S3_BUCKET_NAME: bucket holding the input address CSV (root .env: DATA_BUCKET).
+# ADDRESS2UPRN_QUEUE_URL: SQS queue the splitter fans batches out to; not in
+# the root .env (Terraform sets it in prod).
+S3_BUCKET_NAME=
+ADDRESS2UPRN_QUEUE_URL=
+
+# --- AWS credentials for boto3 (S3 + SQS clients) ---
+AWS_ACCESS_KEY_ID=
+AWS_SECRET_ACCESS_KEY=
+AWS_DEFAULT_REGION=eu-west-2
+# AWS_SESSION_TOKEN=   (only if using temporary/SSO credentials)
--- a/applications/postcode_splitter/local_handler/docker-compose.yml
+++ b/applications/postcode_splitter/local_handler/docker-compose.yml
@ -0,0 +1,9 @@
+# Local-test stack for the postcode_splitter Lambda image.
+services:
+  postcode-splitter:
+    build:
+      # Three levels up from this directory; the Dockerfile's COPY paths
+      # (domain/, infrastructure/, applications/, ...) are relative to it.
+      context: ../../../
+      dockerfile: applications/postcode_splitter/Dockerfile
+    ports:
+      # Host 9001 -> container 8080. invoke_local_lambda.py posts to 9001.
+      - "9001:8080"
+    env_file:
+      # Created from .env.local.example (run_local.sh copies it on first run).
+      - .env.local
--- a/applications/postcode_splitter/local_handler/invoke_local_lambda.py
+++ b/applications/postcode_splitter/local_handler/invoke_local_lambda.py
@ -0,0 +1,28 @@
+#!/usr/bin/env python3
+# Manual smoke test: POSTs one sample event to the postcode_splitter container
+# started from this directory's docker-compose.yml, then prints the HTTP
+# status code and the raw response body.
+import json
+import requests
+
+# Must match the host side of the "9001:8080" port mapping in
+# docker-compose.yml.
+HOST = "localhost"
+PORT = "9001"
+
+# Invocation endpoint exposed by the container.
+LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations"
+
+# One record whose "body" is a JSON *string* carrying the three fields
+# PostcodeSplitterTriggerBody requires (task_id, sub_task_id, s3_uri).
+# NOTE(review): the ids and the S3 URI are sample values — presumably they
+# need replacing with ones that exist in the environment under test.
+payload = {
+    "Records": [
+        {
+            "body": json.dumps(
+                {
+                    "task_id": "e295d89b-a7c5-4a9a-8b4e-b405fab1f298",
+                    "sub_task_id": "f4a9944f-41f0-4a33-8669-5016ec574068",
+                    "s3_uri": "s3://retrofit-data-dev/bulk_onboarding_inputs/hyde2 (1).csv",
+                }
+            )
+        }
+    ]
+}
+
+# No timeout is set, so this call blocks until the container responds.
+response = requests.post(LAMBDA_URL, json=payload)
+
+print("Status code:", response.status_code)
+print("Response:")
+print(response.text)
--- a/applications/postcode_splitter/local_handler/run_local.sh
+++ b/applications/postcode_splitter/local_handler/run_local.sh
@ -0,0 +1,12 @@
+#!/usr/bin/env bash
+# Builds and starts the postcode_splitter container defined by the
+# docker-compose.yml next to this script.
+
+# Abort on any failing command, unset variable, or failed pipeline stage.
+set -euo pipefail
+# Run from this script's directory so docker compose and .env.local resolve
+# regardless of where the script is invoked from.
+cd "$(dirname "$0")"
+
+# First run: seed .env.local from the template and stop with a non-zero exit,
+# since the template ships with empty values that must be filled in by hand.
+if [ ! -f .env.local ]; then
+  cp .env.local.example .env.local
+  echo "Created .env.local from the template — fill it in, then re-run." >&2
+  exit 1
+fi
+
+# Rebuild the image without the layer cache, then recreate the container even
+# if its configuration is unchanged. `up` runs in the foreground.
+docker compose build --no-cache
+docker compose up --force-recreate
--- a/applications/postcode_splitter/postcode_splitter_trigger_body.py
+++ b/applications/postcode_splitter/postcode_splitter_trigger_body.py
@ -0,0 +1,11 @@
+from uuid import UUID
+
+from pydantic import BaseModel, ConfigDict
+
+
+class PostcodeSplitterTriggerBody(BaseModel):
+    """Payload that triggers the postcode_splitter handler.
+
+    The handler passes the three fields to
+    ``PostcodeSplitterOrchestrator.split_and_dispatch`` as ``parent_task_id``,
+    ``parent_subtask_id`` and ``input_s3_uri`` respectively.
+    """
+
+    # extra="allow": keys beyond the three declared below are accepted rather
+    # than rejected during validation.
+    model_config = ConfigDict(extra="allow")
+
+    # Id of the task this trigger belongs to.
+    task_id: UUID
+    # Id of the sub-task this trigger belongs to.
+    sub_task_id: UUID
+    # Location of the input, e.g. "s3://<bucket>/<key>"; kept as a plain string.
+    s3_uri: str
--- a/applications/postcode_splitter/requirements.txt
+++ b/applications/postcode_splitter/requirements.txt
@ -0,0 +1,4 @@
+boto3
+pydantic
+sqlmodel
+psycopg2-binary
--- a/asset_list/app.py
+++ b/asset_list/app.py
@ -79,23 +79,23 @@ def app():
    """

    data_folder = "/workspaces/model/asset_list"
-    data_filename = "input.xlsx"
-    sheet_name = "Handovers"
-    postcode_column = "POSTCODE"
-    address1_column = "Full Addres"
+    data_filename = "hyde.xlsx"
+    sheet_name = "AddressProfilingResults"
+    postcode_column = "Postcode"
+    address1_column = "Address"
    address1_method = None
-    fulladdress_column = "Full Addres"
+    fulladdress_column = "Postcode"
    address_cols_to_concat = []
    missing_postcodes_method = None
    landlord_year_built = None
-    landlord_os_uprn = "domna_found_uprn"
-    landlord_property_type = "PROPERTY TYPE"  # Good to include if landlord gave
-    landlord_built_form = "Type Description"  # Good to include if landlord gave
+    landlord_os_uprn = None
+    landlord_property_type = "Property Type"  # Good to include if landlord gave
+    landlord_built_form = None  # Good to include if landlord gave
    landlord_wall_construction = None
    landlord_roof_construction = None
    landlord_heating_system = None
    landlord_existing_pv = None
-    landlord_property_id = "PROP REF"
+    landlord_property_id = "Organisation Reference"
    landlord_sap = None
    outcomes_filename = None
    outcomes_sheetname = None
@ -469,8 +469,3 @@ def app():
                    writer, sheet_name="Duplicate Properties", index=False
                )

-
-
-
-for key,value in dict.items():
-    lsakjfldsa
--- a/backend/address2UPRN/handler/Dockerfile
+++ b/backend/address2UPRN/handler/Dockerfile
@ -6,11 +6,13 @@ ARG DEV_DB_HOST
 ARG DEV_DB_PORT
 ARG DEV_DB_NAME
 ARG EPC_AUTH_TOKEN
+ARG OPEN_EPC_API_TOKEN

 ENV DB_HOST=${DEV_DB_HOST}
 ENV DB_PORT=${DEV_DB_PORT}
 ENV DB_NAME=${DEV_DB_NAME}
 ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
+ENV OPEN_EPC_API_TOKEN=${OPEN_EPC_API_TOKEN}


 # Set working directory (Lambda task root)
--- a/backend/address2UPRN/handler/requirements.txt
+++ b/backend/address2UPRN/handler/requirements.txt
@ -8,4 +8,5 @@ boto3==1.35.44
 sqlmodel
 sqlalchemy==2.0.36
 psycopg2-binary==2.9.10
-pydantic-settings==2.6.0
+pydantic-settings==2.6.0
+httpx
--- a/backend/address2UPRN/tests/test_csv.py
+++ b/backend/address2UPRN/tests/test_csv.py
@ -12,12 +12,21 @@ FIXTURE_PATH = Path(__file__).parent / "test_data.csv"
 # Each parametrized case fires at least one EPC request; without throttling,
 # GitHub-hosted runners burst fast enough to hit 429s.
 EPC_THROTTLE_SECONDS = 1.0
+EPC_LONG_PAUSE_EVERY = 100
+EPC_LONG_PAUSE_SECONDS = 5.0
+
+_epc_request_count = 0


@pytest.fixture(autouse=True)
 def _throttle_epc_requests():
+    global _epc_request_count
    yield
-    time.sleep(EPC_THROTTLE_SECONDS)
+    _epc_request_count += 1
+    if _epc_request_count % EPC_LONG_PAUSE_EVERY == 0:
+        time.sleep(EPC_LONG_PAUSE_SECONDS)
+    else:
+        time.sleep(EPC_THROTTLE_SECONDS)


 def load_test_cases():
--- a/backend/address2UPRN/tests/test_data.csv
+++ b/backend/address2UPRN/tests/test_data.csv
@ -364,4 +364,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
 164a Victoria Square,M4 5FA,77211315
 165a Victoria Square,M4 5FA,77211316
 166a Victoria Square,M4 5FA,None
-"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
+"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
+71A  Stoneleigh Avenue,NE12 8NP,None
+71B  Stoneleigh Avenue,NE12 8NP,None
+71  Stoneleigh Avenue,NE12 8NP,47086009
--- a/backend/app/config.py
+++ b/backend/app/config.py
@ -86,6 +86,8 @@ class Settings(BaseSettings):
    # Pas Hub
    PASHUB_EMAIL: Optional[str] = None
    PASHUB_PASSWORD: Optional[str] = None
+    PASHUB_COORDINATION_EMAIL: Optional[str] = None
+    PASHUB_COORDINATION_PASSWORD: Optional[str] = None

    # Optional AWS creds (only required in local)
    AWS_ACCESS_KEY_ID: Optional[str] = None
--- a/backend/app/db/functions/magic_plan_functions.py
+++ b/backend/app/db/functions/magic_plan_functions.py
@ -14,15 +14,15 @@ from backend.app.db.models.magic_plan import (
 )


-def save_plan(session: Session, plan: Plan) -> None:
-    plan_id: int = _upsert_plan(session, plan)
+def save_plan(session: Session, plan: Plan, uploaded_file_id: int) -> None:
+    plan_id: int = _upsert_plan(session, plan, uploaded_file_id)
    _delete_children(session, plan_id)
    floor_ids: list[int] = _insert_floors(session, plan.floors, plan_id)
    room_ids: list[int] = _insert_rooms(session, plan.floors, floor_ids)
    _insert_windows_and_doors(session, plan.floors, room_ids)


-def _upsert_plan(session: Session, plan: Plan) -> int:
+def _upsert_plan(session: Session, plan: Plan, uploaded_file_id: int) -> int:
    stmt = (
        pg_insert(MagicPlanPlanModel)
        .values(
@ -30,6 +30,7 @@ def _upsert_plan(session: Session, plan: Plan) -> int:
            name=plan.name,
            address=plan.address,
            postcode=plan.postcode,
+            uploaded_file_id=uploaded_file_id,
        )
        .on_conflict_do_update(
            index_elements=["magic_plan_uid"],
@ -37,6 +38,7 @@ def _upsert_plan(session: Session, plan: Plan) -> int:
                "name": plan.name,
                "address": plan.address,
                "postcode": plan.postcode,
+                "uploaded_file_id": uploaded_file_id,
            },
        )
        .returning(col(MagicPlanPlanModel.id))
--- a/backend/app/db/functions/tests/test_magic_plan_functions.py
+++ b/backend/app/db/functions/tests/test_magic_plan_functions.py
@ -36,7 +36,7 @@ def _count(session: Session, model: type[SQLModel]) -> int:

 def test_plan_row_present_after_save(db_session: Session, domain_plan: Plan) -> None:
    # Act
-    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan, 1)
    # Assert
    assert _count(db_session, MagicPlanPlanModel) == 1

@ -45,7 +45,7 @@ def test_floor_count_matches_domain(db_session: Session, domain_plan: Plan) -> N
    # Arrange
    expected = len(domain_plan.floors)
    # Act
-    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan, 1)
    # Assert
    assert _count(db_session, MagicPlanFloorModel) == expected

@ -54,7 +54,7 @@ def test_room_count_matches_domain(db_session: Session, domain_plan: Plan) -> No
    # Arrange
    expected = sum(len(f.rooms) for f in domain_plan.floors)
    # Act
-    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan, 1)
    # Assert
    assert _count(db_session, MagicPlanRoomModel) == expected

@ -63,7 +63,7 @@ def test_window_count_matches_domain(db_session: Session, domain_plan: Plan) ->
    # Arrange
    expected = sum(len(r.windows) for f in domain_plan.floors for r in f.rooms)
    # Act
-    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan, 1)
    # Assert
    assert _count(db_session, MagicPlanWindowModel) == expected

@ -72,15 +72,15 @@ def test_door_count_matches_domain(db_session: Session, domain_plan: Plan) -> No
    # Arrange
    expected = sum(len(r.doors) for f in domain_plan.floors for r in f.rooms)
    # Act
-    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan, 1)
    # Assert
    assert _count(db_session, MagicPlanDoorModel) == expected


 def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
    # Act — call twice within the same session
-    save_plan(db_session, domain_plan)
-    save_plan(db_session, domain_plan)
+    save_plan(db_session, domain_plan, 1)
+    save_plan(db_session, domain_plan, 1)
    # Assert — same row counts as a single call
    assert _count(db_session, MagicPlanPlanModel) == 1
    assert _count(db_session, MagicPlanFloorModel) == len(domain_plan.floors)
@ -93,3 +93,23 @@ def test_save_plan_idempotent(db_session: Session, domain_plan: Plan) -> None:
    assert _count(db_session, MagicPlanDoorModel) == sum(
        len(r.doors) for f in domain_plan.floors for r in f.rooms
    )
+
+
+def test_uploaded_file_id_stored_after_save(db_session: Session, domain_plan: Plan) -> None:
+    """The ``uploaded_file_id`` passed to ``save_plan`` ends up on the plan row."""
+    # Act
+    save_plan(db_session, domain_plan, 1)
+    # Assert
+    # scalar_one() also fails unless exactly one plan row exists.
+    row = db_session.execute(select(MagicPlanPlanModel)).scalar_one()
+    assert row.uploaded_file_id == 1
+
+
+def test_save_plan_updates_uploaded_file_id_on_reingest(
+    db_session: Session, domain_plan: Plan
+) -> None:
+    """Saving the same plan again with a new file id overwrites the stored id.
+
+    The second ``save_plan`` call must update the existing row rather than
+    insert another one: ``scalar_one()`` fails unless exactly one row exists.
+    """
+    # Arrange
+    save_plan(db_session, domain_plan, 1)
+    # Act
+    save_plan(db_session, domain_plan, 2)
+    # Assert
+    row = db_session.execute(select(MagicPlanPlanModel)).scalar_one()
+    assert row.uploaded_file_id == 2
--- a/backend/app/db/models/epc_property.py
+++ b/backend/app/db/models/epc_property.py
@ -225,7 +225,7 @@ class EpcPropertyModel(SQLModel, table=True):
            pressure_test_certificate_number=data.pressure_test_certificate_number,
            percent_draughtproofed=data.percent_draughtproofed,
            insulated_door_u_value=data.insulated_door_u_value,
-            multiple_glazed_proportion=data.multiple_glazed_propertion,
+            multiple_glazed_proportion=data.multiple_glazed_proportion,
            windows_transmission_u_value=(
                data.windows_transmission_details.u_value
                if data.windows_transmission_details
@ -501,7 +501,7 @@ class EpcBuildingPartModel(SQLModel, table=True):
        aw2 = part.sap_alternative_wall_2
        return cls(
            epc_property_id=epc_property_id,
-            identifier=part.identifier,
+            identifier=part.identifier.value,
            construction_age_band=part.construction_age_band,
            wall_construction=str(part.wall_construction),
            wall_insulation_type=str(part.wall_insulation_type),
--- a/backend/app/db/models/magic_plan.py
+++ b/backend/app/db/models/magic_plan.py
@ -11,6 +11,7 @@ class MagicPlanPlanModel(SQLModel, table=True):
    name: Optional[str] = None
    address: Optional[str] = None
    postcode: Optional[str] = None
+    uploaded_file_id: Optional[int] = Field(default=None)


 class MagicPlanFloorModel(SQLModel, table=True):
--- a/backend/app/db/models/uploaded_file.py
+++ b/backend/app/db/models/uploaded_file.py
@ -18,10 +18,14 @@ class FileTypeEnum(enum.Enum):
    ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
    ECMK_SURVEY_XML = "ecmk_survey_xml"
    MAGIC_PLAN_JSON = "magic_plan_json"
+    IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation"
+    MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan"
+    RETROFIT_DESIGN_DOC = "retrofit_design_doc"


 class FileSourceEnum(enum.Enum):
    PAS_HUB = "pas hub"
+    COORDINATION_HUB = "coordination_hub"
    SHAREPOINT = "sharepoint"
    HUBSPOT = "hubspot"
    ECMK = "ecmk"
--- a/backend/condition/handler/Dockerfile
+++ b/backend/condition/handler/Dockerfile
@ -32,6 +32,7 @@ COPY utils/ utils/
 COPY backend/condition/ backend/condition/

 COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
+COPY backend/app/db/base.py backend/app/db/base.py
 COPY backend/app/db/connection.py backend/app/db/connection.py
 COPY backend/app/config.py backend/app/config.py

--- a/backend/documents_parser/elmhurst_extractor.py
+++ b/backend/documents_parser/elmhurst_extractor.py
@ -3,9 +3,11 @@ from datetime import date, datetime
 from typing import List, Optional

 from datatypes.epc.surveys.elmhurst_site_notes import (
+    AlternativeWall,
    BathsAndShowers,
    BuildingPartDimensions,
    ElmhurstSiteNotes,
+    ExtensionPart,
    FloorDetails,
    FloorDimension,
    Lighting,
@ -14,6 +16,8 @@ from datatypes.epc.surveys.elmhurst_site_notes import (
    PropertyDetails,
    Renewables,
    RoofDetails,
+    RoomInRoof,
+    RoomInRoofSurface,
    Shower,
    SurveyorInfo,
    VentilationAndCooling,
@ -79,6 +83,36 @@ class ElmhurstSiteNotesExtractor:
        except ValueError:
            return ""

+    # Multi-bp helpers: Summary PDFs subdivide §4/§7/§8/§9 with explicit
+    # "Main Property" / "1st Extension" / "2nd Extension" headers. The
+    # existing single-bp fixture also carries "Main Property" as a header
+    # before the body. This helper splits a section into per-bp chunks.
+    _BP_HEADER_RE = re.compile(
+        r"^(Main Property|\d+(?:st|nd|rd|th) Extension)\s*$",
+        re.MULTILINE,
+    )
+
+    def _split_section_by_bp(self, section_text: str) -> List[tuple[str, str]]:
+        """Partition one section's text into per-building-part chunks.
+
+        Headers are located with ``_BP_HEADER_RE``. Each returned pair is
+        ``(header_name, body)`` in document order, where ``body`` runs
+        from the end of that header's match up to the start of the next
+        header (or to the end of ``section_text`` for the last header).
+        Text ahead of the first header is not part of any chunk. When no
+        header is found the whole text comes back as a single
+        ``"Main Property"`` chunk (defensive fallback for malformed PDFs).
+        """
+        headers = list(self._BP_HEADER_RE.finditer(section_text))
+        if not headers:
+            return [("Main Property", section_text)]
+        # Every body stops where the following header starts; the final
+        # body stops at the end of the text.
+        stops = [following.start() for following in headers[1:]]
+        stops.append(len(section_text))
+        return [
+            (header.group(1), section_text[header.end():stop])
+            for header, stop in zip(headers, stops)
+        ]
+
    def _section_lines(self, start: str, end: str) -> List[str]:
        text = self._between(start, end)
        return [l.strip() for l in text.splitlines() if l.strip()]
@ -151,14 +185,13 @@ class ElmhurstSiteNotesExtractor:
        m = re.search(r"1\.0 Property type:\n[^\n]+\n([^\n]+)", self._text)
        return " ".join(m.group(1).strip().split()) if m else ""

-    def _extract_dimensions(self) -> BuildingPartDimensions:
-        dim_type = self._str_val("Dimension type")
-        section = self._between("4.0 Dimensions:", "5.0 Conservatory:")
-        floor_matches = re.findall(
+    def _floors_from_dimensions_body(self, body: str) -> List[FloorDimension]:
+        """Parse FloorDimension entries from a single bp's §4 body."""
+        matches = re.findall(
            r"([A-Za-z ]+Floor):\n([\d.]+)\n([\d.]+)\n([\d.]+)\n([\d.]+)",
-            section,
+            body,
        )
-        floors = [
+        return [
            FloorDimension(
                name=name.strip(),
                area_m2=float(area),
@ -166,12 +199,22 @@ class ElmhurstSiteNotesExtractor:
                heat_loss_perimeter_m=float(hlp),
                party_wall_length_m=float(pwl),
            )
-            for name, area, height, hlp, pwl in floor_matches
+            for name, area, height, hlp, pwl in matches
        ]
-        return BuildingPartDimensions(dimension_type=dim_type, floors=floors)

-    def _extract_walls(self) -> WallDetails:
-        lines = self._section_lines("7.0 Walls:", "8.0 Roofs:")
+    def _extract_dimensions(self) -> BuildingPartDimensions:
+        """Dimensions for the first building-part chunk of §4 only.
+
+        The "4.0 Dimensions:" section is split by building-part header
+        and just the first chunk's floors are parsed; the remaining
+        chunks are handled by `_extract_extensions`.
+        """
+        dimension_type = self._str_val("Dimension type")
+        dimensions_text = self._between("4.0 Dimensions:", "5.0 Conservatory:")
+        chunks = self._split_section_by_bp(dimensions_text)
+        # Fall back to the unsplit section if the splitter yields nothing.
+        first_body = chunks[0][1] if chunks else dimensions_text
+        floors = self._floors_from_dimensions_body(first_body)
+        return BuildingPartDimensions(dimension_type=dimension_type, floors=floors)
+
+    def _wall_details_from_lines(self, lines: List[str]) -> WallDetails:
        thickness_raw = self._local_val(lines, "Wall Thickness")
        thickness_mm = (
            int(thickness_raw.split()[0]) if thickness_raw else None
@ -183,23 +226,81 @@ class ElmhurstSiteNotesExtractor:
            u_value_known=self._local_bool(lines, "U-value Known"),
            party_wall_type=self._local_str(lines, "Party Wall Type"),
            thickness_mm=thickness_mm,
+            alternative_walls=self._alternative_walls_from_lines(lines),
        )

-    def _extract_roof(self) -> RoofDetails:
-        lines = self._section_lines("8.0 Roofs:", "8.1 Rooms in Roof:")
+    def _alternative_walls_from_lines(self, lines: List[str]) -> List[AlternativeWall]:
+        """Read the §7 "Alternative Wall 1" and "Alternative Wall 2" slots.
+
+        Every field of a slot is looked up by its own prefixed label
+        ("Alternative Wall N Area", "Alternative Wall N Type", …), so the
+        two slots are parsed independently of each other. A slot is left
+        out when its Area value is missing, does not begin with a number,
+        or is not greater than zero.
+        """
+        found: List[AlternativeWall] = []
+        for slot in (1, 2):
+            prefix = f"Alternative Wall {slot}"
+            raw_area = self._local_val(lines, f"{prefix} Area")
+            if not raw_area:
+                continue
+            try:
+                area_m2 = float(raw_area.split()[0])
+            except (ValueError, IndexError):
+                continue
+            if area_m2 <= 0:
+                continue
+            raw_thickness = self._local_val(lines, f"{prefix} Thickness")
+            # Only an all-digit leading token is accepted as a thickness.
+            thickness_mm: Optional[int] = None
+            if raw_thickness and raw_thickness.split()[0].isdigit():
+                thickness_mm = int(raw_thickness.split()[0])
+            found.append(
+                AlternativeWall(
+                    area_m2=area_m2,
+                    wall_type=self._local_str(lines, f"{prefix} Type"),
+                    insulation=self._local_str(lines, f"{prefix} Insulation"),
+                    thickness_unknown=self._local_bool(
+                        lines, f"{prefix} Thickness Unknown"
+                    ),
+                    thickness_mm=thickness_mm,
+                    u_value_known=self._local_bool(
+                        lines, f"{prefix} U-value Known"
+                    ),
+                )
+            )
+        return found
+
+    def _extract_walls(self) -> WallDetails:
+        """Wall details parsed from the first building-part chunk of §7."""
+        walls_text = self._between("7.0 Walls:", "8.0 Roofs:")
+        chunks = self._split_section_by_bp(walls_text)
+        # Fall back to the unsplit section if the splitter yields nothing.
+        first_body = chunks[0][1] if chunks else walls_text
+        stripped = (raw.strip() for raw in first_body.splitlines())
+        return self._wall_details_from_lines([text for text in stripped if text])
+
+    def _roof_details_from_lines(self, lines: List[str]) -> RoofDetails:
        thickness_raw = self._local_val(lines, "Insulation Thickness")
        thickness_mm = (
-            int(thickness_raw.split()[0]) if thickness_raw else None
+            int(thickness_raw.split()[0]) if thickness_raw and thickness_raw.split()[0].isdigit() else None
        )
+        insulation = self._local_str(lines, "Insulation")
+        # The Summary PDF omits the "Insulation Thickness" line entirely
+        # when no retrofit insulation is lodged (e.g. "Insulation: N None"
+        # on 000516). Treat that case as 0 mm so the cascade picks Table
+        # 16 row 0 (U=2.30) rather than the age-band default — the
+        # surveyor explicitly recorded "None".
+        if thickness_mm is None and insulation.split(" ", 1)[0] == "N":
+            thickness_mm = 0
        return RoofDetails(
            roof_type=self._local_str(lines, "Type"),
-            insulation=self._local_str(lines, "Insulation"),
+            insulation=insulation,
            u_value_known=self._local_bool(lines, "U-value Known"),
            insulation_thickness_mm=thickness_mm,
        )

-    def _extract_floor(self) -> FloorDetails:
-        lines = self._section_lines("9.0 Floors:", "10.0 Doors:")
+    def _extract_roof(self) -> RoofDetails:
+        """Roof details parsed from the first building-part chunk of §8."""
+        roofs_text = self._between("8.0 Roofs:", "8.1 Rooms in Roof:")
+        chunks = self._split_section_by_bp(roofs_text)
+        # Fall back to the unsplit section if the splitter yields nothing.
+        first_body = chunks[0][1] if chunks else roofs_text
+        stripped = (raw.strip() for raw in first_body.splitlines())
+        return self._roof_details_from_lines([text for text in stripped if text])
+
+    def _floor_details_from_lines(self, lines: List[str]) -> FloorDetails:
        u_val_raw = self._local_val(lines, "Default U-value")
        default_u = float(u_val_raw) if u_val_raw else None
        return FloorDetails(
@ -210,14 +311,251 @@ class ElmhurstSiteNotesExtractor:
            default_u_value=default_u,
        )

+    def _extract_floor(self) -> FloorDetails:
+        """Floor details parsed from the first building-part chunk of §9."""
+        floors_text = self._between("9.0 Floors:", "10.0 Doors:")
+        chunks = self._split_section_by_bp(floors_text)
+        # Fall back to the unsplit section if the splitter yields nothing.
+        first_body = chunks[0][1] if chunks else floors_text
+        stripped = (raw.strip() for raw in first_body.splitlines())
+        return self._floor_details_from_lines([text for text in stripped if text])
+
+    # RIR surface row: `<name>  <length>  <height>  [<insulation>  [<ins_type>]
+    #   [<gable_type>]  <default_u>  <known>  <u>]`. The middle slot
+    # widths vary by surface kind; we match the four leading numerics
+    # robustly (length, height, default_u, u_value) and slot the
+    # remaining textual fields by position. The layout preprocessor
+    # turns the multi-space gaps between cells into newlines, so each
+    # row in the dump spans several lines, one cell per line.
+    _RIR_SURFACE_NAMES: tuple[str, ...] = (
+        "Flat Ceiling 1", "Flat Ceiling 2",
+        "Stud Wall 1", "Stud Wall 2",
+        "Slope 1", "Slope 2",
+        "Gable Wall 1", "Gable Wall 2",
+        "Common Wall 1", "Common Wall 2",
+    )
+
+    def _extract_room_in_roof(
+        self, main_dim_body: str, age_band_text: str
+    ) -> Optional[RoomInRoof]:
+        """Parse the §8.1 Rooms in Roof section for the first bp chunk.
+
+        `main_dim_body` is the §4 chunk searched for the
+        "Room(s) in Roof: <area>" line; `age_band_text` is the text
+        searched for the "Main Prop. Room(s) in Roof <band>" line.
+
+        Returns None when the area line is absent or not positive, or
+        when the §8.1 section is blank or lacks "Room in roof type".
+        """
+        area_match = re.search(r"Room\(s\) in Roof:\s+(\d+(?:\.\d+)?)", main_dim_body)
+        if area_match is None:
+            return None
+        floor_area = float(area_match.group(1))
+        if floor_area <= 0:
+            return None
+
+        rir_text = self._between("8.1 Rooms in Roof:", "9.0 Floors:")
+        if not rir_text.strip() or "Room in roof type" not in rir_text:
+            return None
+        chunks = self._split_section_by_bp(rir_text)
+        first_body = chunks[0][1] if chunks else rir_text
+        stripped = (raw.strip() for raw in first_body.splitlines())
+        lines = [text for text in stripped if text]
+
+        # The assessment value is the line right after the first
+        # "Assessment" label line; empty when either is missing.
+        assessment = ""
+        if "Assessment" in lines:
+            value_idx = lines.index("Assessment") + 1
+            if value_idx < len(lines):
+                assessment = lines[value_idx]
+
+        # One surface per known name that appears as a line of its own,
+        # in `_RIR_SURFACE_NAMES` order.
+        surfaces: List[RoomInRoofSurface] = [
+            self._parse_rir_surface_row(name, lines, lines.index(name))
+            for name in self._RIR_SURFACE_NAMES
+            if name in lines
+        ]
+
+        # e.g. "Main Prop. Room(s) in Roof    B 1900-1929"
+        age_match = re.search(
+            r"Main Prop\. Room\(s\) in Roof\s+([A-M] [^\n]+)", age_band_text
+        )
+        return RoomInRoof(
+            floor_area_m2=floor_area,
+            construction_age_band=age_match.group(1).strip() if age_match else None,
+            assessment=assessment,
+            surfaces=surfaces,
+        )
+
+    _RIR_NUMERIC_RE = re.compile(r"^-?\d+(?:\.\d+)?$")
+    _RIR_INSULATION_THICKNESS_RE = re.compile(r"^\d+\s*mm$")
+
+    def _parse_rir_surface_row(
+        self, name: str, lines: List[str], idx: int
+    ) -> RoomInRoofSurface:
+        """Parse the cells that follow one RR surface name line.
+
+        ``lines[idx]`` is the surface name. Up to nine following lines
+        are collected as the row's cells, stopping early at the next
+        surface name — that marker is the only way to tell the last cell
+        of this row from the first cell of the next one. Expected cell
+        order: length, height, [insulation], [ins_type], [gable_type],
+        default_u, u_known, u_value.
+
+        Cells that are absent or not in the expected form fall back to
+        0.0 (length, height, u_value), False (u_value_known), None
+        (default_u, insulation_type, gable_type) or "" (insulation).
+        """
+        is_number = self._RIR_NUMERIC_RE.match
+
+        cells: List[str] = []
+        for j in range(idx + 1, min(idx + 10, len(lines))):
+            candidate = lines[j]
+            if self._is_next_rir_row(candidate):
+                break
+            cells.append(candidate)
+
+        # Leading pair: length, then height.
+        length = float(cells[0]) if cells and is_number(cells[0]) else 0.0
+        height = float(cells[1]) if len(cells) > 1 and is_number(cells[1]) else 0.0
+
+        # Trailing triple, read from the end: u_value, then the
+        # "Yes"/"No" known flag, then default_u.
+        tail = cells[2:][::-1]
+        u_value = float(tail[0]) if len(tail) >= 1 and is_number(tail[0]) else 0.0
+        u_value_known = len(tail) >= 2 and tail[1] == "Yes"
+        default_u: Optional[float] = (
+            float(tail[2]) if len(tail) >= 3 and is_number(tail[2]) else None
+        )
+
+        # Whatever sits between the leading pair and the trailing triple
+        # holds the textual cells; each is recognised by its value.
+        insulation = ""
+        insulation_type: Optional[str] = None
+        gable_type: Optional[str] = None
+        for cell in (cells[2:-3] if len(cells) >= 5 else []):
+            if self._RIR_INSULATION_THICKNESS_RE.match(cell) or cell in ("As Built", "None"):
+                # Keep the first insulation cell only.
+                insulation = insulation or cell
+            elif cell in ("Mineral or EPS", "PUR", "PIR"):
+                insulation_type = cell
+            elif cell in ("Party", "Sheltered", "Connected to heated space"):
+                gable_type = cell
+
+        return RoomInRoofSurface(
+            name=name,
+            length_m=length,
+            height_m=height,
+            insulation=insulation,
+            insulation_type=insulation_type,
+            gable_type=gable_type,
+            default_u_value=default_u,
+            u_value_known=u_value_known,
+            u_value=u_value,
+        )
+
+    def _is_next_rir_row(self, line: str) -> bool:
+        """Report whether ``line`` is exactly one of `_RIR_SURFACE_NAMES`."""
+        known_names = self._RIR_SURFACE_NAMES
+        return line in known_names
+
+    def _extract_extensions(self) -> List[ExtensionPart]:
+        """Build an `ExtensionPart` for every non-Main header found in §4.
+
+        Names come, in document order, from the building-part headers of
+        the "4.0 Dimensions:" section. For each name the same-named
+        chunks of the walls, roofs and floors sections are looked up; a
+        section without such a chunk contributes an empty body. When the
+        walls chunk has "As Main Wall" set, the main wall fields are
+        copied and only the alternative walls are read from the
+        extension's own chunk. When the roof / floor chunk has "As Main"
+        set, the main roof / floor details are reused as they are.
+        """
+        def nonblank(body: str) -> List[str]:
+            # Stripped, non-empty lines of one chunk.
+            return [text for text in (raw.strip() for raw in body.splitlines()) if text]
+
+        dim_section = self._between("4.0 Dimensions:", "5.0 Conservatory:")
+        wall_section = self._between("7.0 Walls:", "8.0 Roofs:")
+        roof_section = self._between("8.0 Roofs:", "8.1 Rooms in Roof:")
+        floor_section = self._between("9.0 Floors:", "10.0 Doors:")
+        dim_type = self._str_val("Dimension type")
+
+        # The §4 pairs give both the name order and the body lookup.
+        dim_pairs = self._split_section_by_bp(dim_section)
+        dim_chunks = dict(dim_pairs)
+        wall_chunks = dict(self._split_section_by_bp(wall_section))
+        roof_chunks = dict(self._split_section_by_bp(roof_section))
+        floor_chunks = dict(self._split_section_by_bp(floor_section))
+
+        main_walls = self._extract_walls()
+        main_roof = self._extract_roof()
+        main_floor = self._extract_floor()
+
+        # Lines such as "1st Extension B 1900-1929": the band follows
+        # the extension name on the same line.
+        age_band_re = re.compile(
+            r"^(\d+(?:st|nd|rd|th) Extension)\s+([A-M] [^\n]+)$",
+            re.MULTILINE,
+        )
+        age_bands = {
+            hit.group(1): hit.group(2).strip()
+            for hit in age_band_re.finditer(self._text)
+        }
+
+        extensions: List[ExtensionPart] = []
+        for name, _ in dim_pairs:
+            if name == "Main Property":
+                continue
+            wall_lines = nonblank(wall_chunks.get(name, ""))
+            roof_lines = nonblank(roof_chunks.get(name, ""))
+            floor_lines = nonblank(floor_chunks.get(name, ""))
+
+            if self._local_bool(wall_lines, "As Main Wall"):
+                # Inherit the main wall fields, but take alternative
+                # walls from the extension's own chunk.
+                walls = WallDetails(
+                    wall_type=main_walls.wall_type,
+                    insulation=main_walls.insulation,
+                    thickness_unknown=main_walls.thickness_unknown,
+                    u_value_known=main_walls.u_value_known,
+                    party_wall_type=main_walls.party_wall_type,
+                    thickness_mm=main_walls.thickness_mm,
+                    alternative_walls=self._alternative_walls_from_lines(wall_lines),
+                )
+            else:
+                walls = self._wall_details_from_lines(wall_lines)
+
+            if self._local_bool(roof_lines, "As Main"):
+                roof = main_roof
+            else:
+                roof = self._roof_details_from_lines(roof_lines)
+
+            if self._local_bool(floor_lines, "As Main"):
+                floor = main_floor
+            else:
+                floor = self._floor_details_from_lines(floor_lines)
+
+            dimensions = BuildingPartDimensions(
+                dimension_type=dim_type,
+                floors=self._floors_from_dimensions_body(dim_chunks.get(name, "")),
+            )
+            extensions.append(
+                ExtensionPart(
+                    name=name,
+                    construction_age_band=age_bands.get(name, ""),
+                    dimensions=dimensions,
+                    walls=walls,
+                    roof=roof,
+                    floor=floor,
+                )
+            )
+        return extensions
+
    def _extract_windows(self) -> List[Window]:
+        # Textract-style pages keep "Permanent\s+Shutters" adjacent in
+        # reading order and the windows table flows as one column-block
+        # the existing token-walker can step through. PDF-derived pages
+        # (Summary PDFs preprocessed from `pdftotext -layout`) break the
+        # header across lines, so this regex misses entirely and the
+        # `_extract_windows_from_layout` fallback below picks them up
+        # by anchoring on the W/H/Area data line.
        m = re.search(
            r"Permanent\s+Shutters\n(.*?)Draught Proofing",
            self._text,
            re.DOTALL,
        )
        if not m:
-            return []
+            return self._extract_windows_from_layout()
        tokens = [t.strip() for t in m.group(1).splitlines() if t.strip()]
        windows: List[Window] = []
        i = 0
@ -285,6 +623,323 @@ class ElmhurstSiteNotesExtractor:
            )
        return windows

+    # Anchors used by the layout-style window parser. The W/H/Area anchor
+    # is sometimes followed by a joined glazing-type phrase on the same
+    # line (e.g. '1.22 1.76 2.15 Double pre 2002'); the optional 4th
+    # capture surfaces that text so the parser can use it instead of a
+    # separately-laid-out prefix line.
+    _WIDTH_HEIGHT_AREA_RE = re.compile(
+        r"^(\d+\.\d+)\s+(\d+\.\d+)\s+(\d+\.\d+)(?:\s+(\S.*?))?$"
+    )
+    _MANUFACTURER_RE = re.compile(r"^(Manufacturer|Default)\s+(\d+\.\d+)$")
+    _ORIENTATION_TOKENS = frozenset({
+        "North", "South", "East", "West", "NE", "NW", "SE", "SW",
+    })
+    _BP_INLINE_TOKENS = frozenset({"Main"})  # "Extension" only appears as suffix
+    # The Elmhurst Summary PDF lodges each window's glazing-type as a
+    # capitalised phrase like "Double between 2002" / "Double with unknown"
+    # / "Single" / "Triple" / "Secondary". The first token of that phrase
+    # marks the start of a new window's prefix block in the layout dump,
+    # which is the only stable signal partitioning one window's suffix
+    # from the next window's prefix.
+    _GLAZING_TYPE_PREFIX_WORDS = frozenset({
+        "Single", "Double", "Triple", "Secondary",
+    })
+
+    def _extract_windows_from_layout(self) -> List[Window]:
+        """Fallback window parser for `pdftotext -layout` style text.
+
+        Works on the text between "11.0 Windows:" and the first
+        "Draught Proofing" / "12.0 Ventilation" marker. Every line
+        matching the "W H Area" pattern is a candidate window; it is
+        paired with the manufacturer line found shortly after it, and
+        candidates without one are skipped. The lines between windows
+        are divided by `_partition_after_manuf`, using the same boundary
+        as one window's suffix end and the next window's prefix start,
+        so a prefix token of the next window is never read as a suffix
+        of the current one. Windows whose parse raises ValueError or
+        IndexError, or returns None, are dropped.
+        """
+        section = re.search(
+            r"11\.0 Windows:(.*?)(Draught Proofing|12\.0 Ventilation)",
+            self._text, re.DOTALL,
+        )
+        if not section:
+            return []
+        lines = section.group(1).splitlines()
+
+        # (line index, W/H/Area match) for every data line, in order.
+        data_anchors: List[tuple[int, re.Match[str]]] = [
+            (i, hit)
+            for i, raw in enumerate(lines)
+            if (hit := self._WIDTH_HEIGHT_AREA_RE.match(raw.strip())) is not None
+        ]
+        last = len(data_anchors) - 1
+
+        parsed: List[Window] = []
+        for k, (data_idx, anchor) in enumerate(data_anchors):
+            manuf_idx = self._find_manufacturer_after(lines, data_idx)
+            if manuf_idx is None:
+                continue
+            if k > 0:
+                prev_manuf_idx = self._find_manufacturer_after(
+                    lines, data_anchors[k - 1][0]
+                )
+            else:
+                prev_manuf_idx = None
+            next_data_idx = data_anchors[k + 1][0] if k < last else len(lines)
+
+            # Prefix starts where the previous window's suffix ended;
+            # with no previous manufacturer line it starts at line 0.
+            if prev_manuf_idx is None:
+                before_start = 0
+            else:
+                before_start = self._partition_after_manuf(
+                    lines, prev_manuf_idx, data_idx
+                )
+            after_end = self._partition_after_manuf(lines, manuf_idx, next_data_idx)
+
+            try:
+                window = self._parse_window_from_anchors(
+                    lines=lines,
+                    data_idx=data_idx,
+                    manuf_idx=manuf_idx,
+                    anchor=anchor,
+                    before_start=before_start,
+                    after_end=after_end,
+                )
+            except (ValueError, IndexError):
+                continue
+            if window is not None:
+                parsed.append(window)
+        return parsed
+
+    def _find_manufacturer_after(self, lines: List[str], data_idx: int) -> Optional[int]:
+        """Index of the first `_MANUFACTURER_RE` line within the eleven
+        lines after ``data_idx``, or None when there is none."""
+        scan_stop = min(data_idx + 12, len(lines))
+        return next(
+            (
+                j
+                for j in range(data_idx + 1, scan_stop)
+                if self._MANUFACTURER_RE.match(lines[j].strip())
+            ),
+            None,
+        )
+
+    _FRAME_TYPE_AND_FACTOR_RE = re.compile(r"^(\S+(?:\s+\S+)*?)\s+(\d\.\d+)$")
+    _FRAME_FACTOR_ONLY_RE = re.compile(r"^(\d\.\d+)$")
+
+    def _parse_frame_type_and_factor(
+        self, lines: List[str], data_idx: int
+    ) -> tuple[str, Optional[float], int]:
+        """Return `(frame_type, frame_factor, middle_start_idx)` read
+        from the line(s) right after the data anchor. Three layouts are
+        recognised:
+        (a) frame type on data+1 and the factor on data+2 ("PVC" then
+            "0.70") — middle starts at data+3;
+        (b) both joined on data+1 ("Wood 0.70") — middle starts at
+            data+2;
+        (c) the factor alone on data+1 ("0.70") — frame_type is "" and
+            middle starts at data+2.
+        In layout (a) the factor is None when data+2 is past the end of
+        ``lines`` (middle start data+2) or is not a number (middle
+        start data+3)."""
+        head = lines[data_idx + 1].strip()
+        after_head = data_idx + 2
+
+        joined = self._FRAME_TYPE_AND_FACTOR_RE.match(head)
+        if joined is not None:
+            return joined.group(1), float(joined.group(2)), after_head
+
+        bare_factor = self._FRAME_FACTOR_ONLY_RE.match(head)
+        if bare_factor is not None:
+            return "", float(bare_factor.group(1)), after_head
+
+        if after_head >= len(lines):
+            return head, None, after_head
+
+        factor: Optional[float]
+        try:
+            factor = float(lines[after_head].strip())
+        except ValueError:
+            factor = None
+        return head, factor, data_idx + 3
+
+    def _partition_after_manuf(
+        self, lines: List[str], manuf_idx: int, next_data_idx: int
+    ) -> int:
+        """Return the exclusive end of this window's suffix block, which
+        is also the inclusive start of the next window's prefix block.
+
+        The three lines after the manufacturer line are fixed cells
+        (g_value, draught, shutters), so scanning starts at manuf+4 and
+        stops at ``next_data_idx``. The boundary is the first line that
+        either (a) begins with a glazing-type word ('Single', 'Double',
+        'Triple', 'Secondary'), or (b) is the second orientation token
+        met during the scan. Case (b) serves layouts where the glazing
+        type is joined to the data line, leaving the change of
+        orientation token as the only sign of a new window. With
+        neither marker present ``next_data_idx`` is returned."""
+        orientation_seen = False
+        for j in range(manuf_idx + 4, next_data_idx):
+            text = lines[j].strip()
+            if text.split(" ", 1)[0] in self._GLAZING_TYPE_PREFIX_WORDS:
+                return j
+            if text not in self._ORIENTATION_TOKENS:
+                continue
+            if orientation_seen:
+                return j
+            orientation_seen = True
+        return next_data_idx
+
+    def _parse_window_from_anchors(
+        self,
+        *,
+        lines: List[str],
+        data_idx: int,
+        manuf_idx: int,
+        anchor: re.Match[str],
+        before_start: int,
+        after_end: int,
+    ) -> Optional[Window]:
+        """Build one `Window` from its data line and manufacturer line.
+
+        ``anchor`` is the W/H/Area match for ``lines[data_idx]`` and
+        ``manuf_idx`` the index of the window's manufacturer line.
+        ``before_start`` / ``after_end`` bound the prefix lines
+        (``before_start`` up to the data line) and the suffix lines
+        (``manuf_idx + 4`` up to ``after_end``) that are handed to
+        `_compose_window_descriptors`.
+
+        Returns None when no line follows the data line, the frame
+        factor is missing or outside (0, 1], the manufacturer line does
+        not match, the three cells after it are not all present, or the
+        g-value is not a number.
+        """
+        width, height, area = (float(anchor.group(n)) for n in (1, 2, 3))
+        # A glazing-type phrase joined onto the data line (e.g.
+        # "1.22 1.76 2.15 Double pre 2002") arrives as capture group 4;
+        # it is passed to the composer as `inline_glazing_type`.
+        has_inline_phrase = bool(anchor.lastindex) and anchor.lastindex >= 4
+        inline_glazing_type = anchor.group(4) if has_inline_phrase else None
+
+        # frame_type / frame_factor come right after the data line, on
+        # one line or two; `middle_start` is the first variable field.
+        if data_idx + 1 >= len(lines):
+            return None
+        frame_type, frame_factor, middle_start = self._parse_frame_type_and_factor(
+            lines, data_idx
+        )
+        if frame_factor is None or not 0.0 < frame_factor <= 1.0:
+            return None
+
+        # Variable-order cells between frame_factor and Manufacturer,
+        # each recognised by its content.
+        middle = [lines[j].strip() for j in range(middle_start, manuf_idx)]
+
+        def first_matching(accept, fallback):
+            # First middle cell accepted by the predicate, else fallback.
+            return next((cell for cell in middle if accept(cell)), fallback)
+
+        glazing_gap = first_matching(lambda cell: "mm" in cell.lower(), None)
+        location = first_matching(lambda cell: "wall" in cell.lower(), "External wall")
+        bp_inline = first_matching(lambda cell: cell in self._BP_INLINE_TOKENS, None)
+        orient_inline = first_matching(
+            lambda cell: cell in self._ORIENTATION_TOKENS, None
+        )
+
+        # The manufacturer line holds data_source and u_value.
+        manuf_match = self._MANUFACTURER_RE.match(lines[manuf_idx].strip())
+        if manuf_match is None:
+            return None
+        data_source = manuf_match.group(1)
+        u_value = float(manuf_match.group(2))
+
+        # The next three lines are g_value, draught, shutters.
+        if manuf_idx + 3 >= len(lines):
+            return None
+        try:
+            g_value = float(lines[manuf_idx + 1].strip())
+        except ValueError:
+            return None
+        draught_proofed = lines[manuf_idx + 2].strip().lower() == "yes"
+        permanent_shutters = lines[manuf_idx + 3].strip()
+
+        # Non-blank prefix / suffix lines carry the glazing-type,
+        # building-part and orientation fragments.
+        before = [
+            lines[j].strip() for j in range(before_start, data_idx) if lines[j].strip()
+        ]
+        after = [
+            lines[j].strip() for j in range(manuf_idx + 4, after_end) if lines[j].strip()
+        ]
+        glazing_type, building_part, orientation = self._compose_window_descriptors(
+            before=before,
+            after=after,
+            bp_inline=bp_inline,
+            orient_inline=orient_inline,
+            inline_glazing_type=inline_glazing_type,
+        )
+
+        return Window(
+            width_m=width,
+            height_m=height,
+            area_m2=area,
+            glazing_type=glazing_type,
+            frame_factor=frame_factor,
+            building_part=building_part,
+            location=location,
+            orientation=orientation,
+            data_source=data_source,
+            u_value=u_value,
+            g_value=g_value,
+            draught_proofed=draught_proofed,
+            permanent_shutters=permanent_shutters,
+            frame_type=frame_type,
+            glazing_gap=glazing_gap,
+        )
+
+    def _compose_window_descriptors(
+        self,
+        *,
+        before: List[str],
+        after: List[str],
+        bp_inline: Optional[str],
+        orient_inline: Optional[str],
+        inline_glazing_type: Optional[str] = None,
+    ) -> tuple[str, str, str]:
+        """Assemble the glazing-type, building-part and orientation strings.
+
+        The layout preprocessor can leave each descriptor in pieces on
+        either side of a window's data line; this stitches them back
+        together. Only the fragments nearest the window are examined:
+        the last three entries of ``before`` and the first three of
+        ``after``.
+
+        Args:
+            before: Stripped lines preceding the data line, in document
+                order.
+            after: Stripped lines following the window's trailing fields.
+            bp_inline: Building-part token found between the frame factor
+                and the Manufacturer line, or ``None``. Wins over fragments.
+            orient_inline: Orientation token found in that same segment,
+                or ``None``. Wins as the primary direction.
+            inline_glazing_type: Glazing-type phrase captured on the data
+                line itself, or ``None``. Wins over fragments.
+
+        Returns:
+            ``(glazing_type, building_part, orientation)``. A descriptor
+            with no usable source comes back as an empty string.
+        """
+        leading = before[-3:]  # nearest three lines above the data line
+        trailing = after[:3]
+
+        def take_first(tokens: List[str], accept) -> Optional[str]:
+            # Remove and return the first token satisfying `accept`.
+            for position, token in enumerate(tokens):
+                if accept(token):
+                    del tokens[position]
+                    return token
+            return None
+
+        def is_orientation(token: str) -> bool:
+            return token in self._ORIENTATION_TOKENS
+
+        def is_bp_fragment(token: str) -> bool:
+            # Ordinals such as "1st" / "2nd", or the literal "Extension".
+            return bool(re.match(r"^\d+(?:st|nd|rd|th)$", token)) or token == "Extension"
+
+        # Orientation and building-part pieces are lifted out first so
+        # that whatever remains is glazing-type text.
+        orient_lead = take_first(leading, is_orientation)
+        orient_trail = take_first(trailing, is_orientation)
+        bp_lead = take_first(leading, is_bp_fragment)
+        bp_trail = take_first(trailing, is_bp_fragment)
+
+        # Glazing type: data-line capture if there is one, else the
+        # leftover fragments in document order.
+        if inline_glazing_type is None:
+            glazing_type = " ".join(leading + trailing).strip()
+        else:
+            glazing_type = inline_glazing_type
+
+        # Building part: inline token if there is one, else the
+        # fragments that were found, lead first.
+        if bp_inline is None:
+            building_part = " ".join(filter(None, (bp_lead, bp_trail))).strip()
+        else:
+            building_part = bp_inline
+
+        # Orientation: the primary direction is the inline token or,
+        # failing that, the leading fragment. A trailing fragment is
+        # only ever used as the second half of a compound direction.
+        primary = orient_inline or orient_lead or ""
+        secondary = next(
+            (t for t in (orient_lead, orient_trail) if t and t != primary),
+            None,
+        )
+        if primary and secondary is not None:
+            orientation = f"{primary}-{secondary}"
+        else:
+            orientation = primary
+
+        return glazing_type, building_part, orientation
+
    def _extract_ventilation(self) -> VentilationAndCooling:
        return VentilationAndCooling(
            open_chimneys_count=self._int_val("No. of open chimneys"),
@ -326,6 +981,20 @@ class ElmhurstSiteNotesExtractor:
        lines = self._section_lines("14.0 Main Heating1", "14.1 Main Heating2")
        pct_raw = self._local_val(lines, "Percentage of Heat")
        pct = int(pct_raw.split()[0]) if pct_raw else 0
+        # The "Secondary Heating SapCode" key is lodged inside §14.1 Main
+        # Heating2 — Elmhurst uses the Main-2 block to also carry the
+        # cert's secondary heating system (when one exists). Look for it
+        # in that section; absence (or "0") means no secondary lodged.
+        secondary_lines = self._section_lines(
+            "14.1 Main Heating2", "14.1 Community Heating"
+        )
+        secondary_raw = self._local_val(secondary_lines, "Secondary Heating SapCode")
+        secondary_code = (
+            int(secondary_raw)
+            if secondary_raw is not None and secondary_raw.isdigit()
+            and int(secondary_raw) > 0
+            else None
+        )
        return MainHeating(
            heat_emitter=self._local_str(lines, "Heat Emitter"),
            fuel_type=self._local_str(lines, "Fuel Type"),
@ -337,6 +1006,7 @@ class ElmhurstSiteNotesExtractor:
            percentage_of_heat=pct,
            pcdf_boiler_reference=self._local_val(lines, "PCDF boiler Reference"),
            heat_pump_age=self._local_val(lines, "Heat pump age"),
+            secondary_heating_sap_code=secondary_code,
        )

    def _extract_meters(self) -> Meters:
@ -448,4 +1118,15 @@ class ElmhurstSiteNotesExtractor:
            water_heating=self._extract_water_heating(),
            baths_and_showers=self._extract_baths_and_showers(),
            renewables=self._extract_renewables(),
+            extensions=self._extract_extensions(),
+            room_in_roof=self._extract_room_in_roof_from_text(),
        )
+
+    def _extract_room_in_roof_from_text(self) -> Optional[RoomInRoof]:
+        """Slice the document once and delegate to `_extract_room_in_roof`.
+
+        Takes the text between "4.0 Dimensions:" and "5.0 Conservatory:",
+        splits it per building part, and forwards the first chunk's body
+        (presumably the Main part — confirm against
+        `_split_section_by_bp`) together with the full `self._text`.
+        When the split yields nothing, the whole section is forwarded
+        instead.
+        """
+        section_four = self._between("4.0 Dimensions:", "5.0 Conservatory:")
+        per_bp = self._split_section_by_bp(section_four)
+        if per_bp:
+            main_body = per_bp[0][1]
+        else:
+            main_body = section_four
+        return self._extract_room_in_roof(main_body, self._text)
--- a/backend/documents_parser/tests/fixtures/Summary_000474.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_000474.pdf
--- a/backend/documents_parser/tests/fixtures/Summary_000477.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_000477.pdf
--- a/backend/documents_parser/tests/fixtures/Summary_000480.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_000480.pdf
--- a/backend/documents_parser/tests/fixtures/Summary_000487.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_000487.pdf
--- a/backend/documents_parser/tests/fixtures/Summary_000490.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_000490.pdf
--- a/backend/documents_parser/tests/fixtures/Summary_000516.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_000516.pdf
--- a/backend/documents_parser/tests/fixtures/Summary_001479.pdf
+++ b/backend/documents_parser/tests/fixtures/Summary_001479.pdf
--- a/backend/documents_parser/tests/test_elmhurst_end_to_end.py
+++ b/backend/documents_parser/tests/test_elmhurst_end_to_end.py
@ -5,7 +5,7 @@ from datetime import date
 import pytest

 from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
-from datatypes.epc.domain.epc_property_data import EpcPropertyData
+from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier, EpcPropertyData
 from datatypes.epc.domain.mapper import EpcPropertyDataMapper

 FIXTURE_PATH = os.path.join(
@ -130,16 +130,23 @@ class TestBuildingPart:
        assert len(result.sap_building_parts) == 1

    def test_identifier(self, result: EpcPropertyData) -> None:
-        assert result.sap_building_parts[0].identifier == "main"
+        assert result.sap_building_parts[0].identifier is BuildingPartIdentifier.MAIN

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
-        assert result.sap_building_parts[0].construction_age_band == "1950-1966"
+        # Spec age-band letter code per RdSAP10 Table 1; the cascade
+        # reads this code letter for U-value lookups, not the year-range
+        # description.
+        assert result.sap_building_parts[0].construction_age_band == "D"

    def test_wall_construction(self, result: EpcPropertyData) -> None:
-        assert result.sap_building_parts[0].wall_construction == "Cavity"
+        # SAP10 wall_construction integer: 4 = Cavity (per
+        # domain.ml.rdsap_uvalues.WALL_CAVITY).
+        assert result.sap_building_parts[0].wall_construction == 4

    def test_wall_insulation_type(self, result: EpcPropertyData) -> None:
-        assert result.sap_building_parts[0].wall_insulation_type == "Filled Cavity"
+        # SAP10 wall_insulation_type integer: 2 = Filled cavity (per
+        # domain.ml.rdsap_uvalues.WALL_INSULATION_FILLED_CAVITY).
+        assert result.sap_building_parts[0].wall_insulation_type == 2

    def test_wall_thickness_measured(self, result: EpcPropertyData) -> None:
        assert result.sap_building_parts[0].wall_thickness_measured is True
@ -194,14 +201,25 @@ class TestWindows:
    def test_window_count(self, result: EpcPropertyData) -> None:
        assert len(result.sap_windows) == 4

-    def test_first_window_width(self, result: EpcPropertyData) -> None:
-        assert result.sap_windows[0].window_width == 1.30
+    def test_first_window_area(self, result: EpcPropertyData) -> None:
+        # The Elmhurst mapper lodges the Summary PDF's precomputed Area
+        # (1.30 × 1.10 = 1.43 m²) as `window_width × 1.0` to avoid the
+        # 2-d.p. round-trip drift that W × H reintroduces. The cascade
+        # reads only the product, so flattening to (area, 1.0) is
+        # behaviourally equivalent to (1.30, 1.10) modulo precision.
+        w = result.sap_windows[0]
+        assert w.window_width * w.window_height == 1.43

    def test_first_window_height(self, result: EpcPropertyData) -> None:
-        assert result.sap_windows[0].window_height == 1.10
+        # See `test_first_window_area` — the mapper normalises height
+        # to 1.0 so the lodged Area can be carried as the canonical
+        # geometry without re-multiplying.
+        assert result.sap_windows[0].window_height == 1.0

    def test_first_window_orientation(self, result: EpcPropertyData) -> None:
-        assert result.sap_windows[0].orientation == "North"
+        # SAP10 octant code: 1 = North. The solar-gains cascade keys
+        # off the integer, not the cardinal-direction string.
+        assert result.sap_windows[0].orientation == 1

    def test_first_window_glazing_type(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].glazing_type == "Double post or during 2022"
@ -210,7 +228,8 @@ class TestWindows:
        assert result.sap_windows[0].draught_proofed is True

    def test_third_window_orientation(self, result: EpcPropertyData) -> None:
-        assert result.sap_windows[2].orientation == "South"
+        # SAP10 octant code: 5 = South.
+        assert result.sap_windows[2].orientation == 5

    def test_frame_factor(self, result: EpcPropertyData) -> None:
        assert result.sap_windows[0].frame_factor == 0.7
@ -233,12 +252,14 @@ class TestHeating:
        assert len(result.sap_heating.main_heating_details) == 1

    def test_fuel_type(self, result: EpcPropertyData) -> None:
-        assert result.sap_heating.main_heating_details[0].main_fuel_type == "Mains gas"
+        # SAP10.2 Table 12 fuel code: 26 = mains gas (not community).
+        # The cascade only consumes the int code; strings drop the
+        # standing-charge / PE-factor / CO2-factor lookups.
+        assert result.sap_heating.main_heating_details[0].main_fuel_type == 26

    def test_heat_emitter_type(self, result: EpcPropertyData) -> None:
-        assert (
-            result.sap_heating.main_heating_details[0].heat_emitter_type == "Radiators"
-        )
+        # SAP10.2 heat-emitter code: 1 = Radiators.
+        assert result.sap_heating.main_heating_details[0].heat_emitter_type == 1

    def test_emitter_temperature(self, result: EpcPropertyData) -> None:
        assert (
@ -252,10 +273,10 @@ class TestHeating:
        assert result.sap_heating.main_heating_details[0].has_fghrs is False

    def test_main_heating_control(self, result: EpcPropertyData) -> None:
-        assert (
-            result.sap_heating.main_heating_details[0].main_heating_control
-            == "Programmer, room thermostat and TRVs"
-        )
+        # SAP10.2 main_heating_control code extracted from the Elmhurst
+        # "SAP code 2106, Programmer, room thermostat and TRVs" string;
+        # the cascade keys efficiency adjustments off the integer.
+        assert result.sap_heating.main_heating_details[0].main_heating_control == 2106

    def test_shower_outlet_type(self, result: EpcPropertyData) -> None:
        assert result.sap_heating.shower_outlets is not None
--- a/backend/documents_parser/tests/test_end_to_end.py
+++ b/backend/documents_parser/tests/test_end_to_end.py
@ -6,6 +6,7 @@ import pytest
 from backend.documents_parser.extractor import PasHubRdSapSiteNotesExtractor
 from backend.documents_parser.pdf import pdf_to_text_list
 from datatypes.epc.domain.epc_property_data import (
+    BuildingPartIdentifier,
    EpcPropertyData,
    InstantaneousWwhrs,
    MainHeatingDetail,
@ -187,7 +188,7 @@ class TestPdfToEpcPropertyData:
            ),
            sap_building_parts=[
                SapBuildingPart(
-                    identifier="main",
+                    identifier=BuildingPartIdentifier.MAIN,
                    construction_age_band="1950-1966",
                    wall_construction="Cavity",
                    wall_insulation_type="Filled Cavity",
@ -218,7 +219,7 @@ class TestPdfToEpcPropertyData:
                    floor_u_value_known=False,
                ),
                SapBuildingPart(
-                    identifier="extension_1",
+                    identifier=BuildingPartIdentifier.EXTENSION_1,
                    construction_age_band="2003-2006",
                    wall_construction="Cavity",
                    wall_insulation_type="As built",
--- a/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
+++ b/backend/documents_parser/tests/test_summary_pdf_mapper_chain.py
@ -0,0 +1,760 @@
+"""End-to-end validation for the Elmhurst Summary→EpcPropertyData chain.
+
+The 6 Elmhurst worksheet fixtures in `domain.sap.worksheet.tests`
+build their `EpcPropertyData` synthetically — they validate the
+calculator + cascade in isolation from the mapper. This file pins
+the OTHER half of the chain: `from_elmhurst_site_notes` must produce
+a calculator-equivalent `EpcPropertyData` when fed the Summary PDF
+the worksheet was generated from. Together with the worksheet
+cascade tests, this closes the loop: extractor + mapper + cascade +
+calculator validated end-to-end against the authoritative
+Elmhurst documents.
+
+Status: GREEN. For cert U985-0001-000474, the full-chain test below
+pins the unrounded SAP to within 1e-4 of the worksheet PDF's `62.2584`
+(line 257) — the same tolerance at which the cascade reproduces
+Elmhurst's calculator on hand-built inputs (handbuilt → 62.2584 to
+4 d.p.). Fields the Summary PDF doesn't lodge (e.g.
+central_heating_pump_age) are non-load-bearing at that tolerance.
+
+Preprocessing: the existing `ElmhurstSiteNotesExtractor` was written
+against Textract-style output (label\\nvalue pairs in spatial
+reading order). We don't have Textract in the test environment, so
+this helper converts `pdftotext -layout` output (label-whitespace-
+value on a single line) into the Textract-style sequence the
+extractor expects. Test-only preprocessing; production runs through
+Textract directly.
+"""
+
+from __future__ import annotations
+
+import dataclasses
+import json
+import re
+import subprocess
+from pathlib import Path
+from typing import cast
+
+from backend.documents_parser.elmhurst_extractor import ElmhurstSiteNotesExtractor
+from datatypes.epc.domain.mapper import EpcPropertyDataMapper
+from domain.sap.calculator import calculate_sap_from_inputs
+from domain.sap.rdsap.cert_to_inputs import SAP_10_2_SPEC_PRICES, cert_to_inputs
+from domain.sap.worksheet.tests import (
+    _elmhurst_worksheet_000474 as _w000474,
+    _elmhurst_worksheet_000477 as _w000477,
+    _elmhurst_worksheet_000480 as _w000480,
+    _elmhurst_worksheet_000487 as _w000487,
+    _elmhurst_worksheet_000490 as _w000490,
+    _elmhurst_worksheet_000516 as _w000516,
+)
+
+_FIXTURES = Path(__file__).parent / "fixtures"
+_SUMMARY_000474_PDF = _FIXTURES / "Summary_000474.pdf"
+_SUMMARY_000477_PDF = _FIXTURES / "Summary_000477.pdf"
+_SUMMARY_000480_PDF = _FIXTURES / "Summary_000480.pdf"
+_SUMMARY_000487_PDF = _FIXTURES / "Summary_000487.pdf"
+_SUMMARY_000490_PDF = _FIXTURES / "Summary_000490.pdf"
+_SUMMARY_000516_PDF = _FIXTURES / "Summary_000516.pdf"
+_SUMMARY_001479_PDF = _FIXTURES / "Summary_001479.pdf"
+
+# GOV.UK EPB API JSON for cert 001479 — the API-path counterpart of the
+# Summary_001479.pdf fixture. Together they drive the API ≡ Summary
+# parity workstream; Layer 4 of the validation stack is "API cascade SAP
+# matches worksheet continuous SAP at 1e-4".
+_API_001479_JSON = (
+    Path(__file__).parents[3]
+    / "packages/domain/src/domain/sap/rdsap/tests/fixtures/golden"
+    / "0535-9020-6509-0821-6222.json"
+)
+
+
+def _summary_pdf_to_textract_style_pages(pdf_path: Path) -> list[str]:
+    """Render a Summary PDF as one newline-delimited token stream per page.
+
+    The page count is read from `pdfinfo`; each page is then dumped
+    individually with `pdftotext -layout`. Every non-blank layout line
+    is stripped and split wherever two or more whitespace characters
+    occur, which separates the label and value cells that share a line.
+    Blank layout lines are kept as empty tokens. The result is the
+    label\\nvalue shape `ElmhurstSiteNotesExtractor` is fed in these
+    tests.
+
+    Raises:
+        RuntimeError: `pdfinfo` output carries no "Pages:" entry.
+        subprocess.CalledProcessError: either tool exits non-zero.
+    """
+    pdfinfo_out = subprocess.run(
+        ["pdfinfo", str(pdf_path)], capture_output=True, text=True, check=True
+    ).stdout
+    pages_match = re.search(r"Pages:\s+(\d+)", pdfinfo_out)
+    if pages_match is None:
+        raise RuntimeError(f"Could not parse page count from {pdf_path}")
+    last_page = int(pages_match.group(1))
+
+    def page_stream(page_no: int) -> str:
+        # Dump exactly one page (-f and -l both set to page_no) to stdout.
+        raw = subprocess.run(
+            [
+                "pdftotext", "-layout", "-f", str(page_no), "-l", str(page_no),
+                str(pdf_path), "-",
+            ],
+            capture_output=True, text=True, check=True,
+        ).stdout
+        stream: list[str] = []
+        for raw_line in raw.splitlines():
+            stripped = raw_line.strip()
+            if stripped:
+                stream.extend(
+                    cell for cell in re.split(r"\s{2,}", stripped) if cell
+                )
+            else:
+                # Preserve the blank line as an empty token.
+                stream.append("")
+        return "\n".join(stream)
+
+    return [page_stream(page_no) for page_no in range(1, last_page + 1)]
+
+
+def test_summary_000474_mapper_produces_three_building_parts() -> None:
+    # Arrange — the hand-built worksheet fixture for cert
+    # U985-0001-000474 (packages/domain/src/domain/sap/worksheet/tests/
+    # _elmhurst_worksheet_000474.py) models a mid-terrace made of three
+    # building parts: Main plus two extensions. The Summary PDF, once
+    # routed through extractor + mapper, has to report the same count.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    assert len(mapped.sap_building_parts) == 3
+
+
+def test_summary_000474_mapper_extracts_seven_windows() -> None:
+    # Arrange — the §11 table of cert U985-0001-000474 lodges 7 windows
+    # spread over Main, 1st Extension and 2nd Extension. The legacy
+    # Textract-style window parser had nothing to anchor on in the
+    # Summary PDF's tabular layout; the W/H/Area-plus-Manufacturer
+    # anchor pair is what finds all of them.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    assert len(mapped.sap_windows) == 7
+
+
+def test_summary_000474_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — runs Summary → ElmhurstSiteNotes → EpcPropertyData →
+    # cascade → SAP and compares with the unrounded rating on the
+    # U985-0001-000474 worksheet PDF (line 257: SAP value 62.2584,
+    # rating (258) = 62). The Summary PDF holds the same
+    # source-of-truth data the hand-built worksheet fixture encodes,
+    # and the cascade matches Elmhurst's calculator to 4 d.p. on those
+    # hand-built inputs, so this path MUST land on the same unrounded
+    # value. Non-trivial drift means the mapper is dropping
+    # information from the Summary PDF.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
+    ).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Act
+    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert — 1e-4, the tolerance the other Elmhurst worksheet tests
+    # pin against. The looser 0.5 figure is the API-cert residual
+    # tolerance (the API publishes rounded SAP integers, so up to half
+    # a point is rounding alone); it does not apply here, because on
+    # Elmhurst worksheet inputs the cascade reproduces Elmhurst exactly.
+    expected_sap = 62.2584
+    drift = result.sap_score_continuous - expected_sap
+    assert abs(drift) < 1e-4
+
+
+def test_summary_000477_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — cert U985-0001-000477: single-bp mid-terrace, 15.06 m²
+    # Room-in-Roof storey, zero baths lodged; the worksheet PDF lodges
+    # unrounded SAP 65.0057. Exercises the
+    # `RoomInRoof.detailed_surfaces` cascade (stud walls @ 100mm
+    # Mineral, two uninsulated slopes, two party gable walls) and the
+    # RR/storey-area suspended-timber-floor heuristic (RIR < storey →
+    # 0.2 ACH floor infiltration).
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
+    ).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Act
+    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert
+    expected_sap = 65.0057
+    drift = result.sap_score_continuous - expected_sap
+    assert abs(drift) < 1e-4
+
+
+def test_summary_000480_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — cert U985-0001-000480: mid-terrace, main + one
+    # extension, 19.83 m² room-in-roof storey. The worksheet PDF's
+    # "SAP value" line lodges unrounded SAP 61.2986. The Detailed §3.10
+    # RR surfaces (2 stud walls @ 0mm + 2 slopes @ 0mm + 1 flat ceiling
+    # @ 0mm + 2 party gables) together with zero baths take the chain
+    # to 1e-4.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000480_PDF)
+    ).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Act
+    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert
+    expected_sap = 61.2986
+    drift = result.sap_score_continuous - expected_sap
+    assert abs(drift) < 1e-4
+
+
+def test_summary_000487_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — cert U985-0001-000487: enclosed-mid-terrace, main bp +
+    # 1st extension, 21.03 m² Room-in-Roof, an electric shower, and a
+    # 1.43 m² Timber Frame alternative wall on the extension. The
+    # worksheet PDF lodges unrounded SAP 61.6431. Two things have to
+    # work for the mapped chain to get there: the alt-wall U-value
+    # cascade (Thickness Unknown → falls back to the age-band default
+    # U=1.9 for thin timber walls), and the §11 layout variant in which
+    # frame_factor sits unprefixed on its own line (no "PVC"/"Wood"
+    # frame_type).
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF)
+    ).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Act
+    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert
+    expected_sap = 61.6431
+    drift = result.sap_score_continuous - expected_sap
+    assert abs(drift) < 1e-4
+
+
+def test_summary_000516_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — cert U985-0001-000516: mid-terrace, main bp + 19.02 m²
+    # room-in-roof; the worksheet PDF lodges unrounded SAP 62.7937.
+    # Its §11 table holds 5 vertical windows (U=2.80) and 1 roof window
+    # (U=3.10 in cert, U=3.40 Table 24 raw). The mapper tells them
+    # apart with `U > 3.0` and sends the high-U entry to
+    # `sap_roof_windows`, so that its solar gains reach §6 with the
+    # right pitch (45°) and the Table-24 U-value.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF)
+    ).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Act
+    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert
+    expected_sap = 62.7937
+    drift = result.sap_score_continuous - expected_sap
+    assert abs(drift) < 1e-4
+
+
+def test_summary_000490_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — cert U985-0001-000490: end-terrace, main + 1st
+    # extension; the worksheet PDF lodges unrounded SAP 57.3979.
+    # Reproducing it to 1e-4 needs both of the following: the
+    # end-terrace built form driving sheltered_sides=1 (RdSAP §S5), and
+    # the secondary heating SAP code (691, electric panel) carried in
+    # the Summary's §14.1 Main Heating2 sub-section.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF)
+    ).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Act
+    sap_inputs = cert_to_inputs(mapped, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert
+    expected_sap = 57.3979
+    drift = result.sap_score_continuous - expected_sap
+    assert abs(drift) < 1e-4
+
+
+def test_summary_001479_mapper_extensions_count_matches_extension_bps() -> None:
+    # Arrange — Summary_001479 (cert 0535-9020-6509-0821-6222) is the
+    # first cohort cert that has an actual GOV.UK API counterpart. Its
+    # worksheet PDF lodges Main + Extension 1 + Extension 2, i.e. 3
+    # building parts of which 2 are extensions. Pre-slice, the Elmhurst
+    # mapper hard-coded `extensions_count=0` whatever
+    # survey.extensions held; this checks the count now flows through.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    assert mapped.extensions_count == 2
+    assert len(mapped.sap_building_parts) == 3
+
+
+def test_summary_001479_main_party_wall_construction_is_cavity_unfilled() -> None:
+    # Arrange — Main §7 Walls on cert 001479 lodges "Party Wall Type:
+    # CU Cavity masonry unfilled". The Elmhurst leading-code map used
+    # to know only "S" and "C", so "CU" fell through to None and the
+    # cascade defaulted to U=0.25 where the worksheet lodges U=0.50.
+    # The fix maps "CU" to SAP10 wall_construction code 4
+    # (WALL_CAVITY), which `u_party_wall` resolves to U=0.50 — in line
+    # with the worksheet's §3 `Party walls Main … 0.50` row.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    main_part = mapped.sap_building_parts[0]
+    assert main_part.party_wall_construction == 4
+
+
+def test_summary_001479_ext2_floor_is_exposed_to_external_air() -> None:
+    # Arrange — Ext2 §9 on cert 001479 lodges "Location: E To external
+    # air": a cantilevered exposed timber floor (the upper-storey
+    # extension over the back garden). The worksheet's §3 row `Exposed
+    # floor Ext2 … 1.92, 1.20, 1.20` pins it at U=1.20 via Table 20.
+    # Pre-slice, only "U Above unheated space" was routed through
+    # `is_exposed_floor=True`; "E To external air" dropped into the
+    # BS EN ISO 13370 ground-floor cascade and the lodged exposure was
+    # lost entirely.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    second_extension = mapped.sap_building_parts[2]
+    assert second_extension.floor_type == "To external air"
+    first_floor_dimension = second_extension.sap_floor_dimensions[0]
+    assert first_floor_dimension.is_exposed_floor is True
+
+
+def test_summary_001479_ext2_sloping_ceiling_roof_uninsulated_for_pre_1950() -> None:
+    # Arrange — Ext2 §8 on cert 001479 lodges "Type: PS Pitched,
+    # sloping ceiling" with "Insulation Thickness: As Built" and age
+    # band C (1930-49). Original 1930s construction carried no
+    # sloping-ceiling insulation, and worksheet §3 `External roof Ext2
+    # … 2.30` pins U=2.30 (uninsulated Table 16 row 0). Pre-slice, the
+    # mapper passed thickness=None through, which sent `u_roof` to its
+    # pitched-roof Table 18 col 1 default (0.40 for age C, assumes
+    # loft-joist retrofit) — the wrong geometry for PS.
+    # Ext1 is also a PS roof but at age M; there thickness stays None
+    # (modern build, cascade default U=0.15 matches the worksheet).
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    building_parts = mapped.sap_building_parts
+    assert building_parts[2].roof_insulation_thickness == 0
+    assert building_parts[1].roof_insulation_thickness is None
+
+
+def test_summary_001479_secondary_heating_routes_mains_gas_fuel() -> None:
+    # Arrange — §14.1 Main Heating2 on cert 001479 lodges "Secondary
+    # Heating Code: SAP code 605, Flush fitting live effect gas fire,
+    # sealed to chimney". Only the SAP code (605) is surfaced by the
+    # Summary, so fuel type 26 (mains gas) has to be derived from the
+    # code range. That derivation is what lets the `_fuel_cost`
+    # orchestrator's `secondary_high_rate_gbp_per_kwh` use Table 32's
+    # gas tariff (£0.0348/kWh) instead of the default
+    # standard-electricity tariff (£0.132/kWh). Worksheet line (242)
+    # "Space heating - secondary … 3.4800 70.5022" confirms gas pricing.
+    survey = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
+    ).extract()
+
+    # Act
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(survey)
+
+    # Assert
+    heating = mapped.sap_heating
+    assert heating.secondary_heating_type == 605
+    assert heating.secondary_fuel_type == 26
+
+
+def test_summary_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — cert 001479 (Summary_001479.pdf / P960-0001-001479.pdf)
+    # is the first cert in the cohort that also exists in the real
+    # GOV.UK EPB API (cert ref 0535-9020-6509-0821-6222). Its worksheet
+    # PDF lodges, on the "SAP value" line, an unrounded SAP of
+    # **69.0094** (rating C 69, which is also the integer the API
+    # publishes). The API↔Elmhurst parity workstream leans on this pin:
+    # the cohort 6 cert cascades all reproduce Elmhurst exactly at 1e-4
+    # on hand-built fixtures, so drift beyond 1e-4 here points at a
+    # mapper gap rather than a calculator bug.
+    #
+    # Source-data caveat (kept for future debuggers): the Summary's §3
+    # gives Ext1's age band as "M 2023 onwards" whereas the worksheet
+    # header says "Ext1: L" — probably an assessor data-entry
+    # inconsistency. The Summary is the mapper's source of truth, so it
+    # is trusted, and whatever residual the M vs L disagreement causes
+    # is accepted.
+    site_notes = ElmhurstSiteNotesExtractor(
+        _summary_pdf_to_textract_style_pages(_SUMMARY_001479_PDF)
+    ).extract()
+    epc = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+
+    # Act
+    sap_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert — pinned at 1e-4 with no widening and no xfail (project
+    # memory `feedback_zero_error_strict`).
+    lodged_sap = 69.0094
+    residual = abs(result.sap_score_continuous - lodged_sap)
+    assert residual < 1e-4
+
+
+def test_api_001479_full_chain_sap_matches_worksheet_pdf_exactly() -> None:
+    # Arrange — for cert 001479 there is an Elmhurst Summary PDF and
+    # also a GOV.UK EPB API JSON (ref 0535-9020-6509-0821-6222). The
+    # Summary cascade is already pinned to the worksheet's 69.0094 ±
+    # 1e-4 by the sibling Summary test; this one gates the Layer 4
+    # production path, i.e. API JSON → from_api_response →
+    # cert_to_inputs → calculate_sap_from_inputs, which has to land on
+    # 69.0094 at 1e-4 as well. The calculator is deterministic, so
+    # identical inputs give identical outputs and any drift is a mapper
+    # coverage gap.
+    api_document = json.loads(_API_001479_JSON.read_text())
+    epc = EpcPropertyDataMapper.from_api_response(api_document)
+
+    # Act
+    sap_inputs = cert_to_inputs(epc, prices=SAP_10_2_SPEC_PRICES)
+    result = calculate_sap_from_inputs(sap_inputs)
+
+    # Assert — pinned at 1e-4 against the worksheet's continuous SAP.
+    # The ±0.5 tolerance is only the fallback for certs with no
+    # worksheet (project memory `feedback_api_tolerance_1e_minus_4`);
+    # with a worksheet to hand, the bar is identical inputs producing
+    # identical outputs.
+    lodged_sap = 69.0094
+    residual = abs(result.sap_score_continuous - lodged_sap)
+    assert residual < 1e-4
+
+
+# ============================================================================
+# Mapper-vs-hand-built EpcPropertyData diff tests
+# ============================================================================
+# The 6 cohort hand-builts (_elmhurst_worksheet_NNNNNN.build_epc) are the
+# 100%-correct calculator-input ground truth — each cascades to its
+# worksheet PDF's lodged SAP at 1e-4. The chain tests above only assert
+# cascade-output equivalence; the mapper can pass them by producing a
+# *different* EpcPropertyData that happens to cascade to the same number.
+#
+# These tests pin the missing layer: the mapper's EpcPropertyData must
+# match the hand-built's load-bearing fields exactly. Every divergence
+# surfaced here is a mapper coverage gap to close as its own slice.
+#
+# "Load-bearing" = the subset of EpcPropertyData fields that drive the
+# SAP cascade or carry semantic cross-mapper meaning. Cert-metadata
+# fields (address, registration dates, descriptive EnergyElement lists,
+# tariff strings) are excluded because they don't change calculator
+# output and vary by mapper pathway (the API publishes some, the
+# Elmhurst Summary publishes others) without semantic disagreement.
+
+# Descriptive SapWindow sub-fields that the cascade never reads. Each
+# mapper lodges them in its own Union[int, str] encoding — int codes
+# from the API mapper, the Summary's lodged strings from the Elmhurst
+# mapper — so diffing them would only produce noise. The fields the
+# cascade does read (window_width / window_height / orientation /
+# window_location / frame_factor / window_transmission_details.
+# {u_value, solar_transmittance}) are deliberately NOT listed here and
+# are still diffed.
+_NON_LOAD_BEARING_WINDOW_SUBFIELDS: frozenset[str] = frozenset((
+    "frame_material",
+    "glazing_gap",
+    "window_type",
+    "glazing_type",
+    "window_wall_type",
+    "draught_proofed",
+    "permanent_shutters_present",
+    "permanent_shutters_insulated",
+))
+
+
+def _is_excluded_path(path: str) -> bool:
+    """Tell the diff whether `path` should be skipped without a report.
+
+    Only paths of the form `sap_windows[<i>].<suffix>` can be excluded:
+    the suffix must be one of `_NON_LOAD_BEARING_WINDOW_SUBFIELDS` or
+    exactly `window_transmission_details.data_source`. These are the
+    Union[int, str] encoding differences between the API and Elmhurst
+    mapper outputs that do not affect the cascade and that the cohort
+    hand-built fixtures do not pin. Every other path returns False."""
+    if not path.startswith("sap_windows[") or "]." not in path:
+        return False
+    # Everything after the first "]." is the field path inside the window.
+    _, suffix = path.split("].", 1)
+    return (
+        suffix in _NON_LOAD_BEARING_WINDOW_SUBFIELDS
+        or suffix == "window_transmission_details.data_source"
+    )
+
+
+# Top-level EpcPropertyData attribute names compared by the mapper-vs-
+# hand-built tests: each name is read from both objects with `getattr`
+# and the two values are diffed recursively by `_diff_load_bearing`.
+# The inline comments group the names by the role they play.
+_LOAD_BEARING_FIELDS: tuple[str, ...] = (
+    # Cascade-driving structural fields
+    "sap_building_parts",
+    "sap_windows",
+    "sap_roof_windows",
+    "sap_heating",
+    "sap_ventilation",
+    "sap_energy_source",
+    "total_floor_area_m2",
+    # Building-classification fields driving default cascades
+    "dwelling_type",
+    "built_form",
+    "property_type",
+    "country_code",
+    "postcode",
+    # Counts and openings
+    "door_count",
+    "insulated_door_count",
+    "insulated_door_u_value",
+    "habitable_rooms_count",
+    "heated_rooms_count",
+    "wet_rooms_count",
+    "extensions_count",
+    "open_chimneys_count",
+    "blocked_chimneys_count",
+    "extract_fans_count",
+    # Lighting
+    "cfl_fixed_lighting_bulbs_count",
+    "led_fixed_lighting_bulbs_count",
+    "incandescent_fixed_lighting_bulbs_count",
+    "low_energy_fixed_lighting_bulbs_count",
+    "fixed_lighting_outlets_count",
+    "low_energy_fixed_lighting_outlets_count",
+    # HW / appliances
+    "solar_water_heating",
+    "has_hot_water_cylinder",
+    "has_fixed_air_conditioning",
+    "has_conservatory",
+    "has_heated_separate_conservatory",
+    # Envelope drivers
+    "percent_draughtproofed",
+    "mechanical_ventilation",
+    "pressure_test",
+    # Construction-detail flags
+    "addendum",
+    "lzc_energy_sources",
+    "any_unheated_rooms",
+    "number_of_storeys",
+    "sap_flat_details",
+)
+
+
+def _diff_load_bearing(
+    mapped: object, hand_built: object, path: str = "",
+) -> list[str]:
+    """Walk two values in lockstep and report where they disagree.
+
+    Returns a list holding one message per divergence between the
+    mapper's value (`mapped`) and the hand-built fixture's value
+    (`hand_built`); `path` is the dotted/indexed location that labels
+    each message. Dataclass instances are compared field by field and
+    lists element by element; anything else is compared with `!=`.
+
+    A type mismatch is reported and not descended into, except that an
+    int/float pair falls through to the value comparison, so equal
+    numbers of different numeric types are not flagged. Type and leaf
+    mismatches on paths matched by `_is_excluded_path` are suppressed;
+    list-length mismatches are always reported.
+
+    Strict-pyright posture: both arguments are annotated `object` and
+    narrowed with `isinstance` in each branch, rather than threading
+    `Any` through the recursion."""
+    if type(mapped) is not type(hand_built) and not (
+        isinstance(mapped, (int, float)) and isinstance(hand_built, (int, float))
+    ):
+        if _is_excluded_path(path):
+            return []
+        return [
+            f"{path}: TYPE {type(mapped).__name__} vs "
+            f"{type(hand_built).__name__}  mapped={mapped!r}  "
+            f"handbuilt={hand_built!r}"
+        ]
+
+    if dataclasses.is_dataclass(mapped) and not isinstance(mapped, type) \
+            and dataclasses.is_dataclass(hand_built) and not isinstance(hand_built, type):
+        field_diffs: list[str] = []
+        for spec in dataclasses.fields(mapped):
+            # Top-level calls start with an empty path, so avoid a leading dot.
+            child_path = f"{path}.{spec.name}" if path else spec.name
+            field_diffs += _diff_load_bearing(
+                getattr(mapped, spec.name),
+                getattr(hand_built, spec.name),
+                child_path,
+            )
+        return field_diffs
+
+    if isinstance(mapped, list) and isinstance(hand_built, list):
+        left = cast("list[object]", mapped)
+        right = cast("list[object]", hand_built)
+        if len(left) != len(right):
+            # Element-wise pairing is meaningless once lengths differ.
+            return [f"{path}: LEN {len(left)} vs {len(right)}"]
+        item_diffs: list[str] = []
+        for index, pair in enumerate(zip(left, right)):
+            item_diffs += _diff_load_bearing(pair[0], pair[1], f"{path}[{index}]")
+        return item_diffs
+
+    if mapped != hand_built and not _is_excluded_path(path):
+        return [f"{path}: mapped={mapped!r}  handbuilt={hand_built!r}"]
+    return []
+
+
+def _assert_mapper_matches_hand_built(
+    mapped: object, hand_built: object, cert: str,
+) -> None:
+    """Assert that `mapped` and `hand_built` agree on every load-bearing field.
+
+    Shared by the per-cert mapper-vs-hand-built tests so the field walk
+    and the failure-message format live in one place. Each name in
+    `_LOAD_BEARING_FIELDS` is read from both objects (a missing
+    attribute reads as None on that side) and diffed with
+    `_diff_load_bearing`; any resulting lines fail the assertion, one
+    divergence per line. `cert` is the cohort cert number and is used
+    only in the failure message."""
+    diffs: list[str] = []
+    for field_name in _LOAD_BEARING_FIELDS:
+        diffs.extend(_diff_load_bearing(
+            getattr(mapped, field_name, None),
+            getattr(hand_built, field_name, None),
+            field_name,
+        ))
+    assert not diffs, (
+        f"{len(diffs)} load-bearing divergence(s) between mapped and "
+        f"hand-built EpcPropertyData for cohort cert {cert}:\n  " +
+        "\n  ".join(diffs)
+    )
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000474() -> None:
+    # Arrange — _elmhurst_worksheet_000474.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000474; it cascades
+    # to the worksheet PDF's `SAP value 62.2584` at 1e-4 (cohort SAP-
+    # result pin). Routing the corresponding Summary PDF through the
+    # Elmhurst mapper MUST produce a load-bearing-field-equivalent
+    # EpcPropertyData; any divergence is a mapper-coverage gap.
+    #
+    # Tracer-bullet origin: this was the first cert pinned this way; the
+    # 5 other cohort fixtures follow below. Cert 001479 is to be added
+    # after `_elmhurst_worksheet_001479` lands at 1e-4 via Slice 62
+    # iteration.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000474_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000474.build_epc()
+
+    # Act / Assert
+    _assert_mapper_matches_hand_built(mapped, hand_built, "000474")
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000477() -> None:
+    # Arrange — _elmhurst_worksheet_000477.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000477 (single-bp
+    # mid-terrace, age band B, RIR with stud walls + party gables, no
+    # extension); it cascades to the worksheet PDF's `SAP value 65.0057`
+    # at 1e-4. Routing the Summary PDF through the Elmhurst mapper MUST
+    # produce a load-bearing-field-equivalent EpcPropertyData; any
+    # divergence is a mapper-coverage gap to close as its own slice.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000477_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000477.build_epc()
+
+    # Act / Assert
+    _assert_mapper_matches_hand_built(mapped, hand_built, "000477")
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000480() -> None:
+    # Arrange — _elmhurst_worksheet_000480.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000480 (mid-terrace
+    # with main + 1 extension + 19.83 m² RIR, gas combi); it cascades
+    # to the worksheet PDF's `SAP value 61.2986` at 1e-4. Routing the
+    # Summary PDF through the Elmhurst mapper MUST produce a load-
+    # bearing-field-equivalent EpcPropertyData; any divergence is a
+    # mapper-coverage gap to close as its own slice.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000480_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000480.build_epc()
+
+    # Act / Assert
+    _assert_mapper_matches_hand_built(mapped, hand_built, "000480")
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000487() -> None:
+    # Arrange — _elmhurst_worksheet_000487.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000487 (Enclosed
+    # Mid-Terrace, main + 1 extension + 21.03 m² RIR with explicit-U
+    # gable_wall_external, gas combi, 1 electric shower, 1.43 m²
+    # timber-frame alt wall on the extension); it cascades to the
+    # worksheet PDF's `SAP value 61.6431` at 1e-4. Routing the Summary
+    # PDF through the Elmhurst mapper MUST produce a load-bearing-
+    # field-equivalent EpcPropertyData; any divergence is a mapper-
+    # coverage gap to close as its own slice.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000487_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000487.build_epc()
+
+    # Act / Assert
+    _assert_mapper_matches_hand_built(mapped, hand_built, "000487")
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000490() -> None:
+    # Arrange — _elmhurst_worksheet_000490.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000490 (End-Terrace,
+    # main + 1 extension, gas combi + gas-secondary; sheltered_sides=1
+    # per RdSAP §S5); it cascades to the worksheet PDF's `SAP value
+    # 57.3979` at 1e-4. Routing the Summary PDF through the Elmhurst
+    # mapper MUST produce a load-bearing-field-equivalent
+    # EpcPropertyData; any divergence is a mapper-coverage gap.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000490_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000490.build_epc()
+
+    # Act / Assert
+    _assert_mapper_matches_hand_built(mapped, hand_built, "000490")
+
+
+def test_from_elmhurst_site_notes_matches_hand_built_000516() -> None:
+    # Arrange — _elmhurst_worksheet_000516.build_epc() is the canonical
+    # hand-built EpcPropertyData for cert U985-0001-000516 (Mid-Terrace,
+    # main + 19.02 m² RIR, 5 vertical windows + 1 roof window which the
+    # mapper routes to `sap_roof_windows` per `U > 3.0` discrimination);
+    # it cascades to the worksheet PDF's `SAP value 62.7937` at 1e-4.
+    # Routing the Summary PDF through the Elmhurst mapper MUST produce
+    # a load-bearing-field-equivalent EpcPropertyData.
+    pages = _summary_pdf_to_textract_style_pages(_SUMMARY_000516_PDF)
+    site_notes = ElmhurstSiteNotesExtractor(pages).extract()
+    mapped = EpcPropertyDataMapper.from_elmhurst_site_notes(site_notes)
+    hand_built = _w000516.build_epc()
+
+    # Act / Assert
+    _assert_mapper_matches_hand_built(mapped, hand_built, "000516")
--- a/backend/epc_client/epc_client_service.py
+++ b/backend/epc_client/epc_client_service.py
@ -47,8 +47,14 @@ class EpcClientService:
        latest = max(results, key=lambda r: r.registration_date)
        return self.get_by_certificate_number(latest.certificate_number)

+    @staticmethod
+    def _normalise_postcode(postcode: str) -> str:
+        """Return the postcode with all whitespace removed and uppercased.
+
+        Whitespace means anything `str.split()` treats as a separator,
+        so tabs, non-breaking spaces and stray leading/trailing newlines
+        from pasted or imported values are stripped along with ordinary
+        spaces. An empty or whitespace-only input yields ""."""
+        return "".join(postcode.split()).upper()
+
    def search_by_postcode(self, postcode: str) -> list[EpcSearchResult]:
-        return call_with_retry(lambda: self._search(postcode=postcode))
+        """Search for certificates by postcode.
+
+        The postcode is first passed through `_normalise_postcode`, and
+        the search itself is issued via `call_with_retry`."""
+        normalised = self._normalise_postcode(postcode)
+        return call_with_retry(lambda: self._search(postcode=normalised))

    # ------------------------------------------------------------------
    # Private helpers
--- a/backend/magic_plan/magic_plan_service.py
+++ b/backend/magic_plan/magic_plan_service.py
@ -1,7 +1,7 @@
 import gzip
 import json
 from datetime import datetime, timezone
-from typing import Optional
+from typing import Optional, cast

 from datatypes.magicplan.api.response import MagicPlanPlan, PlanSummary
 from datatypes.magicplan.domain.mapper import map_plan
@ -55,8 +55,9 @@ class MagicPlanService:
        )

        with db_session() as session:
-            save_plan(session, plan)
            session.add(uploaded_file)
+            session.flush()
+            save_plan(session, plan, cast(int, uploaded_file.id))

        return plan

--- a/backend/magic_plan/tests/test_magic_plan_service.py
+++ b/backend/magic_plan/tests/test_magic_plan_service.py
@ -271,3 +271,38 @@ def test_run_creates_uploaded_file_record(
    assert uploaded_file.s3_upload_timestamp is not None
    assert uploaded_file.uprn == 100023336956
    assert uploaded_file.hubspot_deal_id == "deal-789"
+
+
+def test_run_passes_flushed_uploaded_file_id_to_save_plan(
+    mock_client: MagicMock,
+    plan_summary: PlanSummary,
+) -> None:
+    """`run` must hand `save_plan` the UploadedFile id that only exists
+    after the session has been flushed."""
+    # Arrange
+    mock_client.get_plans.return_value = [plan_summary]
+    service = _make_service(mock_client)
+    session = MagicMock()
+    pending: list = []
+    session.add.side_effect = pending.append
+
+    def assign_ids_on_flush() -> None:
+        # Stand in for the database assigning a primary key at flush time.
+        for candidate in pending:
+            if isinstance(candidate, UploadedFile):
+                candidate.id = 42
+
+    session.flush.side_effect = assign_ids_on_flush
+
+    find_patch = patch(
+        "backend.magic_plan.magic_plan_service.find_matching_plan",
+        return_value=plan_summary,
+    )
+    save_patch = patch("backend.magic_plan.magic_plan_service.save_plan")
+    db_patch = patch("backend.magic_plan.magic_plan_service.db_session")
+    s3_patch = patch("backend.magic_plan.magic_plan_service.save_data_to_s3")
+
+    with find_patch, save_patch as save_plan_mock, db_patch as db_session_mock, s3_patch:
+        db_session_mock.return_value.__enter__.return_value = session
+        # Act
+        service.run(_make_request())
+
+    # Assert — the third positional argument of save_plan is the file id.
+    assert save_plan_mock.call_args[0][2] == 42
--- a/backend/pashub_fetcher/core_files.py
+++ b/backend/pashub_fetcher/core_files.py
@ -14,9 +14,12 @@ class CoreFiles(Enum):
    PAR_PHOTOPACK = "PAR Photo Pack"
    PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
    PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
+    IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation"
+    MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan"
+    RETROFIT_DESIGN_DOC = "Retrofit Design Doc"


-CORE_TO_FILETYPE_MAP = {
+_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
    CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value,
    CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value,
    CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value,
@ -26,11 +29,49 @@ CORE_TO_FILETYPE_MAP = {
    CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value,
    CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value,
    CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value,
+    CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
+    CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
+    CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value,
 }


-def infer_file_type(filename: str) -> Optional[str]:
-    for core_file, file_type in CORE_TO_FILETYPE_MAP.items():
+def get_core_file_type(
+    filename: str, evidence_category: Optional[str] = None
+) -> Optional[CoreFiles]:
+    """Classify an evidence file as one of the `CoreFiles`, or None.
+
+    Rules are tried in this order and the first hit wins:
+
+    1. `evidence_category` equal to "retrofit design" (case-insensitive)
+       -> RETROFIT_DESIGN_DOC, whatever the filename is.
+    2. Filename contains the IMPROVEMENT_OPTION_EVALUATION value anywhere.
+    3. Filename contains the MEDIUM_TERM_IMPROVEMENT_PLAN value anywhere.
+    4. No evidence category was given and the filename contains both
+       "-OSM-" and "DR-N-A" -> RETROFIT_DESIGN_DOC.
+    5. Otherwise, the first remaining `CoreFiles` member (in enum
+       iteration order) whose value is a prefix of the filename. The
+       three types handled above are skipped in this prefix pass.
+
+    Returns None when no rule matches."""
+    # Identify retrofit design doc using evidence category as the name is possibly unreliable.
+    # We might change to always use evidence category, but needs more investigation
+    if evidence_category is not None and evidence_category.lower() == "retrofit design":
+        return CoreFiles.RETROFIT_DESIGN_DOC
+
+    if CoreFiles.IMPROVEMENT_OPTION_EVALUATION.value in filename:
+        return CoreFiles.IMPROVEMENT_OPTION_EVALUATION
+
+    if CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN.value in filename:
+        return CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
+
+    # Filename-only fallback, used when the caller has no evidence category.
+    if evidence_category is None and "-OSM-" in filename and "DR-N-A" in filename:
+        return CoreFiles.RETROFIT_DESIGN_DOC
+
+    # These types are matched by the rules above, never by filename prefix.
+    _prefix_skip = {
+        CoreFiles.RETROFIT_DESIGN_DOC,
+        CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
+        CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
+    }
+
+    for core_file in CoreFiles:
+        if core_file in _prefix_skip:
+            continue
+
        if filename.startswith(core_file.value):
-            return file_type
+            return core_file
+
    return None
+
+
+def get_file_type_string(filename: str) -> Optional[str]:
+    """Map a filename to its file-type string, or None if unrecognised.
+
+    The filename is classified with `get_core_file_type` (no evidence
+    category is supplied) and the resulting `CoreFiles` member is looked
+    up in `_CORE_FILE_TO_FILE_TYPE`."""
+    matched: Optional[CoreFiles] = get_core_file_type(filename)
+    return None if matched is None else _CORE_FILE_TO_FILE_TYPE[matched]
--- a/backend/pashub_fetcher/handler/handler.py
+++ b/backend/pashub_fetcher/handler/handler.py
@ -1,9 +1,11 @@
-from typing import Any, Dict, List
+from typing import Any, Callable, Dict, List, Optional

 from backend.app.config import get_settings
-from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
+from backend.pashub_fetcher.pashub_client import PashubClient
 from backend.pashub_fetcher.pashub_service import PashubService
-from backend.pashub_fetcher.pashub_to_ara_trigger_request import PashubToAraTriggerRequest
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+    PashubToAraTriggerRequest,
+)
 from backend.pashub_fetcher.token_getter import get_token_from_local_storage
 from backend.app.db.models.tasks import SourceEnum
 from backend.utils.subtasks import task_handler
@ -28,38 +30,41 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:

    settings = get_settings()

-    pas_hub_email = settings.PASHUB_EMAIL
-    pas_hub_password = settings.PASHUB_PASSWORD
+    pashub_email = settings.PASHUB_EMAIL
+    pashub_password = settings.PASHUB_PASSWORD

-    if (not pas_hub_email) or (not pas_hub_password):
+    coordination_hub_email = settings.PASHUB_COORDINATION_EMAIL
+    coordination_hub_password = settings.PASHUB_COORDINATION_PASSWORD
+    coordination_client_factory: Optional[Callable[[], PashubClient]] = None
+
+    if (not pashub_email) or (not pashub_password):
        raise ValueError("Pas Hub credentials not provided")

    sharepoint_client = DomnaSharepointClient(
        sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
    )

+    if coordination_hub_email and coordination_hub_password:
+        _coord_email, _coord_password = (
+            coordination_hub_email,
+            coordination_hub_password,
+        )
+        coordination_client_factory = lambda: get_pashub_client(
+            _coord_email, _coord_password
+        )
+
    logger.debug("Validating request body")
    payload = PashubToAraTriggerRequest.model_validate(body)
    logger.debug("Successfully validated request body")

    service = PashubService(
-        pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
+        pashub_client=get_pashub_client(pashub_email, pashub_password),
        sharepoint_client=sharepoint_client,
        s3_bucket=S3_BUCKET,
+        coordination_client_factory=coordination_client_factory,
    )

-    try:
-        files: List[str] = service.run(payload)
-    except UnauthorizedError:
-        logger.warning("Token expired - refreshing")
-
-        service = PashubService(
-            pashub_client=get_pashub_client(pas_hub_email, pas_hub_password),
-            sharepoint_client=sharepoint_client,
-            s3_bucket=S3_BUCKET,
-        )
-
-        files = service.run(payload)
+    files: List[str] = service.run(payload)

    logger.info(f"Saved {len(files)} files")

--- a/backend/pashub_fetcher/pashub_client.py
+++ b/backend/pashub_fetcher/pashub_client.py
@ -5,12 +5,11 @@ from datetime import datetime

 import requests

-from backend.pashub_fetcher.core_files import CoreFiles
+from backend.pashub_fetcher.core_files import CoreFiles, get_core_file_type
 from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
 from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
 from utils.logger import setup_logger

-
 logger = setup_logger()


@ -75,6 +74,10 @@ class PashubClient:
        logger.info(f"Getting UPRN for job ID {job_id}")
        url = f"{self.base}/jobs/{job_id}"

+        logger.debug(
+            f"About to make API request with session headers: {self.session.headers}"
+        )
+
        r = self.session.get(url)
        if r.status_code == 401:
            raise UnauthorizedError("Token expired or invalid")
@ -83,15 +86,12 @@ class PashubClient:

        try:
            return r.json()["uprn"]
-        except Exception:
+        except Exception as e:
+            logger.warning(
+                f"Failed to get UPRN for Job ID {job_id} with exception: {e}"
+            )
            return None

-    def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
-        for core_file in CoreFiles:
-            if file.file_name.startswith(core_file.value):
-                return core_file
-        return None
-
    def _select_latest_core_files(
        self,
        files: List[EvidenceFileData],
@ -99,7 +99,9 @@ class PashubClient:
        grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)

        for file in files:
-            core_type = self._get_core_file_type(file)
+            core_type: Optional[CoreFiles] = get_core_file_type(
+                file.file_name, file.evidence_category
+            )
            if not core_type:
                continue
            grouped[core_type].append(file)
@ -107,6 +109,9 @@ class PashubClient:
        latest_files: Dict[CoreFiles, EvidenceFileData] = {}

        for core_type, group in grouped.items():
+            if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1:
+                osm_candidates = [f for f in group if "-OSM-" in f.file_name]
+                group = osm_candidates if osm_candidates else group
            latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
            latest_files[core_type] = latest

--- a/backend/pashub_fetcher/pashub_service.py
+++ b/backend/pashub_fetcher/pashub_service.py
@ -1,6 +1,6 @@
 import os
 from datetime import datetime, timezone
-from typing import List, NamedTuple, Optional, cast
+from typing import Callable, List, NamedTuple, Optional, cast

 from backend.app.db.connection import db_session
 from backend.app.db.models.uploaded_file import (
@ -10,8 +10,8 @@ from backend.app.db.models.uploaded_file import (
 )
 from backend.documents_parser.db_writer import save_epc_property_data
 from backend.documents_parser.parser import parse_site_notes_pdf
-from backend.pashub_fetcher.core_files import infer_file_type
-from backend.pashub_fetcher.pashub_client import PashubClient
+from backend.pashub_fetcher.core_files import get_file_type_string
+from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
 from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
    PashubToAraTriggerRequest,
 )
@ -36,17 +36,37 @@ class PashubService:
        pashub_client: PashubClient,
        sharepoint_client: DomnaSharepointClient,
        s3_bucket: str,
+        coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
    ) -> None:
        self._pashub_client = pashub_client
        self._sharepoint_client = sharepoint_client
        self._s3_bucket = s3_bucket
+        self._coordination_client_factory = coordination_client_factory
+        self._coordination_client: Optional[PashubClient] = None
+
+    def _get_coordination_client(self) -> PashubClient:
+        """Return the coordination client, building it on first use.
+
+        The client is created by calling the factory given at
+        construction time and is then cached on the instance, so the
+        factory runs at most once.
+
+        Raises:
+            UnauthorizedError: if no coordination client factory was
+                configured.
+        """
+        factory = self._coordination_client_factory
+        if factory is None:
+            raise UnauthorizedError("No coordination client factory configured")
+        client = self._coordination_client
+        if client is None:
+            client = factory()
+            self._coordination_client = client
+        return client

    def run(self, request: PashubToAraTriggerRequest) -> List[str]:
        job_id = request.pashub_job_id
+        active_client = self._pashub_client
+
+        if request.uprn:
+            uprn: Optional[str] = request.uprn
+        else:
+            try:
+                uprn = active_client.get_uprn_by_job_id(job_id)
+            except UnauthorizedError:
+                logger.info(
+                    f"PasHub credentials unauthorized for job {job_id}; retrying with CoordinationHub credentials"
+                )
+                active_client = self._get_coordination_client()
+                uprn = active_client.get_uprn_by_job_id(job_id)

-        uprn: Optional[str] = request.uprn or self._pashub_client.get_uprn_by_job_id(
-            job_id
-        )
        hubspot_deal_id: Optional[str] = request.hubspot_deal_id

        if uprn:
@ -54,14 +74,25 @@ class PashubService:
        else:
            logger.info(f"No UPRN found for job {job_id}")

-        job_files: List[str] = self._pashub_client.get_core_evidence_files_by_job_id(
-            job_id
-        )
+        try:
+            job_files: List[str] = active_client.get_core_evidence_files_by_job_id(
+                job_id
+            )
+        except UnauthorizedError:
+            if active_client is not self._pashub_client:
+                raise
+            active_client = self._get_coordination_client()
+            job_files = active_client.get_core_evidence_files_by_job_id(job_id)

        if uprn or hubspot_deal_id:
            logger.info("Uploading files to s3")
+            file_source = (
+                FileSourceEnum.PAS_HUB
+                if active_client is self._pashub_client
+                else FileSourceEnum.COORDINATION_HUB
+            )
            upload_records = self._upload_to_s3_and_update_db(
-                job_files, uprn, hubspot_deal_id
+                job_files, uprn, hubspot_deal_id, file_source
            )
            self._save_site_notes(upload_records)

@ -83,6 +114,7 @@ class PashubService:
        job_files: List[str],
        uprn: Optional[str],
        hubspot_deal_id: Optional[str],
+        file_source: FileSourceEnum,
    ) -> List[_FileUploadRecord]:
        if not uprn and not hubspot_deal_id:
            return []
@ -108,8 +140,8 @@ class PashubService:
                s3_upload_timestamp=datetime.now(timezone.utc),
                uprn=int(uprn) if uprn else None,
                hubspot_deal_id=hubspot_deal_id,
-                file_source=FileSourceEnum.PAS_HUB.value,
-                file_type=infer_file_type(filename),
+                file_source=file_source.value,
+                file_type=get_file_type_string(filename),
            )
            file_paths.append(file_path)
            uploaded_files.append(uploaded_file)
--- a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
+++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py
@ -1,11 +1,10 @@
+import re
 from typing import Optional
 from pydantic import BaseModel


 class PashubToAraTriggerRequest(BaseModel):
-    pashub_link: (
-        str  # e.g. https://pashub.net/jobs/12345-abcd-1234-abcd-12345abcde/details
-    )
+    pashub_link: str  # e.g. https://pashub.net/jobs/{id}/details, /jobs/{id}/evidence/view, /jobs/{id}

    address: Optional[str] = None
    sharepoint_link: Optional[str] = None
@ -17,4 +16,7 @@ class PashubToAraTriggerRequest(BaseModel):

    @property
    def pashub_job_id(self) -> str:
-        return self.pashub_link.split("/")[-2]
+        """Extract the job ID from ``pashub_link``.
+
+        The ID is the path segment that directly follows ``/jobs/``, so
+        ``/jobs/{id}``, ``/jobs/{id}/details`` and ``/jobs/{id}/evidence/view``
+        all resolve to ``{id}``. The segment ends at the next ``/``, ``?`` or
+        ``#`` so a query string or fragment on the link is never returned as
+        part of the ID.
+
+        Raises:
+            ValueError: If the link contains no ``/jobs/{id}`` segment.
+        """
+        match = re.search(r"/jobs/([^/?#]+)", self.pashub_link)
+        if not match:
+            raise ValueError(f"No job ID found in PasHub link: {self.pashub_link}")
+        return match.group(1)
--- a/backend/pashub_fetcher/tests/test_core_files.py
+++ b/backend/pashub_fetcher/tests/test_core_files.py
@ -0,0 +1,185 @@
+from backend.pashub_fetcher.core_files import (
+    CoreFiles,
+    get_core_file_type,
+    get_file_type_string,
+)
+
+
+def test_file_type_for_photopack():
+    assert get_file_type_string("Photopack_123456_V1.pdf") == "photo_pack"
+
+
+def test_file_type_for_sitenote():
+    assert get_file_type_string("SiteNote_123456_V1.pdf") == "site_note"
+
+
+def test_file_type_for_rdsap_sitenote():
+    assert (
+        get_file_type_string("RdSAP_SiteNote_9510890_V1_Assessmet.pdf")
+        == "rd_sap_site_note"
+    )
+
+
+def test_file_type_for_pas2023_ventilation():
+    assert (
+        get_file_type_string("PAS 2023 Ventilation Assessment Report_123456.pdf")
+        == "pas_2023_ventilation"
+    )
+
+
+def test_file_type_for_pas2023_condition():
+    assert (
+        get_file_type_string("PAS 2023 Condition Report_123456.pdf")
+        == "pas_2023_condition"
+    )
+
+
+def test_file_type_for_pas_significance():
+    assert get_file_type_string("PAS Significance_123456.pdf") == "pas_significance"
+
+
+def test_file_type_for_par_photopack():
+    assert (
+        get_file_type_string("PAR Photo Pack_95101890_V2_Assessment.pdf")
+        == "par_photo_pack"
+    )
+
+
+def test_file_type_for_pas2023_property():
+    assert (
+        get_file_type_string("PAS 2023 Property Assessment Report_123456.pdf")
+        == "pas_2023_property"
+    )
+
+
+def test_file_type_for_pas2023_occupancy():
+    assert (
+        get_file_type_string("PAS 2023 Occupancy Assessment Report_123456.pdf")
+        == "pas_2023_occupancy"
+    )
+
+
+def test_file_type_for_improvement_option_evaluation():
+    # filename: "{job_id} - {postcode} - Improvement Option Evaluation.pdf"
+    assert (
+        get_file_type_string("6000802 - NG4 4HD - Improvement Option Evaluation.pdf")
+        == "improvement_option_evaluation"
+    )
+
+
+def test_file_type_for_medium_term_improvement_plan():
+    # filename: "{job_id} - {postcode} - Medium Term Improvement Plan IOE.pdf"
+    assert (
+        get_file_type_string(
+            "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
+        )
+        == "medium_term_improvement_plan"
+    )
+
+
+def test_file_type_for_retrofit_design_doc():
+    assert (
+        get_file_type_string("2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf")
+        == "retrofit_design_doc"
+    )
+    assert (
+        get_file_type_string("2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf")
+        == "retrofit_design_doc"
+    )
+
+
+# ---------------------------------------------------------------------------
+# get_core_file_type
+# ---------------------------------------------------------------------------
+
+
+def test_core_file_for_evidence_category_match_is_case_insensitive() -> None:
+    # Arrange
+    filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+    # Act
+    result = get_core_file_type(filename, evidence_category="Retrofit Design")
+
+    # Assert
+    assert result == CoreFiles.RETROFIT_DESIGN_DOC
+
+
+def test_core_file_for_evidence_category_returns_retrofit_design_doc() -> None:
+    # Arrange
+    filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+    # Act
+    result = get_core_file_type(filename, evidence_category="retrofit design")
+
+    # Assert
+    assert result == CoreFiles.RETROFIT_DESIGN_DOC
+
+
+def test_core_file_for_ioe_substring_returns_improvement_option_evaluation() -> None:
+    # Arrange
+    filename = "6000802 - NG4 4HD - Improvement Option Evaluation.pdf"
+
+    # Act
+    result = get_core_file_type(filename)
+
+    # Assert
+    assert result == CoreFiles.IMPROVEMENT_OPTION_EVALUATION
+
+
+def test_core_file_for_mtip_substring_returns_medium_term_improvement_plan() -> None:
+    # Arrange
+    filename = "60800802 - NG4 4HD - Medium Term Improvement Plan IOE.pdf"
+
+    # Act
+    result = get_core_file_type(filename)
+
+    # Assert
+    assert result == CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN
+
+
+def test_core_file_for_osm_pattern_returns_retrofit_design_doc_without_evidence_category() -> (
+    None
+):
+    # Arrange
+    filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+    # Act
+    result = get_core_file_type(filename)
+
+    # Assert
+    assert result == CoreFiles.RETROFIT_DESIGN_DOC
+
+
+def test_core_file_for_prefix_returns_photopack() -> None:
+    # Arrange
+    filename = "Photopack_123456_V1.pdf"
+
+    # Act
+    result = get_core_file_type(filename)
+
+    # Assert
+    assert result == CoreFiles.PHOTOPACK
+
+
+def test_core_file_for_unknown_filename_returns_none() -> None:
+    # Arrange
+    filename = "unknown_document_123.pdf"
+
+    # Act
+    result = get_core_file_type(filename)
+
+    # Assert
+    assert result is None
+
+
+def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present() -> (
+    None
+):
+    # Arrange — OSM+DR-N-A filename but evidence_category is something other than retrofit design
+    filename = "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+    # Act
+    result = get_core_file_type(filename, evidence_category="some other category")
+
+    # Assert
+    assert result is None
--- a/backend/pashub_fetcher/tests/test_pashub_client.py
+++ b/backend/pashub_fetcher/tests/test_pashub_client.py
@ -0,0 +1,117 @@
+# pyright: reportPrivateUsage=false
+from typing import Optional
+
+from backend.pashub_fetcher.core_files import CoreFiles
+from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
+from backend.pashub_fetcher.pashub_client import PashubClient
+
+
+def make_client() -> PashubClient:
+    return PashubClient(token="test-token")
+
+
+def make_file(
+    file_name: str = "unknown.pdf",
+    evidence_category: Optional[str] = None,
+    created_utc: str = "2024-01-01T00:00:00",
+) -> EvidenceFileData:
+    return EvidenceFileData(
+        file_id="id-1",
+        file_name=file_name,
+        created_utc=created_utc,
+        file_size=1024,
+        file_extension="pdf",
+        evidence_category=evidence_category,
+    )
+
+
+# ---------------------------------------------------------------------------
+# _select_latest_core_files
+# ---------------------------------------------------------------------------
+
+
+def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
+    # Arrange
+    client = make_client()
+    files = [
+        make_file(
+            file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-06-01T00:00:00",
+        )
+    ]
+
+    # Act
+    result = client._select_latest_core_files(files)
+
+    # Assert
+    assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+
+def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
+    # Arrange - the non-OSM file is newer but should lose to the OSM file
+    client = make_client()
+    files = [
+        make_file(
+            file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-01-01T00:00:00",
+        ),
+        make_file(
+            file_name="Retrofit Design Doc non-osm variant.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-06-01T00:00:00",
+        ),
+    ]
+
+    # Act
+    result = client._select_latest_core_files(files)
+
+    # Assert
+    assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
+
+
+def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
+    # Arrange
+    client = make_client()
+    files = [
+        make_file(
+            file_name="2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-01-01T00:00:00",
+        ),
+        make_file(
+            file_name="2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-06-01T00:00:00",
+        ),
+    ]
+
+    # Act
+    result = client._select_latest_core_files(files)
+
+    # Assert
+    assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
+
+
+def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
+    # Arrange
+    client = make_client()
+    files = [
+        make_file(
+            file_name="retrofit_design_v1.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-01-01T00:00:00",
+        ),
+        make_file(
+            file_name="retrofit_design_v2.pdf",
+            evidence_category="retrofit design",
+            created_utc="2024-06-01T00:00:00",
+        ),
+    ]
+
+    # Act
+    result = client._select_latest_core_files(files)
+
+    # Assert
+    assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
--- a/backend/pashub_fetcher/tests/test_pashub_service.py
+++ b/backend/pashub_fetcher/tests/test_pashub_service.py
@ -1,8 +1,10 @@
-from typing import Optional
+import pytest
+from typing import Any, Callable, Optional
 from unittest.mock import MagicMock, call, patch


-from backend.pashub_fetcher.pashub_client import PashubClient
+from backend.app.db.models.uploaded_file import FileSourceEnum
+from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
 from backend.pashub_fetcher.pashub_service import PashubService
 from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
    PashubToAraTriggerRequest,
@ -31,11 +33,13 @@ def make_service(
    pashub_client: Optional[PashubClient] = None,
    sharepoint_client: Optional[DomnaSharepointClient] = None,
    s3_bucket: str = "test-bucket",
+    coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
 ) -> PashubService:
    return PashubService(
        pashub_client=pashub_client or MagicMock(spec=PashubClient),
        sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient),
        s3_bucket=s3_bucket,
+        coordination_client_factory=coordination_client_factory,
    )


@ -144,10 +148,11 @@ def test_run_persists_uploaded_file_records_to_db() -> None:
        service.run(make_request(uprn="12345"))

    fake_session.add_all.assert_called_once()
-    added: list = fake_session.add_all.call_args[0][0]
+    added: list[Any] = fake_session.add_all.call_args[0][0]
    assert len(added) == 1
    assert added[0].s3_file_bucket == "test-bucket"
    assert added[0].uprn == 12345
+    assert added[0].file_source == FileSourceEnum.PAS_HUB.value


 # ---------------------------------------------------------------------------
@ -225,6 +230,135 @@ def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
 # ---------------------------------------------------------------------------


+# ---------------------------------------------------------------------------
+# run(): coordination fallback
+# ---------------------------------------------------------------------------
+
+
+def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
+    pas_client = MagicMock(spec=PashubClient)
+    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    coord_client = MagicMock(spec=PashubClient)
+    coord_client.get_uprn_by_job_id.return_value = "99999"
+    coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+
+    factory = MagicMock(return_value=coord_client)
+
+    service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session"),
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        result = service.run(make_request())
+
+    assert result == ["/tmp/a.pdf"]
+    coord_client.get_uprn_by_job_id.assert_called_once()
+    coord_client.get_core_evidence_files_by_job_id.assert_called_once()
+    assert factory.call_count == 1
+
+
+def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
+    pas_client = MagicMock(spec=PashubClient)
+    pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
+
+    coord_client = MagicMock(spec=PashubClient)
+    coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+
+    factory = MagicMock(return_value=coord_client)
+
+    service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session"),
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        result = service.run(make_request(uprn="12345"))
+
+    assert result == ["/tmp/a.pdf"]
+    coord_client.get_core_evidence_files_by_job_id.assert_called_once()
+    pas_client.get_uprn_by_job_id.assert_not_called()
+
+
+def test_run_raises_unauthorized_when_pas_401_and_no_factory() -> None:
+    pas_client = MagicMock(spec=PashubClient)
+    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    service = make_service(pashub_client=pas_client)
+
+    with pytest.raises(UnauthorizedError):
+        service.run(make_request())
+
+
+def test_run_raises_unauthorized_when_both_clients_401() -> None:
+    pas_client = MagicMock(spec=PashubClient)
+    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    coord_client = MagicMock(spec=PashubClient)
+    coord_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    factory = MagicMock(return_value=coord_client)
+
+    service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
+
+    with pytest.raises(UnauthorizedError):
+        service.run(make_request())
+
+
+def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None:
+    pas_client = MagicMock(spec=PashubClient)
+    pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
+
+    coord_client = MagicMock(spec=PashubClient)
+    coord_client.get_uprn_by_job_id.return_value = "99999"
+    coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+
+    factory = MagicMock(return_value=coord_client)
+    fake_session = MagicMock()
+
+    service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        mock_db.return_value.__enter__.return_value = fake_session
+        service.run(make_request())
+
+    fake_session.add_all.assert_called_once()
+    added: list[Any] = fake_session.add_all.call_args[0][0]
+    assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value
+
+
+def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None:
+    pas_client = MagicMock(spec=PashubClient)
+    pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
+
+    coord_client = MagicMock(spec=PashubClient)
+    coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
+
+    factory = MagicMock(return_value=coord_client)
+    fake_session = MagicMock()
+
+    service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
+
+    with (
+        patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
+        patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
+        patch("backend.pashub_fetcher.pashub_service.os.remove"),
+    ):
+        mock_db.return_value.__enter__.return_value = fake_session
+        service.run(make_request(uprn="12345"))
+
+    fake_session.add_all.assert_called_once()
+    added: list[Any] = fake_session.add_all.call_args[0][0]
+    assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value
+
+
 def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
    mock_client = MagicMock(spec=PashubClient)
    mock_client.get_uprn_by_job_id.return_value = None
--- a/backend/pashub_fetcher/tests/test_pashub_to_ara_trigger_request.py
+++ b/backend/pashub_fetcher/tests/test_pashub_to_ara_trigger_request.py
@ -0,0 +1,51 @@
+import pytest
+
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+    PashubToAraTriggerRequest,
+)
+
+
+def make_request(pashub_link: str) -> PashubToAraTriggerRequest:
+    """Build a trigger request that carries only the given PasHub link."""
+    fields = {"pashub_link": pashub_link}
+    return PashubToAraTriggerRequest(**fields)
+
+
+def test_pashub_job_id_extracts_id_from_details_link() -> None:
+    """A ``/jobs/{id}/details`` link yields the ``{id}`` segment."""
+    details_link = "https://pashub.net/jobs/job-id-123/details"
+
+    job_id = make_request(details_link).pashub_job_id
+
+    assert job_id == "job-id-123"
+
+
+def test_pashub_job_id_raises_for_invalid_link() -> None:
+    """A link with no ``/jobs/`` segment makes the property raise ValueError."""
+    # Built outside the `raises` block so only the property access is under test.
+    bad_request = make_request("https://pashub.net/rcs-dashboard")
+
+    with pytest.raises(ValueError):
+        _ = bad_request.pashub_job_id
+
+
+def test_pashub_job_id_extracts_id_from_bare_job_link() -> None:
+    """A ``/jobs/{id}`` link with no trailing path still yields ``{id}``."""
+    bare_link = "https://pashub.net/jobs/job-id-123"
+
+    job_id = make_request(bare_link).pashub_job_id
+
+    assert job_id == "job-id-123"
+
+
+def test_pashub_job_id_extracts_id_from_evidence_view_link() -> None:
+    """A ``/jobs/{id}/evidence/view`` link yields the ``{id}`` segment."""
+    evidence_link = "https://pashub.net/jobs/job-id-123/evidence/view"
+
+    job_id = make_request(evidence_link).pashub_job_id
+
+    assert job_id == "job-id-123"
--- a/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py
+++ b/backend/pashub_fetcher/trigger_pashub_sqs_from_file.py
@ -0,0 +1,137 @@
+import json
+import logging
+import os
+from typing import Any, Optional, cast
+
+import boto3
+from openpyxl import load_workbook
+
+from backend.app.config import get_settings
+from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
+    PashubToAraTriggerRequest,
+)
+
+logging.basicConfig(level=logging.INFO, format="%(message)s")
+logger: logging.Logger = logging.getLogger(__name__)
+
+# When True, main() only logs each request it would send and skips
+# sqs.send_message; flip to True to preview a run.
+DRY_RUN: bool = False
+
+# Deal IDs to process. main() keeps only requests whose hubspot_deal_id is in
+# this set; an empty set disables the filter and every row with a link is sent.
+# IDs are strings because _build_requests stringifies the "Record ID" cell.
+DEAL_ID_FILTER: frozenset[str] = frozenset(
+    {
+        "379452094688",
+        "379466504437",
+        "379660170452",
+        "380016925932",
+        "379848065216",
+        "379466504434",
+        "379452094690",
+        "379965924567",
+        "380016925923",
+        "379792072898",
+        "379654754502",
+        "379560262861",
+        "379969670369",
+        "379248717001",
+        "379971468493",
+        "379999888607",
+        "379606372580",
+        "379969603797",
+        "379967743213",
+        "379263155434",
+        "379855267025",
+        "379889899719",
+        "379071064307",
+        "379867925741",
+    }
+)
+
+# Spreadsheet read by main(); resolved relative to this module's directory.
+# NOTE(review): the dated export file must sit next to this script — confirm
+# whether it is meant to be committed or supplied locally.
+EXCEL_PATH: str = os.path.join(
+    os.path.dirname(__file__),
+    "united-infrastructure-exports-all-deals-2026-05-14.xlsx",
+)
+
+
+def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
+    """Read the deal export spreadsheet and build one trigger request per linked row.
+
+    Only the first worksheet is read; row 1 is treated as the header row.
+    Rows whose "PasHub link" cell is empty or whitespace-only are skipped.
+
+    Args:
+        excel_path: Path to the ``.xlsx`` export.
+
+    Returns:
+        Trigger requests in worksheet row order.
+
+    Raises:
+        KeyError: If any of the "PasHub link", "Record ID", "Deal Name" or
+            "Deal Stage" headers is missing from row 1.
+    """
+    wb = load_workbook(excel_path, data_only=True)
+    ws = wb.worksheets[0]
+
+    # Map each stripped header text to its 1-based column index.
+    headers: dict[str, int] = {}
+    for col in range(1, ws.max_column + 1):
+        header_val = ws.cell(row=1, column=col).value
+        if header_val is not None:
+            headers[str(header_val).strip()] = col
+
+    required: tuple[str, ...] = ("PasHub link", "Record ID", "Deal Name", "Deal Stage")
+    missing: list[str] = [name for name in required if name not in headers]
+    if missing:
+        # Same exception type as a bare dict lookup, but with an actionable message.
+        raise KeyError(
+            f"Missing column(s) {missing} in {excel_path}; found headers: {sorted(headers)}"
+        )
+
+    pashub_col: int = headers["PasHub link"]
+    record_id_col: int = headers["Record ID"]
+    deal_name_col: int = headers["Deal Name"]
+    deal_stage_col: int = headers["Deal Stage"]
+
+    requests: list[PashubToAraTriggerRequest] = []
+
+    for row in range(2, ws.max_row + 1):
+        pashub_link_raw = ws.cell(row=row, column=pashub_col).value
+        if not pashub_link_raw:
+            continue
+
+        pashub_link: str = str(pashub_link_raw).strip()
+        if not pashub_link:
+            # Whitespace-only cell: there is no link to extract a job ID from.
+            continue
+
+        record_id_raw = ws.cell(row=row, column=record_id_col).value
+        deal_name_raw = ws.cell(row=row, column=deal_name_col).value
+        deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value
+
+        # A numeric ID cell can come back as a float (e.g. 379452094688.0);
+        # render integral floats without the ".0" so the ID still matches the
+        # plain-digit strings in DEAL_ID_FILTER.
+        if isinstance(record_id_raw, float) and record_id_raw.is_integer():
+            record_id_raw = int(record_id_raw)
+
+        hubspot_deal_id: Optional[str] = (
+            str(record_id_raw) if record_id_raw is not None else None
+        )
+        address: Optional[str] = (
+            str(deal_name_raw).strip() if deal_name_raw is not None else None
+        )
+        deal_stage: Optional[str] = (
+            str(deal_stage_raw).strip() if deal_stage_raw is not None else None
+        )
+
+        requests.append(
+            PashubToAraTriggerRequest(
+                pashub_link=pashub_link,
+                hubspot_deal_id=hubspot_deal_id,
+                address=address,
+                deal_stage=deal_stage,
+            )
+        )
+
+    return requests
+
+
+def main() -> None:
+    """Build trigger requests from ``EXCEL_PATH`` and enqueue them on SQS.
+
+    Requests are narrowed to ``DEAL_ID_FILTER`` when that set is non-empty.
+    With ``DRY_RUN`` enabled each request is only logged: this function then
+    creates no SQS client and does not read the queue URL setting. A summary
+    count is printed at the end.
+    """
+    trigger_requests: list[PashubToAraTriggerRequest] = _build_requests(EXCEL_PATH)
+
+    if DEAL_ID_FILTER:
+        trigger_requests = [
+            r for r in trigger_requests if r.hubspot_deal_id in DEAL_ID_FILTER
+        ]
+
+    # Only touch AWS and settings when messages will really be sent, so a
+    # dry run works without either being configured.
+    sqs: Any = None
+    queue_url: str = ""
+    if not DRY_RUN:
+        sqs = cast(Any, boto3.client("sqs"))  # type: ignore[reportUnknownMemberType]
+        queue_url = get_settings().PASHUB_TO_ARA_SQS_URL
+
+    # Loop-invariant label, computed once.
+    action: str = "DRY RUN" if DRY_RUN else "SENDING"
+
+    count: int = 0
+    for request in trigger_requests:
+        logger.info(
+            f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}"
+        )
+
+        if not DRY_RUN:
+            response: dict[str, Any] = sqs.send_message(
+                QueueUrl=queue_url,
+                MessageBody=json.dumps(request.model_dump()),
+            )
+            message_id: str = response["MessageId"]
+            logger.info(f"  MessageId: {message_id}")
+
+        count += 1
+
+    label: str = "would send" if DRY_RUN else "sent"
+    print(f"{count} messages {label}")
+
+
+if __name__ == "__main__":
+    main()
--- a/datatypes/epc/domain/epc.py
+++ b/datatypes/epc/domain/epc.py
@ -9,3 +9,25 @@ class Epc(Enum):
    E = "E"
    F = "F"
    G = "G"
+
+    @classmethod
+    def from_sap_score(cls, score: int) -> "Epc":
+        """Return the EPC band for a SAP10 energy rating (1-100).
+
+        Uses the standard SAP10 band floors: A from 92, B from 81, C from 69,
+        D from 55, E from 39, F from 21. Anything lower is G, which also
+        covers 0 and negative scores (not expected in practice).
+        """
+        # (lowest score in band, band), ordered from the top band down so the
+        # first floor the score reaches is the answer.
+        band_floors = (
+            (92, cls.A),
+            (81, cls.B),
+            (69, cls.C),
+            (55, cls.D),
+            (39, cls.E),
+            (21, cls.F),
+        )
+        for floor, band in band_floors:
+            if score >= floor:
+                return band
+        return cls.G
--- a/datatypes/epc/domain/epc_codes.csv
+++ b/datatypes/epc/domain/epc_codes.csv
--- a/datatypes/epc/domain/epc_property_data.py
+++ b/datatypes/epc/domain/epc_property_data.py
@ -1,10 +1,67 @@
-from dataclasses import dataclass
+import re
+from dataclasses import dataclass, field
 from datetime import date
-from typing import List, Optional, Union
+from enum import Enum
+from typing import Final, List, Optional, Union

 from datatypes.epc.domain.epc import Epc


+_API_EXTENSION = re.compile(r"^Extension\s+(\d+)$")
+
+
+class BuildingPartIdentifier(Enum):
+    """Canonical identifier for a SAP building part.
+
+    Stands in for bare-string comparison on `SapBuildingPart.identifier`.
+    Member *values* use the site-notes / database spelling ("main",
+    "extension_1" .. "extension_4"). Boundary mappers (gov-EPC API, site
+    notes) build members through `from_api_string` / `extension`, which
+    lets consumers dispatch with `is` rather than string equality.
+
+    RdSAP10 §1.2 allows at most 4 extensions per dwelling, hence the
+    explicit EXTENSION_1..4 members; every other input maps to OTHER so
+    callers can still iterate safely.
+
+    P6.1 — first slice of the strict-typing P6 work documented in
+    HANDOVER_SYSTEMATIC_REVIEW §2.5.
+    """
+
+    MAIN = "main"
+    EXTENSION_1 = "extension_1"
+    EXTENSION_2 = "extension_2"
+    EXTENSION_3 = "extension_3"
+    EXTENSION_4 = "extension_4"
+    OTHER = "other"
+
+    @classmethod
+    def from_api_string(
+        cls, api_identifier: Optional[str]
+    ) -> "BuildingPartIdentifier":
+        """Translate a gov-EPC API `BuildingPart.identifier` into a member.
+
+        "Main Dwelling" gives MAIN and "Extension N" gives EXTENSION_N for
+        N in 1..4. `None` (allowed by the 21_0_1 schema) and any other
+        text give OTHER.
+        """
+        if api_identifier is None:
+            return cls.OTHER
+        if api_identifier == "Main Dwelling":
+            return cls.MAIN
+        extension_match = _API_EXTENSION.match(api_identifier)
+        if extension_match is None:
+            return cls.OTHER
+        return cls.extension(int(extension_match.group(1)))
+
+    @classmethod
+    def extension(cls, n: int) -> "BuildingPartIdentifier":
+        """Member for the Nth extension; N outside 1..4 (the RdSAP10 §1.2
+        cap) gives OTHER."""
+        wanted_value = f"extension_{n}"
+        for member in cls:
+            if member.value == wanted_value:
+                return member
+        return cls.OTHER
+
+
@dataclass
 class EnergyElement:
    description: str
@ -12,6 +69,18 @@ class EnergyElement:
    environmental_efficiency_rating: int


+@dataclass
+class Addendum:
+    """Optional cert-level addendum carrying construction-detail flags.
+
+    Present on ~43% of real RdSAP certs (stone-walls / system-build / a list of
+    numeric improvement codes the assessor wanted to call out).
+    """
+    # Every field defaults to None, so an Addendum can be built with any subset.
+    # NOTE(review): presumably None means "not lodged" rather than False — confirm
+    # against the mappers that populate this type.
+    stone_walls: Optional[bool] = None
+    system_build: Optional[bool] = None
+    # The numeric improvement codes mentioned in the class docstring.
+    addendum_numbers: Optional[List[int]] = None
+
+
@dataclass
 class InstantaneousWwhrs:
    wwhrs_index_number1: Optional[int] = None
@ -69,6 +138,21 @@ class SapHeating:
    secondary_fuel_type: Optional[int] = None
    secondary_heating_type: Optional[Union[int, str]] = None  # int from API; str from site notes
    cylinder_insulation_thickness_mm: Optional[int] = None
+    # SAP10 hot-water demand inputs from sap_heating.
+    number_baths: Optional[int] = None
+    number_baths_wwhrs: Optional[int] = None
+    # Per SAP10.2 Appendix J (p.81) step 1a: Noutlets includes electric
+    # showers in the count for Nshower; step 2a routes Nbath through the
+    # "shower also present" branch (0.13N + 0.19) when ANY shower is
+    # lodged — including electric. Modelled separately from mixer outlets
+    # because electric showers don't draw warm water from the system.
+    electric_shower_count: Optional[int] = None
+    # PCDF mixer-shower lodgement (count of outlets that DO draw warm
+    # water from the main HW system). When set, overrides the heuristic
+    # default of 1 vented outlet @ 7 L/min used by `_mixer_shower_flow_
+    # rates_from_cert`. Most certs lodge only count; the standard
+    # vented-system flow rate from Table J4 (7 L/min) is the default.
+    mixer_shower_count: Optional[int] = None


@dataclass
@ -84,6 +168,11 @@ class SapVentilation:
    passive_vents_count: Optional[int] = None
    flueless_gas_fires_count: Optional[int] = None
    ventilation_in_pcdf_database: Optional[bool] = None
+    # SAP10.2 §2 cert lodgements not previously surfaced on this type.
+    sheltered_sides: Optional[int] = None              # (19) — cert assessor lodge, 0..4
+    has_suspended_timber_floor: Optional[bool] = None  # (12) gate
+    suspended_timber_floor_sealed: Optional[bool] = None
+    has_draught_lobby: Optional[bool] = None           # (13) gate (overrides .draught_lobby for §2 cascade)


@dataclass
@ -93,6 +182,29 @@ class WindowTransmissionDetails:
    solar_transmittance: float


+@dataclass
+class SapRoofWindow:
+    """RdSAP10 worksheet roof window — feeds §3 (27a) heat transmission
+    and §6 (82) solar gain. Heat-transmission contribution is A × U_eff
+    where U_eff applies the SAP10.2 §3.2 curtain resistance (R=0.04
+    m²K/W) to `u_value_raw`. Roof windows draw their U-value from RdSAP
+    10 Table 24 (p.50/113) "Roof window" column (e.g. double-glazed roof
+    window U=3.4 vs 2.8 for standard).
+
+    Solar fields (orientation, pitch, g_perpendicular, frame_factor)
+    feed `solar_gains_from_cert` — defaults match the modal RdSAP roof
+    window (45° pitch, manufacturer-default DG g⊥=0.76, PVC FF=0.70,
+    N-facing) and are intended to be overridden per-fixture.
+    """
+
+    # Required geometry / fabric inputs (no defaults).
+    area_m2: float
+    u_value_raw: float  # RdSAP10 Table 24 roof-window column, pre-curtain.
+    # Solar inputs; the defaults are the "modal roof window" values listed in
+    # the class docstring (N-facing, 45° pitch, g⊥=0.76, FF=0.70).
+    orientation: int = 1  # SAP10.2 code: 1=N, 2=NE, 3=E, 4=SE, 5=S, 6=SW, 7=W, 8=NW.
+    pitch_deg: float = 45.0
+    g_perpendicular: float = 0.76
+    frame_factor: float = 0.70
+
+
@dataclass
 class SapWindow:
    frame_material: Optional[str]
@ -137,6 +249,19 @@ class PhotovoltaicSupply:
    none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails


+@dataclass
+class PhotovoltaicArray:
+    """One measured PV array: peak power (kW), pitch, orientation (SAP octant
+    1-8), and overshading code. Populated on EpcPropertyData when the EPC has
+    measured PV configuration; `photovoltaic_supply` carries the fallback
+    `percent_roof_area` estimate when the surveyor could not confirm details.
+    """
+    # All four fields are required (no defaults).
+    peak_power: float  # kW, per the class docstring.
+    # NOTE(review): pitch is an int with no unit stated here — presumably
+    # degrees or a SAP pitch code; confirm against the mapper.
+    pitch: int
+    orientation: int  # SAP octant 1-8, per the class docstring.
+    overshading: int  # Overshading code; the code table is not shown here.
+
+
@dataclass
 class SapEnergySource:
    mains_gas: bool
@ -150,6 +275,7 @@ class SapEnergySource:

    pv_connection: Optional[Union[int, str]] = None  # int from API; str from site notes
    photovoltaic_supply: Optional[PhotovoltaicSupply] = None
+    photovoltaic_arrays: Optional[List[PhotovoltaicArray]] = None
    wind_turbine_details: Optional[WindTurbineDetails] = None
    pv_batteries: Optional[PvBatteries] = None

@ -164,12 +290,75 @@ class SapFloorDimension:
    floor: Optional[int] = None
    floor_insulation: Optional[int] = None
    floor_construction: Optional[int] = None
+    # RdSAP10 §5.13 Table 20: True when this floor is open to outside air
+    # (exposed) or sits over enclosed unheated space (semi-exposed) — e.g.
+    # the lowest floor of an extension that hangs off the main from the
+    # first storey upward. False means a ground floor (on soil), the
+    # default path through the BS EN ISO 13370 / Table 19 cascade.
+    is_exposed_floor: bool = False
+
+
+@dataclass(frozen=True)
+class SapRoomInRoofSurface:
+    """One surface lodged via the RdSAP10 §3.10 Detailed measurement path.
+
+    Each RR can carry up to two of each surface kind (flat ceiling,
+    sloping ceiling, stud wall, gable wall) per spec Figure 4. The U-value
+    is resolved from Table 17 when `insulation_thickness_mm` is set, or
+    Table 18 col (4) age-band default otherwise.
+
+    RdSAP10 Table 4 (p.22) "U-values of gable-end and other walls in RR"
+    distinguishes four gable types. We model the two we've seen lodged in
+    the U985 corpus:
+      - "gable_wall" — party (U = 0.25 W/m²K per Table 4 row 2)
+      - "gable_wall_external" — exposed gable (U = "as common wall" per
+        Table 4 row 1; when assessor lodges a measured U on the surface,
+        `u_value` overrides the cascade)
+    The other two Table 4 variants ("sheltered" R=0.5 of external, and
+    "connected to heated space" U=0) are not yet seen in the corpus.
+    """
+
+    kind: str  # "slope" | "flat_ceiling" | "stud_wall" | "gable_wall" | "gable_wall_external"
+    area_m2: float
+    insulation_thickness_mm: Optional[int] = None
+    insulation_type: Optional[str] = None  # "mineral_wool" / "eps" / "pur" / "pir"
+    # Assessor-lodged U override (W/m²K). Used by `gable_wall_external`
+    # when the cert measures U directly (cf. 000487 Gable Wall 2 at
+    # U=0.86 on line 29). When None, the cascade falls back to the main-
+    # wall U via Table 4 "as common wall".
+    u_value: Optional[float] = None


@dataclass
 class SapRoomInRoof:
    floor_area: Union[int, float]
    construction_age_band: str
+    # RdSAP10 §3.9.2 Simplified Type 2 — RR built into a roof space that
+    # has continuous common walls outside the RR boundaries. The space is
+    # treated as Room-in-Roof when the height of accessible common walls
+    # is < 1.8 m (otherwise it counts as a separate storey).
+    common_wall_length_m: Optional[float] = None
+    common_wall_height_m: Optional[float] = None
+    # Optional gable lengths/heights for the Type 2 quadratic correction:
+    #   A_gable = L × (0.25 + H) − Σ ((H − H_common_wall_i)² / 2)
+    # If absent, the gable contribution is 0 (Simplified Type 1).
+    gable_1_length_m: Optional[float] = None
+    gable_1_height_m: Optional[float] = None
+    gable_2_length_m: Optional[float] = None
+    gable_2_height_m: Optional[float] = None
+    # RdSAP10 §3.10 Detailed measurement path. When `detailed_surfaces` is
+    # set, each entry contributes A × U directly and the Simplified A_RR
+    # formula is bypassed. The storey-below roof area still deducts
+    # `floor_area` per §3.9.
+    detailed_surfaces: Optional[List[SapRoomInRoofSurface]] = None
+
+
+# RdSAP10 wall_construction integer encoding. The gov-EPC API doesn't publish
+# the mapping; established empirically from a 50k 2026-bulk sweep — code 6
+# co-occurs with `walls[].description = "Basement wall"` in 88% of cases at
+# a 0.18% false-positive rate, so we treat it as the canonical basement-wall
+# signal.
+BASEMENT_WALL_CONSTRUCTION_CODE: Final[int] = 6


@dataclass
@ -180,12 +369,26 @@ class SapAlternativeWall:
    wall_insulation_type: int
    wall_thickness_measured: str
    wall_insulation_thickness: Optional[str] = None
+    # Assessor-lodged U-value (W/m²K) — when set, overrides the
+    # Table 6 cascade for this alt sub-area. Lodged directly on the
+    # cert for some constructions (e.g. 000487 Ext1 TimberWallOneLayer
+    # at U=1.90, where the 9-mm-thick single-layer timber wall doesn't
+    # fit the Table 6 buckets cleanly).
+    u_value: Optional[float] = None
+
+    @property
+    def is_basement_wall(self) -> bool:
+        """True iff this alt sub-area is the dwelling's basement wall —
+        identified by RdSAP10 wall_construction code = 6 (see module
+        constant `BASEMENT_WALL_CONSTRUCTION_CODE`). RdSAP §5.17 / Table 23
+        applies a special U-value lookup to basement walls."""
+        return self.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE


@dataclass
 class SapBuildingPart:
    # General
-    identifier: str  # e.g. "main", "roof"
+    identifier: BuildingPartIdentifier
    construction_age_band: str

    # Wall
@ -196,12 +399,12 @@ class SapBuildingPart:
        int, str
    ]  # int from API, str from site notes TODO: make enum/mapping?
    wall_thickness_measured: bool
-    party_wall_construction: Union[int, str]  # TODO: make enum/mapping?
+    party_wall_construction: Optional[Union[int, str]] = (
+        None  # TODO: make enum/mapping?
+    )

    # Floor
-    sap_floor_dimensions: List[
-        SapFloorDimension
-    ]  # Not included in site notes; should this be optional?
+    sap_floor_dimensions: List[SapFloorDimension] = field(default_factory=list)

    # Optional
    building_part_number: Optional[int] = (
@ -224,6 +427,7 @@ class SapBuildingPart:
    floor_u_value_known: Optional[bool] = None

    roof_construction: Optional[int] = None
+    roof_construction_type: Optional[str] = None  # str from site notes e.g. "PS Pitched, sloping ceiling"
    roof_insulation_location: Optional[Union[int, str]] = (
        None  # TODO: make enum/mapping?
    )
@ -232,6 +436,29 @@ class SapBuildingPart:
    )
    sap_room_in_roof: Optional[SapRoomInRoof] = None

+    @property
+    def main_wall_is_basement(self) -> bool:
+        """True iff this part's primary wall (not an alt sub-area) is the
+        basement wall — happens when the whole part sits below grade.
+        Empirically 54 of 67k parts in the 2026 sweep; rare but real."""
+        return self.wall_construction == BASEMENT_WALL_CONSTRUCTION_CODE
+
+    @property
+    def has_basement(self) -> bool:
+        """True iff this part carries a basement wall — either as its
+        main wall (`main_wall_is_basement`) or as an alt sub-area
+        (`SapAlternativeWall.is_basement_wall`). When true, RdSAP §5.17 /
+        Table 23 governs both the basement-wall U-value AND the entire
+        ground floor's U-value for this part (per user-confirmed
+        convention: a basement wall on the part means its whole
+        `floor=0` level is treated as basement floor)."""
+        if self.main_wall_is_basement:
+            return True
+        # Either alternative-wall slot may be unset (None); those are skipped.
+        return any(
+            alt is not None and alt.is_basement_wall
+            for alt in (self.sap_alternative_wall_1, self.sap_alternative_wall_2)
+        )
+

@dataclass
 class WindowsTransmissionDetails:
@ -250,6 +477,22 @@ class SapFlatDetails:
    unheated_corridor_length_m: Optional[int] = None


+@dataclass
+class RenewableHeatIncentive:
+    """The RHI block on the EPC — annual baseline kWh per end-use, plus SAP-estimated
+    impact of common insulation measures.
+
+    Mapped 1:1 from the gov EPC API's `renewable_heat_incentive` object. Source of
+    baseline `space_heating_kwh` and `water_heating_kwh` for SAP10 properties (used
+    as ML training targets per ADR-0007).
+    """
+    # Baseline annual demand per end-use (kWh); both are required.
+    space_heating_kwh: float
+    water_heating_kwh: float
+    # Per-measure impact estimates (kWh); optional, defaulting to None.
+    # NOTE(review): the schema fixtures carry negative values (e.g. -2114.0),
+    # presumably meaning a reduction in demand — confirm the sign convention.
+    impact_of_loft_insulation_kwh: Optional[float] = None
+    impact_of_cavity_insulation_kwh: Optional[float] = None
+    impact_of_solid_wall_insulation_kwh: Optional[float] = None
+
+
@dataclass
 class EpcPropertyData:
    # General
@ -327,6 +570,10 @@ class EpcPropertyData:
    main_heating_controls: Optional[EnergyElement] = (
        None  # site notes has heating_and_hot_water.main_heating.controls: str - doesn't map to EnergyElement
    )
+    # Air-tightness EnergyElement (description + ratings) — kept as input even though
+    # ratings are derived, because the `.description` text categorizes the building's
+    # permeability class when no pressure test was carried out.
+    air_tightness: Optional[EnergyElement] = None
    current_energy_efficiency_band: Optional[Epc] = None  # not available in site notes?
    environmental_impact_current: Optional[int] = None
    heating_cost_current: Optional[float] = None
@ -352,17 +599,28 @@ class EpcPropertyData:
    potential_energy_efficiency_band: Optional[Epc] = (
        None  # not available in site notes
    )
-    # renewable_heat_incentive: Optional[Any] = None # Not sure what this is, skip for now
+    renewable_heat_incentive: Optional[RenewableHeatIncentive] = None
    draughtproofed_door_count: Optional[int] = None
    mechanical_vent_duct_type: Optional[int] = None
    windows_transmission_details: Optional[WindowsTransmissionDetails] = None
-    multiple_glazed_propertion: Optional[int] = None
+    multiple_glazed_proportion: Optional[int] = None
+    extract_fans_count: Optional[int] = None
+    # Optional cert-level addendum + LZC source codes.
+    addendum: Optional[Addendum] = None
+    lzc_energy_sources: Optional[List[int]] = None
+    # RdSAP10 §3 line (27a) — roof windows cut into a storey-below roof.
+    # Distinct from `sap_windows` (vertical, line (27)) because Table 24
+    # has a separate roof-window U-value column. None when the dwelling
+    # has no roof windows; for cert-cascade fixtures the bootstrap path
+    # lodges per-window area + raw U.
+    sap_roof_windows: Optional[List[SapRoofWindow]] = None
    calculation_software_version: Optional[str] = None  # Do we care about this?
    mechanical_vent_duct_placement: Optional[int] = None
    mechanical_vent_duct_insulation: Optional[int] = None
    pressure_test_certificate_number: Optional[int] = None
    mechanical_ventilation_index_number: Optional[int] = None
    mechanical_vent_measured_installation: Optional[str] = None
+    mechanical_vent_duct_insulation_level: Optional[int] = None
    co2_emissions_current_per_floor_area: Optional[int] = None
    low_energy_fixed_lighting_bulbs_count: Optional[int] = None
    sap_flat_details: Optional[SapFlatDetails] = None
--- a/datatypes/epc/domain/mapper.py
+++ b/datatypes/epc/domain/mapper.py
--- a/datatypes/epc/domain/tests/test_building_part_identifier.py
+++ b/datatypes/epc/domain/tests/test_building_part_identifier.py
@ -0,0 +1,98 @@
+"""Tests for `BuildingPartIdentifier` — the strictly-typed identifier
+that replaces bare-string matching on `SapBuildingPart.identifier`.
+
+Two boundary factories convert raw inputs to canonical members:
+- `BuildingPartIdentifier.from_api_string` (gov-EPC API)
+- `BuildingPartIdentifier.extension(n)` (site-notes / construction id)
+
+P6.1 starts P6 (strict-type EpcPropertyData) from the documented pain
+point in packages/domain/src/domain/sap/worksheet/dimensions.py:74-82.
+"""
+from __future__ import annotations
+
+import pytest
+
+from datatypes.epc.domain.epc_property_data import BuildingPartIdentifier
+
+
+class TestFromApiString:
+    """The gov-EPC API returns "Main Dwelling" and "Extension N"; the
+    21_0_1 schema also permits `None`. All map to canonical members."""
+
+    def test_main_dwelling_becomes_main(self) -> None:
+        # Arrange / Act
+        identifier = BuildingPartIdentifier.from_api_string("Main Dwelling")
+
+        # Assert
+        assert identifier is BuildingPartIdentifier.MAIN
+
+    @pytest.mark.parametrize(
+        "api_string, expected",
+        [
+            ("Extension 1", BuildingPartIdentifier.EXTENSION_1),
+            ("Extension 2", BuildingPartIdentifier.EXTENSION_2),
+            ("Extension 3", BuildingPartIdentifier.EXTENSION_3),
+            ("Extension 4", BuildingPartIdentifier.EXTENSION_4),
+        ],
+    )
+    def test_extension_n_becomes_extension_n(
+        self, api_string: str, expected: BuildingPartIdentifier
+    ) -> None:
+        # Arrange / Act
+        identifier = BuildingPartIdentifier.from_api_string(api_string)
+
+        # Assert
+        assert identifier is expected
+
+    def test_none_becomes_other(self) -> None:
+        # Arrange — the 21_0_1 schema permits `identifier: Optional[str]`.
+        # Act
+        identifier = BuildingPartIdentifier.from_api_string(None)
+
+        # Assert
+        assert identifier is BuildingPartIdentifier.OTHER
+
+    @pytest.mark.parametrize(
+        "api_string", ["", "roof", "garage", "Extension", "Main", "Extension 5"]
+    )
+    def test_unrecognised_becomes_other(self, api_string: str) -> None:
+        # Arrange — "Extension 5" is intentionally OTHER per RdSAP10 §1.2
+        # (max 4 extensions); bare "Extension" with no digit likewise.
+        # Act
+        identifier = BuildingPartIdentifier.from_api_string(api_string)
+
+        # Assert
+        assert identifier is BuildingPartIdentifier.OTHER
+
+
+class TestExtensionFactory:
+    """`extension(n)` is the site-notes-side constructor — surveyors
+    record extensions by integer id; this maps id→canonical member."""
+
+    @pytest.mark.parametrize(
+        "n, expected",
+        [
+            (1, BuildingPartIdentifier.EXTENSION_1),
+            (2, BuildingPartIdentifier.EXTENSION_2),
+            (3, BuildingPartIdentifier.EXTENSION_3),
+            (4, BuildingPartIdentifier.EXTENSION_4),
+        ],
+    )
+    def test_valid_extension_number_returns_member(
+        self, n: int, expected: BuildingPartIdentifier
+    ) -> None:
+        # Arrange / Act
+        identifier = BuildingPartIdentifier.extension(n)
+
+        # Assert
+        assert identifier is expected
+
+    @pytest.mark.parametrize("n", [0, 5, 99, -1])
+    def test_out_of_range_falls_to_other(self, n: int) -> None:
+        # Arrange — RdSAP10 §1.2 caps at 4; out-of-range numbers should
+        # not crash the mapper, they should classify as OTHER.
+        # Act
+        identifier = BuildingPartIdentifier.extension(n)
+
+        # Assert
+        assert identifier is BuildingPartIdentifier.OTHER
--- a/datatypes/epc/domain/tests/test_from_rdsap_schema.py
+++ b/datatypes/epc/domain/tests/test_from_rdsap_schema.py
@ -253,6 +253,60 @@ class TestFromRdSapSchema21_0_0:
    def test_property_type(self, result: EpcPropertyData) -> None:
        assert result.property_type == "0"

+    def test_renewable_heat_incentive(self, result: EpcPropertyData) -> None:
+        # Arrange — schema-21.0.0 sample JSON loaded via fixture
+
+        # Act
+        rhi = result.renewable_heat_incentive
+
+        # Assert
+        assert rhi is not None
+        assert rhi.space_heating_kwh == 13120.0
+        assert rhi.water_heating_kwh == 2285.0
+        assert rhi.impact_of_loft_insulation_kwh == -2114.0
+        assert rhi.impact_of_cavity_insulation_kwh == -122.0
+        assert rhi.impact_of_solid_wall_insulation_kwh == -3560.0
+
+    def test_photovoltaic_arrays_none_when_unmeasured(
+        self, result: EpcPropertyData
+    ) -> None:
+        # Arrange — fixture has the unmeasured-PV shape
+        # (photovoltaic_supply.none_or_no_details.percent_roof_area = 0)
+
+        # Act
+        es = result.sap_energy_source
+
+        # Assert
+        assert es.photovoltaic_arrays is None
+        assert es.photovoltaic_supply is not None
+
+    def test_photovoltaic_arrays_populated_when_measured(self) -> None:
+        """Measured-PV certs lodge `photovoltaic_supply` as a nested list of
+        arrays; the mapper must surface every array, with all four fields,
+        on `sap_energy_source.photovoltaic_arrays`."""
+        # Arrange — load the schema-21.0.0 fixture and override
+        # sap_energy_source.photovoltaic_supply with the modern list-of-arrays
+        # shape carried by SAP10 EPCs with measured PV.
+        data = load("21_0_0.json")
+        data["sap_energy_source"]["photovoltaic_supply"] = [
+            [{"pitch": 2, "peak_power": 2.04, "orientation": 4, "overshading": 1}],
+            [{"pitch": 2, "peak_power": 1.86, "orientation": 8, "overshading": 2}],
+        ]
+        schema = from_dict(RdSapSchema21_0_0, data)
+
+        # Act
+        result = EpcPropertyDataMapper.from_rdsap_schema_21_0_0(schema)
+
+        # Assert
+        arrays = result.sap_energy_source.photovoltaic_arrays
+        assert arrays is not None
+        assert len(arrays) == 2
+        assert arrays[0].peak_power == 2.04
+        assert arrays[0].pitch == 2
+        assert arrays[0].orientation == 4
+        assert arrays[0].overshading == 1
+        # The second array is checked field-by-field too: its overshading
+        # differs from the first, so a mapper that reuses array 0's value
+        # would be caught here.
+        assert arrays[1].peak_power == 1.86
+        assert arrays[1].pitch == 2
+        assert arrays[1].orientation == 8
+        assert arrays[1].overshading == 2
+        # photovoltaic_supply is None when the measured shape is present
+        assert result.sap_energy_source.photovoltaic_supply is None
+

 # ---------------------------------------------------------------------------
 # Schema 21.0.1  (most comprehensive — full field coverage)
@ -532,3 +586,107 @@ class TestFromRdSapSchema21_0_1:

    def test_party_wall_length(self, result: EpcPropertyData) -> None:
        assert result.sap_building_parts[0].sap_floor_dimensions[0].party_wall_length_m == 7.9
+
+    # --- roof: flat-roof insulation & room-in-roof (sap_room_in_roof.room_in_roof_type_1) ---
+
+    def test_flat_roof_insulation_thickness_flows_through_on_building_part(
+        self, result: EpcPropertyData
+    ) -> None:
+        # Arrange — schema-21.0.1 lodges flat_roof_insulation_thickness
+        # on SapBuildingPart as a categorical code (e.g. "AB" for "As
+        # Built"). EpcPropertyData.SapBuildingPart declares the field;
+        # without mapper passthrough the flat-roof U-value cascade has
+        # no insulation signal to use.
+
+        # Act
+        v = result.sap_building_parts[0].flat_roof_insulation_thickness
+
+        # Assert
+        assert v == "AB"
+
+    def test_sap_room_in_roof_gable_lengths_extracted_from_room_in_roof_type_1(
+        self, result: EpcPropertyData
+    ) -> None:
+        # Arrange — schema-21.0.1 lodges Simplified Type 1 gable lengths
+        # under sap_room_in_roof.room_in_roof_type_1. The cascade requires
+        # them on EpcPropertyData.SapRoomInRoof.gable_1_length_m /
+        # gable_2_length_m for the §3.9.2 area cascade. Without this the
+        # length data is silently dropped at deserialization.
+
+        # Act
+        rir = result.sap_building_parts[0].sap_room_in_roof
+
+        # Assert
+        assert rir is not None
+        assert rir.gable_1_length_m == 6.4
+        assert rir.gable_2_length_m == 6.4
+
+    # --- ventilation (sap_ventilation) ---
+
+    def test_sap_ventilation_extract_fans_count_flows_through_to_calculator_input(
+        self, result: EpcPropertyData
+    ) -> None:
+        # Arrange — fixture lodges `extract_fans_count: 2` at the cert root;
+        # cert_to_inputs reads it via epc.sap_ventilation.extract_fans_count,
+        # so the mapper must surface it on the SapVentilation slice.
+
+        # Act
+        sv = result.sap_ventilation
+
+        # Assert
+        assert sv is not None
+        assert sv.extract_fans_count == 2
+
+    def test_percent_draughtproofed_flows_through_to_calculator_input(
+        self, result: EpcPropertyData
+    ) -> None:
+        # Arrange — fixture lodges `percent_draughtproofed: 100` at the
+        # cert root. cert_to_inputs reads it via epc.percent_draughtproofed
+        # for the §2 ventilation cascade (window draught loss). Without
+        # this the cascade defaults to 0 — treats every cert as fully
+        # draughty, over-counting infiltration.
+
+        # Act
+        v = result.percent_draughtproofed
+
+        # Assert
+        assert v == 100
+
+    def test_ventilation_completeness_all_seven_vent_fields_flow_through(
+        self, result: EpcPropertyData
+    ) -> None:
+        # Arrange — schema-21.0.1 carries seven vent / draught fields the
+        # cert→inputs cascade reads for the §2 infiltration calculation.
+        # Without these the calc treats the dwelling as flue-free / vent-
+        # free / no draught lobby, under-counting infiltration ACH.
+        # blocked_chimneys is top-level; the other 6 live on SapVentilation.
+
+        # Act
+        sv = result.sap_ventilation
+
+        # Assert
+        assert result.blocked_chimneys_count == 1
+        assert sv is not None
+        assert sv.open_flues_count == 1
+        assert sv.closed_flues_count == 1
+        assert sv.boiler_flues_count == 1
+        assert sv.other_flues_count == 1
+        assert sv.passive_vents_count == 2
+        assert sv.has_draught_lobby is True
+
+    # --- renewable heat incentive (RHI) ---
+
+    def test_renewable_heat_incentive(self, result: EpcPropertyData) -> None:
+        # Arrange — schema-21.0.1 sample JSON loaded via fixture
+
+        # Act
+        rhi = result.renewable_heat_incentive
+
+        # Assert
+        assert rhi is not None
+        assert rhi.space_heating_kwh == 13120.0
+        assert rhi.water_heating_kwh == 2285.0
+        assert rhi.impact_of_loft_insulation_kwh == -2114.0
+        assert rhi.impact_of_cavity_insulation_kwh == -122.0
+        assert rhi.impact_of_solid_wall_insulation_kwh == -3560.0
+
--- a/datatypes/epc/domain/tests/test_from_site_notes.py
+++ b/datatypes/epc/domain/tests/test_from_site_notes.py
@ -6,6 +6,7 @@ from typing import Any, Dict
 import pytest

 from datatypes.epc.domain.epc_property_data import (
+    BuildingPartIdentifier,
    EpcPropertyData,
    InstantaneousWwhrs,
    MainHeatingDetail,
@ -211,7 +212,7 @@ class TestFromSiteNotesExample1:
        assert len(result.sap_building_parts) == 1

    def test_building_part_identifier(self, result: EpcPropertyData) -> None:
-        assert result.sap_building_parts[0].identifier == "main"
+        assert result.sap_building_parts[0].identifier is BuildingPartIdentifier.MAIN

    def test_construction_age_band(self, result: EpcPropertyData) -> None:
        # main_building.age_range: "I: 1996 - 2002" → letter "I"
@ -464,7 +465,7 @@ class TestFromSiteNotesExample1:
            # Building parts
            sap_building_parts=[
                SapBuildingPart(
-                    identifier="main",
+                    identifier=BuildingPartIdentifier.MAIN,
                    construction_age_band="I",
                    wall_construction="Cavity",
                    wall_insulation_type="As built",
--- a/datatypes/epc/schema/helpers.py
+++ b/datatypes/epc/schema/helpers.py
@ -59,6 +59,12 @@ def _coerce(value: Any, hint: Any) -> Any:
        for arg in non_none_args:
            if dataclasses.is_dataclass(arg) and isinstance(value, dict):
                return _from_dict_impl(arg, value)
+        # Then try list types — covers Union[Dataclass, list[...]] polymorphism
+        # where a single JSON key can carry either a wrapper dict or a list of items.
+        if isinstance(value, list):
+            for arg in non_none_args:
+                if typing.get_origin(arg) is list:
+                    return _coerce(value, arg)
        # All remaining args are primitives — return value as-is
        return value

--- a/datatypes/epc/schema/rdsap_schema_21_0_0.py
+++ b/datatypes/epc/schema/rdsap_schema_21_0_0.py
@ -61,10 +61,10 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    has_fixed_air_conditioning: str
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs] = None
    shower_outlets: Optional[ShowerOutlets] = None
    cylinder_insulation_type: Optional[int] = None
    cylinder_thermostat: Optional[str] = None
@ -99,13 +99,28 @@ class PhotovoltaicSupply:
    none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails


+@dataclass
+class PhotovoltaicArray:
+    """Measured-PV array (peak_power, pitch, orientation, overshading).
+
+    Modern SAP10 EPCs with measured PV carry `photovoltaic_supply` as a nested
+    list (`list[list[PhotovoltaicArray]]`) rather than the legacy wrapper dict
+    `PhotovoltaicSupply`. The Union type on SapEnergySource.photovoltaic_supply
+    accepts either shape.
+    """
+    peak_power: float
+    pitch: int
+    orientation: int
+    overshading: int
+
+
@dataclass
 class SapEnergySource:
    mains_gas: str
    meter_type: int
    pv_connection: int
    pv_battery_count: int
-    photovoltaic_supply: PhotovoltaicSupply
+    photovoltaic_supply: Union[PhotovoltaicSupply, List[List[PhotovoltaicArray]]]
    wind_turbines_count: int
    wind_turbine_details: WindTurbineDetails
    gas_smart_meter_present: str
@ -151,11 +166,26 @@ class SapFloorDimension:
    floor_construction: Optional[int] = None


+@dataclass
+class RoomInRoofType1:
+    """RdSAP §3.9.1 Simplified Type 1 RR — gable lengths only.
+
+    `gable_wall_type_*` is the Table 4 gable variant (0 = external, etc.;
+    full enum not yet mapped). `gable_wall_length_*` is the run of the
+    external gable in metres. Heights are NOT lodged here — the cascade
+    applies the §3.9.1 default storey height (2.45 m)."""
+    gable_wall_type_1: Optional[int] = None
+    gable_wall_type_2: Optional[int] = None
+    gable_wall_length_1: Optional[float] = None
+    gable_wall_length_2: Optional[float] = None
+
+
@dataclass
 class SapRoomInRoof:
    """Room-in-roof details. insulation and roof_room_connected removed in schema 21.0.0."""
    floor_area: Union[int, float]
    construction_age_band: str
+    room_in_roof_type_1: Optional[RoomInRoofType1] = None


@dataclass
--- a/datatypes/epc/schema/rdsap_schema_21_0_1.py
+++ b/datatypes/epc/schema/rdsap_schema_21_0_1.py
@ -14,9 +14,9 @@ class EnergyElement:

@dataclass
 class Addendum:
-    addendum_numbers: List[int]
    stone_walls: Optional[str] = None
    system_build: Optional[str] = None
+    addendum_numbers: Optional[List[int]] = None


@dataclass
@ -27,7 +27,7 @@ class ShowerOutlet:

@dataclass
 class ShowerOutlets:
-    shower_outlet: ShowerOutlet
+    shower_outlet: Optional[ShowerOutlet] = None


@dataclass
@ -43,12 +43,12 @@ class MainHeatingDetail:
    has_fghrs: str  # TODO: make bool
    main_fuel_type: int
    heat_emitter_type: int
-    emitter_temperature: Union[int, str]
    main_heating_number: int
    main_heating_control: int
    main_heating_category: int
    main_heating_fraction: int
    main_heating_data_source: int
+    emitter_temperature: Optional[Union[int, str]] = None
    boiler_flue_type: Optional[int] = None
    fan_flue_present: Optional[str] = None # TODO: make bool
    boiler_ignition_type: Optional[int] = None
@ -62,11 +62,16 @@ class SapHeating:
    cylinder_size: int
    water_heating_code: int
    water_heating_fuel: int
-    instantaneous_wwhrs: InstantaneousWwhrs
    main_heating_details: List[MainHeatingDetail]
    immersion_heating_type: Union[int, str]
    has_fixed_air_conditioning: str
-    shower_outlets: Optional[ShowerOutlets] = None
+    instantaneous_wwhrs: Optional[InstantaneousWwhrs] = None
+    # Real-API certs carry shower_outlets as a list, not the synthetic single-object form;
+    # accept both shapes so older fixtures keep parsing.
+    shower_outlets: Optional[Union[ShowerOutlets, List[ShowerOutlets]]] = None
+    # SAP10 hot-water demand inputs.
+    number_baths: Optional[int] = None
+    number_baths_wwhrs: Optional[int] = None
    cylinder_insulation_type: Optional[int] = None
    cylinder_thermostat: Optional[str] = None
    secondary_fuel_type: Optional[int] = None
@ -81,7 +86,9 @@ class PvBattery:

@dataclass
 class PvBatteries:
-    pv_battery: PvBattery
+    # Real-API certs carry pv_batteries as a list (similar to shower_outlets);
+    # the older synthetic fixture used a single-object wrapper.
+    pv_battery: Optional[PvBattery] = None


@dataclass
@ -97,7 +104,22 @@ class PhotovoltaicSupplyNoneOrNoDetails:

@dataclass
 class PhotovoltaicSupply:
-    none_or_no_details: PhotovoltaicSupplyNoneOrNoDetails
+    none_or_no_details: Optional[PhotovoltaicSupplyNoneOrNoDetails] = None
+
+
+@dataclass
+class PhotovoltaicArray:
+    """Measured-PV array (peak_power, pitch, orientation, overshading).
+
+    Modern SAP10 EPCs with measured PV carry `photovoltaic_supply` as a nested
+    list (`list[list[PhotovoltaicArray]]`) rather than the legacy wrapper dict
+    `PhotovoltaicSupply`. The Union type on SapEnergySource.photovoltaic_supply
+    accepts either shape. Some certs wrap the scalars in Measurement dicts.
+    """
+    peak_power: Union[Measurement, int, float]
+    pitch: Union[Measurement, int]
+    orientation: Union[Measurement, int]
+    overshading: Union[Measurement, int]


@dataclass
@ -105,15 +127,15 @@ class SapEnergySource:
    mains_gas: str
    meter_type: int
    pv_connection: int
-    pv_battery_count: int
-    photovoltaic_supply: PhotovoltaicSupply
+    photovoltaic_supply: Union[PhotovoltaicSupply, List[List[PhotovoltaicArray]]]
    wind_turbines_count: int
-    wind_turbine_details: WindTurbineDetails
    gas_smart_meter_present: str
    is_dwelling_export_capable: str
    wind_turbines_terrain_type: int
    electricity_smart_meter_present: str
-    pv_batteries: Optional[PvBatteries] = None
+    pv_battery_count: Optional[int] = None
+    wind_turbine_details: Optional[WindTurbineDetails] = None
+    pv_batteries: Optional[Union[PvBatteries, List[PvBatteries]]] = None


@dataclass
@ -125,37 +147,54 @@ class WindowTransmissionDetails:

@dataclass
 class SapWindow:
-    pvc_frame: str
-    glazing_gap: int
    orientation: int
    window_type: int
-    frame_factor: float
    glazing_type: int
-    window_width: float
-    window_height: float
+    # Real-API certs sometimes carry a Measurement dict for dimensions, not a plain float.
+    window_width: Union[Measurement, int, float]
+    window_height: Union[Measurement, int, float]
    draught_proofed: str  # TODO: make bool
    window_location: int
    window_wall_type: int
    permanent_shutters_present: str  # TODO: make bool
-    window_transmission_details: WindowTransmissionDetails
    permanent_shutters_insulated: str
+    pvc_frame: Optional[str] = None
+    glazing_gap: Optional[int] = None
+    frame_factor: Optional[float] = None
+    window_transmission_details: Optional[WindowTransmissionDetails] = None


@dataclass
 class SapFloorDimension:
    floor: int
-    room_height: Measurement
-    total_floor_area: Measurement
-    party_wall_length: Union[Measurement, int]
-    heat_loss_perimeter: Measurement
+    # Real-API certs sometimes carry plain int/float instead of a Measurement object.
+    room_height: Union[Measurement, int, float]
+    total_floor_area: Union[Measurement, int, float]
+    party_wall_length: Union[Measurement, int, float]
+    heat_loss_perimeter: Union[Measurement, int, float]
    floor_insulation: Optional[int] = None
    floor_construction: Optional[int] = None


+@dataclass
+class RoomInRoofType1:
+    """RdSAP §3.9.1 Simplified Type 1 RR — gable lengths only.
+
+    `gable_wall_type_*` is the Table 4 gable variant (0 = external, etc.;
+    full enum not yet mapped). `gable_wall_length_*` is the run of the
+    external gable in metres. Heights are NOT lodged here — the cascade
+    applies the §3.9.1 default storey height (2.45 m)."""
+    gable_wall_type_1: Optional[int] = None
+    gable_wall_type_2: Optional[int] = None
+    gable_wall_length_1: Optional[float] = None
+    gable_wall_length_2: Optional[float] = None
+
+
@dataclass
 class SapRoomInRoof:
    floor_area: Union[int, float]
    construction_age_band: str
+    room_in_roof_type_1: Optional[RoomInRoofType1] = None


@dataclass
@ -170,19 +209,19 @@ class SapAlternativeWall:

@dataclass
 class SapBuildingPart:
-    identifier: str
-    wall_dry_lined: str
-    floor_heat_loss: int
-    roof_construction: int
-    wall_construction: int
-    building_part_number: int
-    sap_floor_dimensions: List[SapFloorDimension]
-    wall_insulation_type: int
-    construction_age_band: str
-    party_wall_construction: Union[int, str]
-    wall_thickness_measured: str
-    roof_insulation_location: Union[int, str]
-    roof_insulation_thickness: Union[str, int]
+    identifier: Optional[str] = None
+    wall_dry_lined: Optional[str] = None
+    floor_heat_loss: Optional[int] = None
+    roof_construction: Optional[int] = None
+    wall_construction: Optional[int] = None
+    building_part_number: Optional[int] = None
+    sap_floor_dimensions: Optional[List[SapFloorDimension]] = None
+    wall_insulation_type: Optional[int] = None
+    construction_age_band: Optional[str] = None
+    party_wall_construction: Optional[Union[int, str]] = None
+    wall_thickness_measured: Optional[str] = None
+    roof_insulation_location: Optional[Union[int, str]] = None
+    roof_insulation_thickness: Optional[Union[str, int]] = None
    sap_room_in_roof: Optional[SapRoomInRoof] = None
    sap_alternative_wall_1: Optional[SapAlternativeWall] = None
    sap_alternative_wall_2: Optional[SapAlternativeWall] = None
@ -276,7 +315,6 @@ class RdSapSchema21_0_1:
    assessment_type: str
    completion_date: str
    inspection_date: str
-    wet_rooms_count: int
    extensions_count: int
    measurement_type: int
    total_floor_area: int
@ -287,7 +325,6 @@ class RdSapSchema21_0_1:
    sap_energy_source: SapEnergySource
    secondary_heating: EnergyElement
    sap_building_parts: List[SapBuildingPart]
-    open_chimneys_count: int
    solar_water_heating: str
    habitable_room_count: int
    heating_cost_current: float
@ -300,10 +337,8 @@ class RdSapSchema21_0_1:
    has_hot_water_cylinder: str
    heating_cost_potential: float
    hot_water_cost_current: float
-    insulated_door_u_value: float
    mechanical_ventilation: int
    percent_draughtproofed: int
-    suggested_improvements: List[SuggestedImprovement]
    co2_emissions_potential: float
    energy_rating_potential: int
    lighting_cost_potential: float
@ -311,31 +346,51 @@ class RdSapSchema21_0_1:
    hot_water_cost_potential: float
    renewable_heat_incentive: RenewableHeatIncentive
    draughtproofed_door_count: int
-    mechanical_vent_duct_type: int
-    windows_transmission_details: WindowsTransmissionDetails
-    cfl_fixed_lighting_bulbs_count: int
    energy_consumption_current: int
    has_fixed_air_conditioning: str
-    multiple_glazed_proportion: int
    calculation_software_version: str
    energy_consumption_potential: int
    environmental_impact_current: int
-    led_fixed_lighting_bulbs_count: int
-    mechanical_vent_duct_placement: int
-    mechanical_vent_duct_insulation: int
    potential_energy_efficiency_band: str
-    pressure_test_certificate_number: int
-    mechanical_ventilation_index_number: int
    co2_emissions_current_per_floor_area: int
    current_energy_efficiency_band: str
    environmental_impact_potential: int
-    low_energy_fixed_lighting_bulbs_count: int
-    mechanical_vent_duct_insulation_level: int
-    mechanical_vent_measured_installation: str
    incandescent_fixed_lighting_bulbs_count: int
+    # Fields below are present in some certs but absent in many real-world responses;
+    # see datatypes/epc/schema/tests/fixtures/21_0_1_real.json for a representative cert.
+    air_tightness: Optional[EnergyElement] = None
+    extract_fans_count: Optional[int] = None
+    wet_rooms_count: Optional[int] = None
+    open_chimneys_count: Optional[int] = None
+    # Ventilation / draught completeness — surfaced into SapVentilation
+    # (or EpcPropertyData top-level for chimney counts) so the §2 cascade
+    # gets the real flue / vent / draught lobby state instead of zeros.
+    blocked_chimneys_count: Optional[int] = None
+    open_flues_count: Optional[int] = None
+    closed_flues_count: Optional[int] = None
+    boilers_flues_count: Optional[int] = None
+    other_flues_count: Optional[int] = None
+    psv_count: Optional[int] = None
+    has_draught_lobby: Optional[str] = None  # "true" / "false" / "unknown"
+    insulated_door_u_value: Optional[float] = None
+    suggested_improvements: Optional[List[SuggestedImprovement]] = None
+    mechanical_vent_duct_type: Optional[int] = None
+    windows_transmission_details: Optional[WindowsTransmissionDetails] = None
+    cfl_fixed_lighting_bulbs_count: Optional[int] = None
+    multiple_glazed_proportion: Optional[int] = None
+    led_fixed_lighting_bulbs_count: Optional[int] = None
+    mechanical_vent_duct_placement: Optional[int] = None
+    mechanical_vent_duct_insulation: Optional[int] = None
+    pressure_test_certificate_number: Optional[int] = None
+    mechanical_ventilation_index_number: Optional[int] = None
+    low_energy_fixed_lighting_bulbs_count: Optional[int] = None
+    mechanical_vent_duct_insulation_level: Optional[int] = None
+    mechanical_vent_measured_installation: Optional[str] = None
    sap_flat_details: Optional[SapFlatDetails] = None
    addendum: Optional[Addendum] = None
    address_line_2: Optional[str] = None
    has_heated_separate_conservatory: Optional[str] = None
    fixed_lighting_outlets_count: Optional[int] = None
    low_energy_fixed_lighting_outlets_count: Optional[int] = None
+    # LZC (low-carbon) energy-source codes flagged on the cert.
+    lzc_energy_sources: Optional[List[int]] = None
--- a/datatypes/epc/schema/tests/fixtures/21_0_1.json
+++ b/datatypes/epc/schema/tests/fixtures/21_0_1.json
@ -126,10 +126,20 @@
      "identifier": "Main Dwelling",
      "wall_dry_lined": "N",
      "floor_heat_loss": 7,
-      "sap_room_in_roof": {"floor_area": 100, "construction_age_band": "B"},
+      "sap_room_in_roof": {
+        "floor_area": 100,
+        "construction_age_band": "B",
+        "room_in_roof_type_1": {
+          "gable_wall_type_1": 0,
+          "gable_wall_type_2": 0,
+          "gable_wall_length_1": 6.4,
+          "gable_wall_length_2": 6.4
+        }
+      },
      "roof_construction": 4,
      "wall_construction": 4,
      "building_part_number": 1,
+      "flat_roof_insulation_thickness": "AB",
      "sap_floor_dimensions": [
        {
          "floor": 0,
@ -154,6 +164,14 @@
    }
  ],
  "open_chimneys_count": 1,
+  "extract_fans_count": 2,
+  "blocked_chimneys_count": 1,
+  "open_flues_count": 1,
+  "closed_flues_count": 1,
+  "boilers_flues_count": 1,
+  "other_flues_count": 1,
+  "psv_count": 2,
+  "has_draught_lobby": "true",
  "solar_water_heating": "N",
  "habitable_room_count": 5,
  "heating_cost_current": 365.98,
--- a/datatypes/epc/schema/tests/fixtures/21_0_1_real.json
+++ b/datatypes/epc/schema/tests/fixtures/21_0_1_real.json
@ -0,0 +1,309 @@
+{
+  "uprn": 0,
+  "roofs": [
+    {
+      "description": "(another dwelling above)",
+      "energy_efficiency_rating": 0,
+      "environmental_efficiency_rating": 0
+    }
+  ],
+  "walls": [
+    {
+      "description": "Solid brick, as built, no insulation (assumed)",
+      "energy_efficiency_rating": 1,
+      "environmental_efficiency_rating": 1
+    }
+  ],
+  "floors": [
+    {
+      "description": "Solid, no insulation (assumed)",
+      "energy_efficiency_rating": 0,
+      "environmental_efficiency_rating": 0
+    }
+  ],
+  "status": "entered",
+  "tenure": 1,
+  "window": {
+    "description": "Fully double glazed",
+    "energy_efficiency_rating": 3,
+    "environmental_efficiency_rating": 3
+  },
+  "lighting": {
+    "description": "Excellent lighting efficiency",
+    "energy_efficiency_rating": 5,
+    "environmental_efficiency_rating": 5
+  },
+  "postcode": "SE22 9QF",
+  "hot_water": {
+    "description": "From main system",
+    "energy_efficiency_rating": 4,
+    "environmental_efficiency_rating": 4
+  },
+  "post_town": "LONDON",
+  "built_form": "NR",
+  "created_at": "2026-03-10 00:03:32",
+  "door_count": 1,
+  "region_code": 17,
+  "report_type": 2,
+  "sap_heating": {
+    "number_baths": 1,
+    "cylinder_size": 1,
+    "number_baths_wwhrs": 0,
+    "water_heating_code": 901,
+    "water_heating_fuel": 26,
+    "main_heating_details": [
+      {
+        "has_fghrs": "N",
+        "main_fuel_type": 26,
+        "boiler_flue_type": 2,
+        "fan_flue_present": "Y",
+        "heat_emitter_type": 1,
+        "emitter_temperature": 0,
+        "main_heating_number": 1,
+        "main_heating_control": 2106,
+        "main_heating_category": 2,
+        "main_heating_fraction": 1,
+        "central_heating_pump_age": 0,
+        "main_heating_data_source": 1,
+        "main_heating_index_number": 17973
+      }
+    ],
+    "immersion_heating_type": "NA",
+    "has_fixed_air_conditioning": "false"
+  },
+  "sap_version": 10.2,
+  "sap_windows": [
+    {
+      "pvc_frame": "true",
+      "orientation": 5,
+      "window_type": 1,
+      "glazing_type": 2,
+      "window_width": 1.09,
+      "window_height": 1.75,
+      "draught_proofed": "true",
+      "window_location": 0,
+      "window_wall_type": 1,
+      "permanent_shutters_present": "N",
+      "permanent_shutters_insulated": "N"
+    },
+    {
+      "pvc_frame": "true",
+      "orientation": 5,
+      "window_type": 1,
+      "glazing_type": 2,
+      "window_width": 0.99,
+      "window_height": 0.89,
+      "draught_proofed": "true",
+      "window_location": 0,
+      "window_wall_type": 1,
+      "permanent_shutters_present": "N",
+      "permanent_shutters_insulated": "N"
+    },
+    {
+      "pvc_frame": "true",
+      "orientation": 3,
+      "window_type": 1,
+      "glazing_type": 2,
+      "window_width": 0.7,
+      "window_height": 0.7,
+      "draught_proofed": "true",
+      "window_location": 0,
+      "window_wall_type": 1,
+      "permanent_shutters_present": "N",
+      "permanent_shutters_insulated": "N"
+    }
+  ],
+  "schema_type": "RdSAP-Schema-21.0.1",
+  "uprn_source": "Address Matched",
+  "country_code": "ENG",
+  "main_heating": [
+    {
+      "description": "Boiler and radiators, mains gas",
+      "energy_efficiency_rating": 4,
+      "environmental_efficiency_rating": 4
+    }
+  ],
+  "air_tightness": {
+    "description": "(not tested)",
+    "energy_efficiency_rating": 0,
+    "environmental_efficiency_rating": 0
+  },
+  "dwelling_type": "Ground-floor flat",
+  "language_code": 1,
+  "pressure_test": 4,
+  "property_type": 2,
+  "address_line_1": "<scrubbed>",
+  "address_line_2": "<scrubbed>",
+  "assessment_type": "RdSAP",
+  "completion_date": "2026-03-10",
+  "inspection_date": "2026-03-05",
+  "extensions_count": 0,
+  "measurement_type": 1,
+  "sap_flat_details": {
+    "level": 1,
+    "top_storey": "N",
+    "storey_count": 4,
+    "flat_location": 0,
+    "heat_loss_corridor": 0
+  },
+  "total_floor_area": 27,
+  "transaction_type": 1,
+  "conservatory_type": 1,
+  "heated_room_count": 1,
+  "registration_date": "2026-03-10",
+  "sap_energy_source": {
+    "mains_gas": "Y",
+    "meter_type": 2,
+    "pv_connection": 0,
+    "photovoltaic_supply": {
+      "none_or_no_details": {
+        "percent_roof_area": 0
+      }
+    },
+    "wind_turbines_count": 0,
+    "gas_smart_meter_present": "false",
+    "is_dwelling_export_capable": "false",
+    "wind_turbines_terrain_type": 2,
+    "electricity_smart_meter_present": "false"
+  },
+  "secondary_heating": {
+    "description": "None",
+    "energy_efficiency_rating": 0,
+    "environmental_efficiency_rating": 0
+  },
+  "extract_fans_count": 1,
+  "sap_building_parts": [
+    {
+      "identifier": "Main Dwelling",
+      "wall_dry_lined": "N",
+      "floor_heat_loss": 7,
+      "roof_construction": 3,
+      "wall_construction": 3,
+      "building_part_number": 1,
+      "sap_floor_dimensions": [
+        {
+          "floor": 0,
+          "room_height": {
+            "value": 2.4,
+            "quantity": "metres"
+          },
+          "floor_insulation": 1,
+          "total_floor_area": {
+            "value": 26.78,
+            "quantity": "square metres"
+          },
+          "party_wall_length": {
+            "value": 10.52,
+            "quantity": "metres"
+          },
+          "floor_construction": 1,
+          "heat_loss_perimeter": {
+            "value": 10.52,
+            "quantity": "metres"
+          }
+        }
+      ],
+      "wall_insulation_type": 4,
+      "construction_age_band": "A",
+      "party_wall_construction": 0,
+      "wall_thickness_measured": "N",
+      "roof_insulation_location": "ND",
+      "roof_insulation_thickness": "ND",
+      "wall_insulation_thickness": "NI",
+      "floor_insulation_thickness": "NI"
+    }
+  ],
+  "solar_water_heating": "N",
+  "habitable_room_count": 1,
+  "heating_cost_current": {
+    "value": 355,
+    "currency": "GBP"
+  },
+  "insulated_door_count": 0,
+  "co2_emissions_current": 1.1,
+  "energy_rating_average": 60,
+  "energy_rating_current": 71,
+  "lighting_cost_current": {
+    "value": 22,
+    "currency": "GBP"
+  },
+  "main_heating_controls": [
+    {
+      "description": "Programmer, room thermostat and TRVs",
+      "energy_efficiency_rating": 4,
+      "environmental_efficiency_rating": 4
+    }
+  ],
+  "has_hot_water_cylinder": "false",
+  "heating_cost_potential": {
+    "value": 228,
+    "currency": "GBP"
+  },
+  "hot_water_cost_current": {
+    "value": 128,
+    "currency": "GBP"
+  },
+  "mechanical_ventilation": 0,
+  "percent_draughtproofed": 100,
+  "suggested_improvements": [
+    {
+      "sequence": 1,
+      "typical_saving": {
+        "value": 91,
+        "currency": "GBP"
+      },
+      "indicative_cost": "\u00a37,500 - \u00a311,000",
+      "improvement_type": "Q",
+      "improvement_details": {
+        "improvement_number": 7
+      },
+      "improvement_category": 5,
+      "energy_performance_rating": 76,
+      "environmental_impact_rating": 83
+    },
+    {
+      "sequence": 2,
+      "typical_saving": {
+        "value": 34,
+        "currency": "GBP"
+      },
+      "indicative_cost": "\u00a35,000 - \u00a310,000",
+      "improvement_type": "W2",
+      "improvement_details": {
+        "improvement_number": 58
+      },
+      "improvement_category": 5,
+      "energy_performance_rating": 77,
+      "environmental_impact_rating": 85
+    }
+  ],
+  "co2_emissions_potential": 0.7,
+  "energy_rating_potential": 77,
+  "lighting_cost_potential": {
+    "value": 22,
+    "currency": "GBP"
+  },
+  "schema_version_original": "21.0.1",
+  "hot_water_cost_potential": {
+    "value": 131,
+    "currency": "GBP"
+  },
+  "renewable_heat_incentive": {
+    "water_heating": 1653.36,
+    "space_heating_existing_dwelling": 2797.73
+  },
+  "draughtproofed_door_count": 1,
+  "energy_consumption_current": 229,
+  "has_fixed_air_conditioning": "false",
+  "multiple_glazed_proportion": 100,
+  "calculation_software_version": "5.02r0334",
+  "energy_consumption_potential": 148,
+  "environmental_impact_current": 77,
+  "current_energy_efficiency_band": "C",
+  "environmental_impact_potential": 85,
+  "led_fixed_lighting_bulbs_count": 5,
+  "has_heated_separate_conservatory": "false",
+  "potential_energy_efficiency_band": "C",
+  "co2_emissions_current_per_floor_area": 41,
+  "incandescent_fixed_lighting_bulbs_count": 0
+}
--- a/datatypes/epc/schema/tests/test_schema_loading.py
+++ b/datatypes/epc/schema/tests/test_schema_loading.py
@ -378,3 +378,25 @@ class TestRdSapSchema21_0_1:

    def test_incandescent_bulb_count(self, epc: RdSapSchema21_0_1) -> None:
        assert epc.incandescent_fixed_lighting_bulbs_count == 0
+
+
+class TestRdSapSchema21_0_1AgainstRealApiCert:
+    """Regression guard: a real cert (PII-scrubbed) from the gov bulk JSON must parse.
+
+    Previously the dataclass was driven by the synthetic `21_0_1.json` fixture, which
+    coincidentally contained every optional field. Real-API certs omit many of them,
+    so the dataclass annotations have to allow Optional/missing on those fields.
+    This test fails the moment a now-Optional field is accidentally re-marked required.
+    """
+
+    def test_real_cert_parses_via_from_dict(self) -> None:
+        # Arrange
+        real_doc = load("21_0_1_real.json")  # NOTE(review): `load` is not defined in this hunk — presumably a fixture helper in this module
+
+        # Act
+        epc = from_dict(RdSapSchema21_0_1, real_doc)  # per the class docstring, this is the step expected to fail on a regression
+
+        # Assert
+        assert epc.schema_type == "RdSAP-Schema-21.0.1"  # value lodged under "schema_type" in the real fixture
+        assert epc.sap_heating is not None
+        assert len(epc.sap_windows) > 0  # the real fixture lodges three windows
--- a/datatypes/epc/surveys/elmhurst_site_notes.py
+++ b/datatypes/epc/surveys/elmhurst_site_notes.py
@ -1,4 +1,4 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import date
 from typing import List, Optional

@ -51,6 +51,22 @@ class BuildingPartDimensions:
    floors: List[FloorDimension]


+@dataclass
+class AlternativeWall:
+    """RdSAP §S5 Alternative Wall — a sub-area of the building part's
+    gross wall that has a different construction (e.g. a small 1.43 m²
+    timber-frame panel on an otherwise cavity-walled extension). Up to
+    two alternative walls per bp; Elmhurst lodges them in §7's "1st/2nd
+    Extension" subsection under the "Alternative Wall N <field>" prefix."""
+
+    area_m2: float  # area of the alternative sub-area, in m² (per the field name)
+    wall_type: str  # e.g. "TI Timber Frame"
+    insulation: str  # e.g. "A As Built"
+    thickness_unknown: bool  # NOTE(review): presumably True when no thickness was lodged — confirm against the parser
+    thickness_mm: Optional[int]  # no default declared: callers must pass None explicitly
+    u_value_known: bool  # NOTE(review): presumably whether a measured U-value was lodged — confirm
+
+
@dataclass
 class WallDetails:
    wall_type: str  # e.g. "CA Cavity"
@ -58,6 +74,10 @@ class WallDetails:
    thickness_unknown: bool
    u_value_known: bool
    party_wall_type: str  # e.g. "U Unable to determine"
+    # `alternative_walls` carries up to two alt sub-areas per bp.
+    alternative_walls: List["AlternativeWall"] = field(
+        default_factory=lambda: []  # type: ignore[reportUnknownLambdaType]
+    )
    thickness_mm: Optional[int] = None


@ -78,6 +98,40 @@ class FloorDetails:
    default_u_value: Optional[float] = None


+@dataclass
+class RoomInRoofSurface:
+    """One sub-element of a §3.10 Detailed Room-in-Roof assessment:
+    Flat Ceiling / Stud Wall / Slope / Gable Wall / Common Wall.
+
+    Each is lodged with a Length × Height pair plus insulation /
+    insulation-type / gable-type / measured-U fields. Absent surfaces
+    are still lodged at 0×0 (e.g. a Flat Ceiling with no flat-roof
+    portion) and filtered out in the mapper."""
+
+    name: str  # e.g. "Flat Ceiling 1", "Stud Wall 2", "Gable Wall 1"
+    length_m: float  # Length half of the Length × Height pair; 0 for an absent surface
+    height_m: float  # Height half of the Length × Height pair; 0 for an absent surface
+    insulation: str  # "As Built" | "None" | "100 mm" | ""
+    insulation_type: Optional[str]  # e.g. "Mineral or EPS"
+    gable_type: Optional[str]  # "Party" | "Sheltered" | "Connected to heated space"
+    default_u_value: Optional[float]  # no default declared: callers must pass None explicitly
+    u_value_known: bool  # NOTE(review): presumably flags whether `u_value` is a real measurement — confirm
+    u_value: float  # assessor-measured U-value (0.00 when not known)
+
+
+@dataclass
+class RoomInRoof:
+    """§8.1 Rooms in Roof — Main-property entry only (extensions never
+    carry RR in the observed corpus). `surfaces` lists all 5 RdSAP §3.10
+    detailed-assessment kinds in document order; 0×0 entries are kept so
+    the mapper sees the complete table shape."""
+
+    floor_area_m2: float  # room-in-roof floor area in m² (per the field name)
+    construction_age_band: Optional[str]  # no default declared: callers must pass None explicitly
+    assessment: str  # "Detailed" | "Simplified Type 1" | "Simplified Type 2"
+    surfaces: List[RoomInRoofSurface]  # document order, 0×0 rows included (see docstring)
+
+
@dataclass
 class Window:
    width_m: float
@ -140,6 +194,11 @@ class MainHeating:
        None  # e.g. "17742 Potterton, Promax 33 Combi ErP, 88.30%"
    )
    heat_pump_age: Optional[str] = None
+    # Section 14.0 also lodges a secondary heating system (when one is
+    # installed). The SAP code is the integer the cascade reads via
+    # `SapHeating.secondary_heating_type` to apply the Table 11
+    # secondary-fraction split; None when no secondary is lodged.
+    secondary_heating_sap_code: Optional[int] = None


@dataclass
@ -184,6 +243,21 @@ class Renewables:
    hydro_electricity_generated_kwh: float


+@dataclass
+class ExtensionPart:
+    """Additional building part on a multi-bp cert (e.g. "1st Extension",
+    "2nd Extension" on the Elmhurst Summary PDF). Mirrors the per-bp
+    fabric fields the main dwelling carries at the top-level
+    ElmhurstSiteNotes."""
+
+    name: str  # e.g. "1st Extension", "2nd Extension"
+    construction_age_band: str  # e.g. "B 1900-1929" (may differ from main)
+    dimensions: BuildingPartDimensions  # this extension's own dimensions
+    walls: WallDetails  # this extension's own wall details
+    roof: RoofDetails  # this extension's own roof details
+    floor: FloorDetails  # this extension's own floor details
+
+
@dataclass
 class ElmhurstSiteNotes:
    surveyor_info: SurveyorInfo
@ -245,3 +319,17 @@ class ElmhurstSiteNotes:

    # Sections 16.0–22.0
    renewables: Renewables
+
+    # Additional building parts beyond the main dwelling. The singular
+    # `dimensions`, `walls`, `roof`, `floor`, and `construction_age_band`
+    # fields above describe the "Main" property; each ExtensionPart in
+    # this list describes a discrete extension with its own age band,
+    # dimensions, and fabric details. Empty list = single-bp cert
+    # (preserves backward compatibility with the existing fixture).
+    extensions: List[ExtensionPart] = field(default_factory=lambda: [])  # type: ignore[reportUnknownLambdaType]
+
+    # §8.1 Rooms in Roof — Main property only in the observed corpus.
+    # When None the dwelling has no RR storey (a 2-storey house with a
+    # cold loft instead of a room-in-roof). The mapper translates the
+    # surface table into a `SapRoomInRoof` attached to the Main bp.
+    room_in_roof: Optional[RoomInRoof] = None
--- a/infrastructure/terraform/README.md
+++ b/infrastructure/terraform/README.md
--- a/infrastructure/terraform/cdn/main.tf
+++ b/infrastructure/terraform/cdn/main.tf
--- a/infrastructure/terraform/cdn/provider.tf
+++ b/infrastructure/terraform/cdn/provider.tf
--- a/infrastructure/terraform/cdn/variables.tf
+++ b/infrastructure/terraform/cdn/variables.tf
--- a/infrastructure/terraform/cdn_certificate/main.tf
+++ b/infrastructure/terraform/cdn_certificate/main.tf
--- a/infrastructure/terraform/cdn_certificate/outputs.tf
+++ b/infrastructure/terraform/cdn_certificate/outputs.tf
--- a/infrastructure/terraform/cdn_certificate/provider.tf
+++ b/infrastructure/terraform/cdn_certificate/provider.tf
--- a/infrastructure/terraform/cdn_certificate/variables.tf
+++ b/infrastructure/terraform/cdn_certificate/variables.tf
--- a/infrastructure/terraform/lambda/_template/README.md
+++ b/infrastructure/terraform/lambda/_template/README.md
@ -10,7 +10,7 @@
 ### 2. Add infrastructure prerequisites (shared stack)
 - Add a new ECR repository in:

-  infrastructure/terraform/shared/main.tf
+  deployment/terraform/shared/main.tf

 - Create a PR to deploy this to main then dev in order to deploy the shared stack

--- a/infrastructure/terraform/lambda/_template/main.tf
+++ b/infrastructure/terraform/lambda/_template/main.tf
--- a/infrastructure/terraform/lambda/_template/provider.tf
+++ b/infrastructure/terraform/lambda/_template/provider.tf
--- a/infrastructure/terraform/lambda/_template/variables.tf
+++ b/infrastructure/terraform/lambda/_template/variables.tf
--- a/infrastructure/terraform/lambda/address2UPRN/main.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/main.tf
--- a/infrastructure/terraform/lambda/address2UPRN/outputs.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/outputs.tf
--- a/infrastructure/terraform/lambda/address2UPRN/provider.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/provider.tf
--- a/infrastructure/terraform/lambda/address2UPRN/variables.tf
+++ b/infrastructure/terraform/lambda/address2UPRN/variables.tf
--- a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/main.tf
+++ b/infrastructure/terraform/lambda/bulk_address2uprn_combiner/main.tf
--- a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
+++ b/infrastructure/terraform/lambda/bulk_address2uprn_combiner/outputs.tf
--- a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/provider.tf
+++ b/infrastructure/terraform/lambda/bulk_address2uprn_combiner/provider.tf
--- a/infrastructure/terraform/lambda/bulk_address2uprn_combiner/variables.tf
+++ b/infrastructure/terraform/lambda/bulk_address2uprn_combiner/variables.tf
--- a/infrastructure/terraform/lambda/categorisation/main.tf
+++ b/infrastructure/terraform/lambda/categorisation/main.tf
--- a/infrastructure/terraform/lambda/categorisation/outputs.tf
+++ b/infrastructure/terraform/lambda/categorisation/outputs.tf
--- a/infrastructure/terraform/lambda/categorisation/provider.tf
+++ b/infrastructure/terraform/lambda/categorisation/provider.tf
--- a/infrastructure/terraform/lambda/categorisation/variables.tf
+++ b/infrastructure/terraform/lambda/categorisation/variables.tf
--- a/infrastructure/terraform/lambda/condition-etl/main.tf
+++ b/infrastructure/terraform/lambda/condition-etl/main.tf
--- a/Show more
+++ b/Show more