diff --git a/.devcontainer/asset_list/Dockerfile b/.devcontainer/asset_list/Dockerfile index 512ab109..72a5de53 100644 --- a/.devcontainer/asset_list/Dockerfile +++ b/.devcontainer/asset_list/Dockerfile @@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ # # 4) Python deps - if you want to run assest list ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -ADD asset_list/requirements.txt requirements.txt -RUN pip install -r requirements.txt +ADD .devcontainer/asset_list/requirements.txt requirements2.txt +ADD asset_list/requirements.txt requirements1.txt +RUN cat requirements1.txt requirements2.txt >> requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/asset_list/requirements.txt b/.devcontainer/asset_list/requirements.txt index 33cc4c2e..28730ed5 100644 --- a/.devcontainer/asset_list/requirements.txt +++ b/.devcontainer/asset_list/requirements.txt @@ -15,10 +15,9 @@ uvicorn[standard] pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 -pydantic-settings<2 pyyaml>=6.0.1 -pydantic>=1.10.7,<2 sqlmodel # Formatting black==26.1.0 dotenv +pydantic-settings \ No newline at end of file diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 6b6c4994..408c0319 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -13,6 +13,9 @@ on: required: false default: "." 
type: string + build_args: + required: false + type: string outputs: image_digest: @@ -29,11 +32,22 @@ on: required: true AWS_REGION: required: true + DEV_DB_HOST: + required: false + DEV_DB_PORT: + required: false + DEV_DB_NAME: + required: false jobs: build: runs-on: ubuntu-latest + env: + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + outputs: image_digest: ${{ steps.digest.outputs.image_digest }} ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }} @@ -64,7 +78,22 @@ jobs: - name: Build & push image run: | IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}" - docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }} + + # Writes build args and removes line breaks + BUILD_ARGS="" + while IFS= read -r line; do + # skip empty lines + [ -n "$line" ] || continue + temp=$(eval echo "$line") + BUILD_ARGS="$BUILD_ARGS --build-arg $temp" + done <<< "${{ inputs.build_args }}" + + docker build \ + -f ${{ inputs.dockerfile_path }} \ + $BUILD_ARGS \ + -t $IMAGE_URI \ + ${{ inputs.build_context }} + docker push $IMAGE_URI - name: Resolve image digest diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 41a551c4..4ac08e41 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -16,6 +16,7 @@ jobs: id: set-stage shell: bash run: | + env BRANCH="${GITHUB_REF_NAME}" if [[ "$BRANCH" == "prod" ]]; then @@ -73,8 +74,8 @@ jobs: uses: ./.github/workflows/_build_image.yml with: ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} - dockerfile_path: backend/address2UPRN/Dockerfile - build_context: backend/address2UPRN + dockerfile_path: backend/address2UPRN/handler/Dockerfile + build_context: . 
secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -96,3 +97,76 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + + # ============================================================ + # 2️⃣ Build Postcode Splitter image and Push + # ============================================================ + postcodeSplitter_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/postcode_splitter/handler/Dockerfile + build_context: . + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # 3️⃣ Deploy Postcode Splitter Lambda + # ============================================================ + postcodeSplitter_lambda: + needs: [postcodeSplitter_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: postcodeSplitter + lambda_path: infrastructure/terraform/lambda/postcodeSplitter + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # Condition ETL image and Push + # ============================================================ + condition_etl_image: + needs: [determine_stage, shared_terraform] + uses: 
./.github/workflows/_build_image.yml + with: + ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/condition/handler/Dockerfile + build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + + # ============================================================ + # Deploy Condition ETL Lambda + # ============================================================ + condition_etl_lambda: + needs: [condition_etl_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: condition-etl + lambda_path: infrastructure/terraform/lambda/condition-etl + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + diff --git a/asset_list/app.py b/asset_list/app.py index a18feeaf..01062261 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -69,14 +69,51 @@ def app(): Property UPRN """ - data_folder = "/workspaces/home/Downloads" - data_filename = "Anchor 1.xlsx" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire" + data_filename = "ASPIRE ASSET LIST.xlsx" + sheet_name = "Asset List" + postcode_column = "Postcode" + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = [] + missing_postcodes_method = None + landlord_year_built = None + 
landlord_os_uprn = None + landlord_property_type = "Property Type" + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None + landlord_existing_pv = None + landlord_property_id = "LLUPRN" + landlord_sap = None + outcomes_filename = None + outcomes_sheetname = None + outcomes_postcode = None + outcomes_houseno = None + outcomes_id = None + outcomes_address = None + master_filepaths = [] + master_id_colnames = [] + master_to_asset_list_filepath = None + phase = False + ecosurv_landlords = None + asset_list_header = 0 + landlord_block_reference = None + + # Peabody data for cleaning + data_folder = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation" + ) + data_filename = "to_standardise_uprns.xlsx" sheet_name = "Sheet1" postcode_column = "Postcode" - address1_column = "House Number" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["House Number", "Address Line 1", "Address Line 2"] + address1_column = None + address1_method = "house_number_extraction" + fulladdress_column = "Address" + address_cols_to_concat = None missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index a9defdef..d6466539 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -520,4 +520,14 @@ BUILT_FORM_MAPPINGS = { '2.EXT.WALL FLAT': 'mid-terrace', '2 EXT. 
WALL FLAT': 'mid-terrace', + 'Maisonette: Detached: Ground Floor': 'detached', + 'Maisonette: Enclosed End Terrace: Top Floor': 'enclosed end-terrace', + 'Flat: End Terrace: Basement': 'end-terrace', + 'Flat: Mid Terrace: Basement': 'mid-terrace', + 'Flat: Enclosed Mid Terrace: Basement': 'enclosed mid-terrace', + 'House: Semi Detached: Top Floor': 'semi-detached', + 'House: End Terrace: Ground Floor': 'end-terrace', + 'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace', + 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace' + } diff --git a/asset_list/mappings/exising_pv.py b/asset_list/mappings/exising_pv.py index e67fafb4..defce35f 100644 --- a/asset_list/mappings/exising_pv.py +++ b/asset_list/mappings/exising_pv.py @@ -17,5 +17,10 @@ EXISTING_PV_MAPPINGS = { 'PV: 10% roof area, PV: 2kWp array': 'already has PV', 'PV: 50% roof area': 'already has PV', 'Solar PV': 'already has PV', - 'SOLAR PV': 'already has PV' + 'SOLAR PV': 'already has PV', + + 'PV: 40% roof area, PV: 2kWp array': 'already has PV', + 'PV: 33% roof area, PV: 2kWp array': 'already has PV', + 'PV: 30% roof area': 'already has PV' + } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index ffd1b198..272d6279 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -494,6 +494,10 @@ HEATING_MAPPINGS = { 'Gas (including LPG) room heaters: Gas fire, open flue, 1980 or later (open fronted), sitting proud of, ' 'and sealed to, fireplace opening': 'room heaters', 'Boiler: A rated Regular Boiler, System 2: Boiler: C rated Regular Boiler': 'boiler - other fuel', - 'Boiler: G rated Combi': 'gas condensing combi' + 'Boiler: G rated Combi': 'gas condensing combi', + + 'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler', + 'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators', + 'Boiler: A rated Combi, System 2: Boiler: C rated 
Combi': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 1f251598..177a7549 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -427,6 +427,23 @@ PROPERTY_MAPPING = { 'End Terrace': 'unknown', 'Detached': 'unknown', 'Mid-terrace': 'unknown', - 'MID - TERRACE': 'unknown' + 'MID - TERRACE': 'unknown', + 'COMOFF': 'unknown', + 'LOTS': 'unknown', + + 'Maisonette: Detached: Ground Floor': 'maisonette', + 'Maisonette: Enclosed End Terrace: Top Floor': 'maisonette', + 'Flat: End Terrace: Basement': 'flat', + 'Bungalow: EnclosedEndTerrace': 'bungalow', + 'Flat: Mid Terrace: Basement': 'flat', + 'House: Semi Detached: Top Floor': 'house', + 'House: End Terrace: Ground Floor': 'house', + 'Maisonette: Enclosed End Terrace: Mid Floor': 'maisonette', + 'Flat: Enclosed Mid Terrace: Basement': 'flat', + + 'Warden Bungalow': 'bungalow', + 'Warden Flat': 'flat', + 'Upper Floor Flat': 'flat', + 'Extracare Scheme': 'other' } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 0857b046..cf829a5f 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -301,4 +301,13 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'PitchedWithSlopingCeiling: As Built': 'pitched insulated', 'PitchedNormalLoftAccess: As Built': 'pitched unknown insulation', + 'Flat: 150mm, Flat: Unknown': 'flat insulated', + 'AnotherDwellingAbove: Unknown, Flat: Unknown': 'another dwelling above', + 'AnotherDwellingAbove, AnotherDwellingAbove: Unknown': 'another dwelling above', + 'PitchedNormalNoLoftAccess: Unknown, PitchedWithSlopingCeiling: As Built': 'pitched unknown access to loft', + 'Flat: No Insulation': 'flat uninsulated', + 'AnotherDwellingAbove: Unknown, PitchedNormalLoftAccess: 250mm': 'another dwelling above', + 'PitchedNormalLoftAccess: 175mm': 'pitched insulated', + 'AnotherDwellingAbove: 300mm': 'another dwelling above' + } diff --git a/asset_list/mappings/walls.py 
b/asset_list/mappings/walls.py index 418ae9f8..1bb02a9a 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -354,6 +354,15 @@ WALL_CONSTRUCTION_MAPPINGS = { 'System built Internal': 'insulated system built', 'Cavity: AsBuilt (1976-1982), TimberFrame: AsBuilt': 'cavity unknown insulation', - 'Cavity: FilledCavityPlusExternal': 'filled cavity' + 'Cavity: FilledCavityPlusExternal': 'filled cavity', + + 'Cavity, Filled Cavity': 'filled cavity', + 'Solid Brick, As Built': 'solid brick unknown insulation', + 'Cavity, As Built': 'cavity unknown insulation', + 'Sandstone, As Built': 'sandstone or limestone unknown insulation', + 'Timber Frame, As Built': 'timber frame unknown insulation', + 'Solid Brick, Internal Insulation': 'insulated solid brick', + 'Granite or Whinstone, As Built': 'granite or whinstone unknown insulation', + 'Solid Brick, External': 'insulated solid brick' } diff --git a/backend/.env.local b/backend/.env.local deleted file mode 100644 index a05c93a3..00000000 --- a/backend/.env.local +++ /dev/null @@ -1,22 +0,0 @@ -DB_HOST=db -DB_PORT=5432 -DB_NAME=tech_team_local_db -DB_USERNAME=postgres -DB_PASSWORD=makingwarmerhomes - - -#not used -GOOGLE_SOLAR_API_KEY="test" -SAP_PREDICTIONS_BUCKET="test" -CARBON_PREDICTIONS_BUCKET="test" -HEAT_PREDICTIONS_BUCKET="test" -HEATING_KWH_PREDICTIONS_BUCKET="test" -HOTWATER_KWH_PREDICTIONS_BUCKET="test" -API_KEY="test" -ENVIRONMENT="test" -SECRET_KEY="test" -PLAN_TRIGGER_BUCKET="test" -DATA_BUCKET="test" -EPC_AUTH_TOKEN="test" -ENGINE_SQS_URL="test" -ENERGY_ASSESSMENTS_BUCKET="test" \ No newline at end of file diff --git a/backend/.env.test b/backend/.env.test new file mode 100644 index 00000000..5b77f243 --- /dev/null +++ b/backend/.env.test @@ -0,0 +1,22 @@ +DB_HOST=db +DB_PORT=5432 +DB_NAME=tech_team_local_db +DB_USERNAME=postgres +DB_PASSWORD=makingwarmerhomes + + +#not used +GOOGLE_SOLAR_API_KEY=test +SAP_PREDICTIONS_BUCKET=test +CARBON_PREDICTIONS_BUCKET=test 
+HEAT_PREDICTIONS_BUCKET=test +HEATING_KWH_PREDICTIONS_BUCKET=test +HOTWATER_KWH_PREDICTIONS_BUCKET=test +API_KEY=test +ENVIRONMENT=test +SECRET_KEY=test +PLAN_TRIGGER_BUCKET=test +DATA_BUCKET=test +EPC_AUTH_TOKEN=test +ENGINE_SQS_URL=test +ENERGY_ASSESSMENTS_BUCKET=test \ No newline at end of file diff --git a/backend/Property.py b/backend/Property.py index 14f7e03f..6a84fc09 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -1256,7 +1256,8 @@ class Property: "biodiesel": "Smokeless Fuel", "b30d": "B30K Biofuel", "coal": "Coal", - "oil": "Oil" + "oil": "Oil", + "unknown": None # Handle - anything post 2020 is electricity else gas } self.heating_energy_source = list({ @@ -1326,7 +1327,16 @@ class Property: if self.heating_energy_source == "Varied (Community Scheme)": if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown - self.heating_energy_source = fuel_map[self.main_fuel["fuel_type"]] + mapped_to = fuel_map[self.main_fuel["fuel_type"]] + if mapped_to is None and self.main_fuel["fuel_type"] == "unknown": + # Handle logic based on age band + if self.year_built >= 2020: + self.heating_energy_source = "Electricity" + else: + self.heating_energy_source = "Natural Gas (Community Scheme)" + + else: + self.heating_energy_source = mapped_to else: raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") diff --git a/backend/address2UPRN/Dockerfile b/backend/address2UPRN/Dockerfile deleted file mode 100644 index ac6af2a5..00000000 --- a/backend/address2UPRN/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.10 - -# Copy function code -COPY main.py . 
- -# Set the handler -CMD ["main.handler"] diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile new file mode 100644 index 00000000..5a09bd44 --- /dev/null +++ b/backend/address2UPRN/handler/Dockerfile @@ -0,0 +1,23 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/address2UPRN/handler/requirements.txt . + +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +COPY backend/address2UPRN/main.py . + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["main.handler"] diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt new file mode 100644 index 00000000..bc753841 --- /dev/null +++ b/backend/address2UPRN/handler/requirements.txt @@ -0,0 +1,3 @@ +epc-api-python==1.0.2 +tqdm +pandas \ No newline at end of file diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 9d27a5ce..ba386e0a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3): method="get", params={"postcode": postcode}, ) + if not search_resp or "rows" not in search_resp: + return pd.DataFrame() results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"]) @@ -298,7 +300,7 @@ def get_uprn_candidates( ) -def get_uprn(user_inputed_address: str, postcode: str): +def get_uprn(user_inputed_address: str, postcode: str, return_address=False): """ Return uprn (str) Return False if failed to find a sensible matching epc @@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, 
postcode: str): if found_uprn == "": return None + if return_address: + return found_uprn, address return found_uprn diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index bd8f8017..a71b5827 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -1,17 +1,24 @@ import pandas as pd +from tqdm import tqdm +from backend.address2UPRN.main import get_uprn + +# Enable tqdm for pandas +tqdm.pandas() + +df = pd.read_excel("address2.xlsx") -# use Address 1 -junte_df = pd.read_excel("hackney_uprn_failures.xlsx") +def extract_uprn(row): + print(row["User Input"], row["Postcode"]) + result = get_uprn(row["User Input"], row["Postcode"], return_address=True) + + if result is None: + return pd.Series([None, None]) + + uprn, found_address = result + return pd.Series([uprn, found_address]) -# use domna_address_1 -khalim_df = pd.read_excel("khalim_standard.xlsx") - - -combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1') - -# Find the row in khalim_df that does not app - -result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])] +df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1) +df.to_excel("outputs2.xlsx", index=False) diff --git a/backend/app/config.py b/backend/app/config.py index b335c215..41552ae5 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,8 +1,22 @@ +import os from functools import lru_cache -from pydantic_settings import BaseSettings +from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Optional +def resolve_env_file() -> Optional[str]: + env = os.getenv("ENVIRONMENT", "local") + + if env == "local": + return "backend/.env" + + if env == "test": + return "backend/.env.test" + + # prod = no env file + return None + + class Settings(BaseSettings): API_KEY: str API_KEY_NAME: str = "X-API-KEY" @@ -41,8 +55,10 @@ class Settings(BaseSettings): AWS_SECRET_KEY_ID: Optional[str] = None 
AWS_DEFAULT_REGION: Optional[str] = None - class Config: - env_file = "backend/.env.local" + model_config = SettingsConfigDict( + env_file=resolve_env_file(), + env_file_encoding="utf-8", + ) @lru_cache() diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 33f391d4..10d7fb06 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -24,7 +24,7 @@ def get_cleaned(): cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT) + bucket_name=get_settings().DATA_BUCKET ) cleaned = msgpack.unpackb(cleaned, raw=False) diff --git a/backend/condition/condition_trigger_request.py b/backend/condition/condition_trigger_request.py new file mode 100644 index 00000000..03bd6ad1 --- /dev/null +++ b/backend/condition/condition_trigger_request.py @@ -0,0 +1,33 @@ +from enum import Enum +from typing import Optional +from pydantic import BaseModel + + +class ConditionFileType(Enum): + LBWF = "LBWF" + Peabody = "Peabody" + # TODO: make these asset management systems rather than client names + + +class ConditionTriggerRequest(BaseModel): + file_type: ConditionFileType + trigger_file_bucket: str # TODO: get this from settings + trigger_file_key: str + + uprn_lookup_file_bucket: Optional[str] = None # TODO: get this from settings + uprn_lookup_file_key: Optional[str] = None + + +# { +# "file_type": "Peabody", +# "trigger_file_bucket": "condition-data-dev", +# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx", +# "uprn_lookup_file_bucket": "condition-data-dev", +# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv" +# } + +# { +# "file_type": "LBWF", +# "trigger_file_bucket": "condition-data-dev", +# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx", +# } diff --git a/backend/condition/domain/mapping/lbwf/lbwf_mapper.py 
b/backend/condition/domain/mapping/lbwf/lbwf_mapper.py index 60c8b1ac..9dbfcb17 100644 --- a/backend/condition/domain/mapping/lbwf/lbwf_mapper.py +++ b/backend/condition/domain/mapping/lbwf/lbwf_mapper.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Optional, Tuple from datetime import date from backend.condition.domain.aspect_condition import AspectCondition diff --git a/backend/condition/file_type.py b/backend/condition/file_type.py deleted file mode 100644 index e0736814..00000000 --- a/backend/condition/file_type.py +++ /dev/null @@ -1,16 +0,0 @@ -from enum import Enum - -class FileType(Enum): - LBWF = "lbwf" - Peabody = "peabody" - -def detect_file_type(filepath: str) -> FileType: - path = filepath.lower() - - if "lbwf" in path: - return FileType.LBWF - - if "peabody" in path: - return FileType.Peabody - - raise ValueError("Unrecognised file path") \ No newline at end of file diff --git a/backend/condition/handler.py b/backend/condition/handler.py deleted file mode 100644 index 5279b029..00000000 --- a/backend/condition/handler.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Mapping, Any -from io import BytesIO - -from utils.logger import setup_logger -from backend.condition.processor import process_file - - -logger = setup_logger() - -def handler(event: Mapping[str, Any], context: Any) -> None: - # Temporary stub for PoC wiring - dummy_stream = BytesIO(b"") - - source_key = event.get("source_key", "unknown-source") - - process_file(dummy_stream, source_key) \ No newline at end of file diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile new file mode 100644 index 00000000..71556895 --- /dev/null +++ b/backend/condition/handler/Dockerfile @@ -0,0 +1,48 @@ +FROM public.ecr.aws/lambda/python:3.11 +# For local running: +# FROM python:3.11.10-bullseye + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + + +# Set working directory (Lambda task root) +WORKDIR /var/task 
+ +# Environment +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} + +COPY backend/.env.test backend/.env + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/condition/handler/requirements.txt . + +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +COPY backend/condition/ backend/condition/ + +COPY backend/app/db/models/condition.py backend/app/db/models/condition.py +COPY backend/app/db/connection.py backend/app/db/connection.py +COPY backend/app/config.py backend/app/config.py + +COPY backend/__init__.py backend/__init__.py +COPY backend/app/__init__.py backend/app/__init__.py +COPY backend/app/db/__init__.py backend/app/db/__init__.py + + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["backend/condition/handler/handler.handler"] +# For local running +# CMD ["python", "-m", "backend.condition.handler.handler"] diff --git a/backend/condition/handler/handler.py b/backend/condition/handler/handler.py new file mode 100644 index 00000000..2f3616a4 --- /dev/null +++ b/backend/condition/handler/handler.py @@ -0,0 +1,51 @@ +import json +from typing import Mapping, Any +from io import BytesIO + +from backend.condition.condition_trigger_request import ConditionTriggerRequest +from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3 +from backend.condition.processor import process_file +from utils.logger import setup_logger +from utils.s3 import read_io_from_s3 + + +logger = setup_logger() + + +def handler(event: Mapping[str, Any], context: Any) -> None: + + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + logger.debug("Validating request body") + payload = ConditionTriggerRequest.model_validate(body_dict) + + 
logger.debug("Successfully validated request body") + + if payload.uprn_lookup_file_bucket and payload.uprn_lookup_file_key: + logger.debug("Getting UPRN lookup file from s3") + uprn_lookup = UprnLookupS3( + bucket=payload.uprn_lookup_file_bucket, + key=payload.uprn_lookup_file_key, + ) # TODO: replace with postgres implementation + logger.debug("Successfully got UPRN lookup file from s3") + else: + uprn_lookup = None + + logger.debug("Getting conditions data from s3") + file_bytes: BytesIO = read_io_from_s3( + bucket_name=payload.trigger_file_bucket, + file_key=payload.trigger_file_key, + ) + logger.debug( + "Successfully got conditions data from s3. Moving on to process file..." + ) + + process_file( + file_stream=file_bytes, + file_type=payload.file_type, + uprn_lookup=uprn_lookup, + ) + + except Exception as e: + logger.error(f"Failed to process record: {e}") diff --git a/backend/condition/handler/requirements.txt b/backend/condition/handler/requirements.txt new file mode 100644 index 00000000..1e259a95 --- /dev/null +++ b/backend/condition/handler/requirements.txt @@ -0,0 +1,9 @@ +openpyxl +sqlmodel +pydantic-settings +psycopg2-binary==2.9.10 + +# pandas isn't used, but needed for importing from utils.s3 +pandas==2.2.2 +numpy==1.26.4 +openpyxl diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py index e39d38c7..4595b93b 100644 --- a/backend/condition/local_runner.py +++ b/backend/condition/local_runner.py @@ -1,5 +1,7 @@ from pathlib import Path +from backend.condition.condition_trigger_request import ConditionFileType +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.processor import process_file @@ -20,15 +22,27 @@ def main() -> None: / "peabody" / "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx" ) - filepaths = [lbwf_path, peabody_path] - # filepaths = [lbwf_path] + peabody_uprn_lookup_path: Path = ( + path / "peabody" / 
"PeabodyPropertymatched_Dec25_propref_UPRN.csv" + ) + # filepaths = [lbwf_path, peabody_path] + filepaths = [lbwf_path] # filepaths = [peabody_path] + uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix()) + + def get_file_type(file_path: str) -> ConditionFileType: + if "peabody" in file_path: + return ConditionFileType.Peabody + if "lbwf" in file_path: + return ConditionFileType.LBWF + for fp in filepaths: with fp.open("rb") as f: process_file( file_stream=f, - source_key=fp.as_posix(), + file_type=get_file_type(fp.as_posix()), + uprn_lookup=uprn_lookup, ) diff --git a/backend/condition/lookups/uprn_lookup.py b/backend/condition/lookups/uprn_lookup.py new file mode 100644 index 00000000..0f6e78fd --- /dev/null +++ b/backend/condition/lookups/uprn_lookup.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from typing import BinaryIO, Dict + + +class UprnLookup(ABC): + @abstractmethod + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + pass diff --git a/backend/condition/lookups/uprn_lookup_csv.py b/backend/condition/lookups/uprn_lookup_csv.py new file mode 100644 index 00000000..8b1c21a2 --- /dev/null +++ b/backend/condition/lookups/uprn_lookup_csv.py @@ -0,0 +1,23 @@ +import csv +from io import TextIOWrapper +from typing import BinaryIO, Dict, TextIO +from backend.condition.lookups.uprn_lookup import UprnLookup + + +class UprnLookupLocal(UprnLookup): + def __init__(self, csv_path: str): + self.csv_path = csv_path + + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + with open(self.csv_path, "rb") as f: + return self.parse_csv(f) + + def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]: + text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8") + mapping: Dict[str, int] = {} + reader = csv.DictReader(text_stream) + for row in reader: + if not row["reference"] or not row["out_uprn"]: + continue + mapping[row["reference"].strip()] = int(row["out_uprn"].strip()) + return mapping diff --git 
a/backend/condition/lookups/uprn_lookup_s3.py b/backend/condition/lookups/uprn_lookup_s3.py new file mode 100644 index 00000000..da725a2f --- /dev/null +++ b/backend/condition/lookups/uprn_lookup_s3.py @@ -0,0 +1,29 @@ +import csv +from io import BytesIO, TextIOWrapper +from typing import BinaryIO, Dict, TextIO + +from backend.condition.lookups.uprn_lookup import UprnLookup +from utils.s3 import read_io_from_s3 + + +class UprnLookupS3(UprnLookup): + def __init__(self, bucket: str = "", key: str = ""): + self.bucket = bucket + self.key = key + + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + file_bytes: BytesIO = read_io_from_s3( + bucket_name=self.bucket, file_key=self.key + ) + + return self._parse_csv_bytes(file_bytes) + + def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]: + text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8") + mapping: Dict[str, int] = {} + reader = csv.DictReader(text_stream) + for row in reader: + if not row["reference"] or not row["out_uprn"]: + continue + mapping[row["reference"].strip()] = int(row["out_uprn"].strip()) + return mapping diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index 68ca0292..b5d28e18 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,27 +1,35 @@ +from typing import Optional +from backend.condition.condition_trigger_request import ConditionFileType from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper -from backend.condition.file_type import FileType +from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.parsing.parser import Parser from backend.condition.parsing.lbwf_parser import LbwfParser from backend.condition.parsing.peabody_parser import PeabodyParser -def select_parser(file_type: FileType) 
-> Parser: - if file_type is FileType.LBWF: +def select_parser( + file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None +) -> Parser: + if file_type is ConditionFileType.LBWF: return LbwfParser() - if file_type is FileType.Peabody: - return PeabodyParser() + if file_type is ConditionFileType.Peabody: + if not uprn_lookup: + raise ValueError( + "Cannot instantiate Peabody Parser without UPRN lookup being provided" + ) + return PeabodyParser(uprn_lookup=uprn_lookup) raise ValueError("Unrecognised file type, unable to instantiate Parser") -def select_mapper(file_type: FileType) -> Mapper: - if file_type is FileType.LBWF: +def select_mapper(file_type: ConditionFileType) -> Mapper: + if file_type is ConditionFileType.LBWF: return LbwfMapper() - if file_type is FileType.Peabody: + if file_type is ConditionFileType.Peabody: return PeabodyMapper() raise ValueError("Unrecognised file type, unable to instantiate Mapper") diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 3a23d028..a713b1ef 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -18,7 +18,6 @@ class LbwfParser(Parser): def parse( self, file_stream: BinaryIO, - location_ref_to_uprn_map: Optional[Dict[str, int]] = None, ) -> Any: wb: Workbook = load_workbook(file_stream) address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict( diff --git a/backend/condition/parsing/parser.py b/backend/condition/parsing/parser.py index 825abcd5..b160b217 100644 --- a/backend/condition/parsing/parser.py +++ b/backend/condition/parsing/parser.py @@ -8,6 +8,5 @@ class Parser(ABC): def parse( self, file_stream: BinaryIO, - location_ref_to_uprn_map: Optional[Dict[str, int]] = None, ) -> Any: pass diff --git a/backend/condition/parsing/peabody_parser.py b/backend/condition/parsing/peabody_parser.py index c53fd6d1..4620ba82 100644 --- a/backend/condition/parsing/peabody_parser.py +++ 
b/backend/condition/parsing/peabody_parser.py @@ -4,6 +4,7 @@ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict from openpyxl import Workbook, load_workbook from collections import defaultdict +from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.parsing.parser import Parser from backend.condition.parsing.records.peabody.peabody_asset_condition import ( PeabodyAssetCondition, @@ -15,42 +16,29 @@ logger = setup_logger() class PeabodyParser(Parser): + def __init__(self, uprn_lookup: UprnLookup): + self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC? + def parse( self, file_stream: BinaryIO, - location_ref_to_uprn_map: Optional[Dict[str, int]] = None, ) -> Any: - wb: Workbook = load_workbook(file_stream) - - if location_ref_to_uprn_map is None: - location_ref_to_uprn_map: Dict[str, int] = ( - PeabodyParser._build_location_ref_to_uprn_map() - ) - + file_stream.seek(0) + logger.debug("[PeabodyParser] Loading workbook...") + wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True) + logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...") assets = PeabodyParser._parse_assets(wb) + logger.debug( + "[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..." 
+ ) + location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup() + logger.debug("[PeabodyParser] Successfully parsed UPRN lookup") return PeabodyParser._group_assets_into_properties( assets=assets, location_ref_to_uprn_map=location_ref_to_uprn_map, ) - @staticmethod - def _build_location_ref_to_uprn_map() -> Dict[str, int]: - location_ref_to_uprn_filepath: Path = ( - Path(__file__).resolve().parents[1] - / "sample_data" - / "peabody" - / "PeabodyPropertymatched_Dec25_propref_UPRN.csv" - ) - location_ref_to_uprn_map: Dict[str, int] = {} - - with location_ref_to_uprn_filepath.open(newline="") as f: - reader: Any = csv.DictReader(f) - for row in reader: - location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"]) - - return location_ref_to_uprn_map - @staticmethod def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]: assets_sheet = wb["Survey Records - D & Lower"] @@ -67,7 +55,7 @@ class PeabodyParser(Parser): ) if not asset.is_block_level: # Block-level condition surveys are out of scope for now - # until we have a wider think on how to handle block + # until we have a wider think on how to handle blocks assets.append(asset) # TODO: handle block-level assets except Exception as e: @@ -92,13 +80,14 @@ class PeabodyParser(Parser): assets_by_location_reference[asset.lo_reference].append(asset) properties: List[PeabodyProperty] = [] + failed_mappings_count = 0 for location_ref, grouped_assets in assets_by_location_reference.items(): uprn = location_ref_to_uprn_map.get(location_ref) if uprn is None: - logger.warning(f"No UPRN found for Location Reference: {location_ref}") + failed_mappings_count += 1 continue properties.append( @@ -108,6 +97,7 @@ class PeabodyParser(Parser): ) ) + logger.warning(f"No UPRN found for {failed_mappings_count} Location References") return properties @staticmethod diff --git a/backend/condition/persistence/condition_postgres.py b/backend/condition/persistence/condition_postgres.py index 9d7895f0..e83df540 
100644 --- a/backend/condition/persistence/condition_postgres.py +++ b/backend/condition/persistence/condition_postgres.py @@ -19,18 +19,19 @@ class ConditionPostgres: def bulk_insert_surveys( self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100 ) -> None: - logger.info( - f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..." + logger.debug( + f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..." ) survey_models: List[PropertyConditionSurveyModel] = [ ConditionPostgres.map_survey_to_model(s) for s in surveys ] total: int = len(survey_models) - logger.info( - f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..." + logger.debug( + f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {batch_size}..." ) with db_session() as session: + logger.info("[ConditionPostgres] Successfully made connection to database") for start in range(0, total, batch_size): end = min(start + batch_size, total) batch = survey_models[start:end] diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 4d8f16cf..ad5b4232 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,26 +1,31 @@ -from typing import Any, BinaryIO, List +from typing import Any, BinaryIO, List, Optional from datetime import datetime +from backend.condition.condition_trigger_request import ConditionFileType +from backend.condition.lookups.uprn_lookup import UprnLookup from utils.logger import setup_logger from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.property_condition_survey import PropertyConditionSurvey from backend.condition.parsing.parser import Parser from backend.condition.persistence.condition_postgres import ConditionPostgres -from backend.condition.file_type import FileType, detect_file_type from 
backend.condition.parsing.factory import select_parser, select_mapper logger = setup_logger() -def process_file(file_stream: BinaryIO, source_key: str) -> None: - logger.info(f"[processor] Received file: {source_key}") - +def process_file( + file_stream: BinaryIO, + file_type: ConditionFileType, + uprn_lookup: Optional[UprnLookup], +) -> None: # Instantiation - file_type: FileType = detect_file_type(source_key) - parser: Parser = select_parser(file_type) + logger.debug(f"[processor] Instantiating classes...") + parser: Parser = select_parser(file_type, uprn_lookup) mapper: Mapper = select_mapper(file_type) persistence = ConditionPostgres() + logger.debug(f"[processor] Finished instantiating classes. Calling Parser...") + # Orchestration raw_properties: List[Any] = parser.parse(file_stream) diff --git a/backend/condition/tests/lookups/test_uprn_lookup_csv.py b/backend/condition/tests/lookups/test_uprn_lookup_csv.py new file mode 100644 index 00000000..d01c52c2 --- /dev/null +++ b/backend/condition/tests/lookups/test_uprn_lookup_csv.py @@ -0,0 +1,34 @@ +import pytest +from typing import Dict +from tempfile import NamedTemporaryFile + +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal + + +@pytest.fixture +def prop_ref_uprn_csv_file() -> str: + csv_content = """reference,out_uprn + ABC123,10000000001 + DEF456,10000000002 + GHI789,10000000003 + """ + with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp: + tmp.write(csv_content) + tmp.flush() + return tmp.name + + +def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None: + # arrange + uprn_lookup = UprnLookupLocal(prop_ref_uprn_csv_file) + expected_map: Dict[str, int] = { + "ABC123": 10000000001, + "DEF456": 10000000002, + "GHI789": 10000000003, + } + + # act + actual_map: Dict[str, int] = uprn_lookup.get_property_ref_to_uprn_lookup() + + # assert + assert actual_map == expected_map diff --git a/backend/condition/tests/parsing/test_parsing_factory.py 
b/backend/condition/tests/parsing/test_parsing_factory.py index e2b478ff..df01eaad 100644 --- a/backend/condition/tests/parsing/test_parsing_factory.py +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -1,11 +1,13 @@ import pytest +from backend.condition.condition_trigger_request import ConditionFileType +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.parsing.factory import select_parser -from backend.condition.file_type import FileType + def test_selects_lbwf_parser(): # arrange - file_type = FileType.LBWF + file_type = ConditionFileType.LBWF expected_class_name = "LbwfParser" # act @@ -14,13 +16,15 @@ def test_selects_lbwf_parser(): # assert assert expected_class_name == actual_class_name + def test_selects_peabody_parser(): # arrange - file_type = FileType.Peabody + file_type = ConditionFileType.Peabody expected_class_name = "PeabodyParser" + uprn_lookup = UprnLookupLocal(csv_path="test") # act - actual_class_name = select_parser(file_type).__class__.__name__ + actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__ # assert - assert expected_class_name == actual_class_name \ No newline at end of file + assert expected_class_name == actual_class_name diff --git a/backend/condition/tests/parsing/test_peabody_parser.py b/backend/condition/tests/parsing/test_peabody_parser.py index 20f7a28e..5fb42204 100644 --- a/backend/condition/tests/parsing/test_peabody_parser.py +++ b/backend/condition/tests/parsing/test_peabody_parser.py @@ -1,9 +1,11 @@ +from tempfile import NamedTemporaryFile import pytest from typing import Any, Dict from io import BytesIO from openpyxl import Workbook from datetime import datetime +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.parsing.peabody_parser import PeabodyParser from backend.condition.parsing.records.peabody.peabody_asset_condition import ( PeabodyAssetCondition, @@ -145,23 +147,28 @@ def 
peabody_assets_xlsx_bytes() -> BytesIO: @pytest.fixture -def location_ref_to_uprn_map() -> Dict[str, int]: - return { - "B000RAND": 1, - "B000BLOCK": 2, - "B000FAKE": 3, - "B000MIS": 4, - } +def prop_ref_uprn_csv_file() -> str: + csv_content = """reference,out_uprn + B000RAND,1 + B000BLOCK,2 + B000FAKE,3 + B000MIS,4 + """ + with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp: + tmp.write(csv_content) + tmp.flush() + return tmp.name def test_peabody_parser_parses_conditions( - peabody_assets_xlsx_bytes, location_ref_to_uprn_map + peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file ): # arrange - parser = PeabodyParser() + uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file) + parser = PeabodyParser(uprn_lookup=uprn_lookup) # act - result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map) + result: Any = parser.parse(peabody_assets_xlsx_bytes) # assert assert len(result) == 3 diff --git a/backend/condition/tests/test_detect_file_type.py b/backend/condition/tests/test_detect_file_type.py deleted file mode 100644 index fecf22c1..00000000 --- a/backend/condition/tests/test_detect_file_type.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from backend.condition.file_type import FileType, detect_file_type - -def test_detects_lbwf_file_type(): - # arrange - file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx" - expected_file_type = FileType.LBWF - - # act - actual_file_type: FileType = detect_file_type(file_path_str) - - # assert - assert expected_file_type == actual_file_type - -def test_unknown_filepath_raises_value_error(): - # arrange - file_path_str = "unknown/Example Asset Data.xlsx" - - # act + assert - with pytest.raises(ValueError): - detect_file_type(file_path_str) \ No newline at end of file diff --git a/backend/engine/engine.py b/backend/engine/engine.py index e833eb89..69726604 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -978,13 +978,15 @@ async def model_engine(body: 
PlanTriggerRequest): recommendations_scoring_data.extend(p.recommendations_scoring_data) logger.info("Preparing data for scoring in sap change api") - recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data).drop( - columns=[ - "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", - "carbon_ending" - ] - ) - # Temp putting this here + recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) + if not recommendations_scoring_data.empty: + recommendations_scoring_data = recommendations_scoring_data.drop( + columns=[ + "rdsap_change", "heat_demand_change", "carbon_change", "sap_ending", "heat_demand_ending", + "carbon_ending" + ] + ) + # TODO: Temp putting this here recommendations_scoring_data["is_post_sap10_ending"] = True all_predictions = await model_api.async_paginated_predictions( diff --git a/backend/ml_models/api.py b/backend/ml_models/api.py index daf4b715..440367b2 100644 --- a/backend/ml_models/api.py +++ b/backend/ml_models/api.py @@ -313,4 +313,15 @@ class ModelApi: logger.error(f"Batch {chunk}-{chunk + batch_size} failed (Attempt {attempts}): {e}") await asyncio.sleep(2 ** attempts) # exponential backoff await self.close_aiohttp_session() + + # Ensure stable output structure for the datagrame to be utilised by other functions downstream + for k in all_predictions.keys(): + if all_predictions[k].empty: + col_template = ['id', 'predictions', 'property_id', 'recommendation_id', 'phase'] if ( + extract_ids) else ['id', 'predictions'] + + all_predictions[k] = pd.DataFrame( + columns=col_template + ) + return all_predictions diff --git a/backend/onboarders/README.md b/backend/onboarders/README.md new file mode 100644 index 00000000..063fee20 --- /dev/null +++ b/backend/onboarders/README.md @@ -0,0 +1,102 @@ +# Retrofit Property Data Onboarding + +This repository contains an ETL pipeline for transforming raw retrofit property data from external source systems ( +currently Parity) into a 
standardised internal format, compatible with both address2uprn and the engine. + +The pipeline is designed to: + +- Run as an AWS Lambda triggered by SQS +- Read raw CSV/XLSX files from S3 +- Perform rule-based mappings +- Infer as-built property attributes, assumed from the property's age band +- Write the processed CSV back to S3, to be consumed by address2uprn + +### Structure + +SQS → Lambda handler → OnboarderFactory → System-specific Onboarder → Mapping → CSV to S3 + +Each source system implements its own **Onboarder**, while sharing a common base and mapping process. + +--- + +### Repository Structure + +onboarders/ +├── `handler.py` # Lambda entrypoint \ +├── `factory.py` # Onboarder factory \ +├── `base.py` # Shared onboarding base class \ +├── `parity.py` # Parity-specific transformation logic \ +├── `mappings/` \ +│ └── `parity/` # Parity domain mappings & classifiers \ +│ ├── `age_band.py` \ +│ ├── `property_type.py` \ +│ ├── `built_form.py` \ +│ ├── `walls.py` \ +│ ├── `roof.py` \ +│ ├── `floor.py` \ +│ ├── `glazing.py` \ +│ ├── `heating.py` \ +│ ├── `as_built_wall_classifiers.py` \ +│ ├── `as_built_roof_classifiers.py` \ +│ └── `as_built_floor_classifiers.py` \ +├── `tests/` \ +├── `requirements.txt` \ +└── `README.md` + + +--- + +### Lambda Entry Point (`handler.py`) + +The Lambda handler: + +1. Consumes messages from the SQS queue +2. Validates the payload +3. Instantiates the correct onboarder via `OnboarderFactory` +4. Runs the transformation +5. Writes the transformed CSV back to S3 + +### Expected Event Payload + +```json +{ + "s3_uri": "s3://bucket/path/to/input.xlsx", + "system": "parity", + "format": "xlsx", + "sheet_name": "Sustainability" +} + +``` + +### Onboarder Base (`base.py`) + +`OnboarderBase` provides shared functionality across all systems. 
+ +*Responsibilities* + +- Reading CSV/XLSX files from S3 +- Writing transformed CSVs to S3 +- Defining canonical output column names +- Providing validation helpers +- Common output - for the moment, onboarders are expected to return a CSV + +### Parity Onboarder (`parity.py`) + +`ParityOnboarder` contains all Parity-specific transformation logic. + +*Responsibilities* + +- Map raw Parity fields to internal EPC-aligned enums +- Infer “as-built” constructions using age bands when insulation data is missing +- Resolve energy efficiency ratings deterministically +- Normalise output into a fixed schema + +The `transform()` method orchestrates the transformation process. + +### TODOs + +- In `backend/onboarders/mappings/parity/glazing.py` we currently map the Parity descriptions + to tuples of descriptions and efficiency ratings. This is okay for the moment but we may consider + using a data class, just given how error-prone this is. +- This is also true for heating mappings in `backend/onboarders/mappings/parity/heating.py` +- Implement an AI-enabled version, to replace the standardised asset list \ No newline at end of file diff --git a/backend/onboarders/__init__.py b/backend/onboarders/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/onboarders/base.py b/backend/onboarders/base.py new file mode 100644 index 00000000..03cb2370 --- /dev/null +++ b/backend/onboarders/base.py @@ -0,0 +1,84 @@ +import pandas as pd +from utils.s3 import read_from_s3, read_excel_from_s3, save_csv_to_s3 + + +class OnboarderBase: + # Input dataset to be transformed + data: pd.DataFrame | None = None + bucket_name = None + input_file_name = None + output_file_name = None + # Description columns + landlord_wall_construction: str = "landlord_wall_construction" + landlord_roof_construction: str = "landlord_roof_construction" + landlord_floor_construction: str = "landlord_floor_construction" + landlord_windows_type: str = "landlord_windows_type" + 
landlord_heating_construction: str = "landlord_heating_construction" + landlord_fuel_type: str = "landlord_fuel_type" + landlord_heating_controls: str = "landlord_heating_controls" + landlord_hot_water_system: str = "landlord_hot_water_system" + + # Efficiency columns + landlord_roof_efficiency: str = "landlord_roof_efficiency" + landlord_windows_efficiency: str = "landlord_windows_efficiency" + landlord_heating_controls_efficiency: str = "landlord_heating_controls_efficiency" + landlord_heating_efficiency: str = "landlord_heating_efficiency" + landlord_hot_water_efficiency: str = "landlord_hot_water_efficiency" + landlord_wall_efficiency: str = "landlord_wall_efficiency" + + # Additional windows features + landlord_multi_glaze_proportion: str = "landlord_multi_glaze_proportion" + landlord_glazed_type: str = "landlord_glazed_type" + landlord_glazed_area: str = "landlord_glazed_area" + + # Additional roof features + landlord_has_sloping_ceiling: str = "landlord_has_sloping_ceiling" + + # Shape, dimensions, age + landlord_total_floor_area_m2: str = "landlord_total_floor_area_m2" + landlord_construction_age_band: str = "landlord_construction_age_band" + landlord_property_type: str = "landlord_property_type" + landlord_built_form: str = "landlord_built_form" + + def read_s3(self, file_format, **kwargs): + + if self.input_file_name is None or self.bucket_name is None: + raise ValueError("Bucket name and input file name must be set before reading from S3.") + if file_format == "xlsx": + self.data = read_excel_from_s3( + bucket_name=self.bucket_name, + file_key=self.input_file_name, + sheet_name=kwargs.get("sheet_name"), + header_row=kwargs.get("header_row", 0) + ) + else: + self.data = read_from_s3(bucket_name=self.bucket_name, s3_file_name=self.input_file_name) + + def write(self): + if self.data is None: + raise ValueError("No data to write. 
Please run transform() before writing.") + + if self.bucket_name is None or self.output_file_name is None: + raise ValueError("Bucket name and output file name must be set before writing to S3.") + # Store file as csv - will store in the same route location as the input file + save_csv_to_s3(dataframe=self.data, bucket_name=self.bucket_name, file_name=self.output_file_name) + + @staticmethod + def assert_nulls_only_from_source_nulls(data: pd.DataFrame, original_column: str, mapped_column: str) -> bool: + # We only allow nulls if the original value was null + null_vals = data[pd.isnull(data[mapped_column])] + if null_vals.empty: + return True + # We make sure all original values were null + assert pd.isnull(null_vals[original_column]).all(), ( + f"Some values in {mapped_column} were not mapped, but original values were not null" + ) + + @staticmethod + def assert_no_nulls(data: pd.DataFrame, column: str): + assert pd.isnull(data[column]).sum() == 0, f"column {column} contains null values, but should not" + + def map_construction_age_band(self): + raise NotImplementedError( + "This method should be implemented by subclasses to map construction age bands to descriptions" + ) diff --git a/backend/onboarders/factory.py b/backend/onboarders/factory.py new file mode 100644 index 00000000..2ff7dcbc --- /dev/null +++ b/backend/onboarders/factory.py @@ -0,0 +1,10 @@ +from onboarders.parity import ParityOnboarder + + +class OnboarderFactory: + @staticmethod + def create_onboarder(onboarder_type, **kwargs): + if onboarder_type == "parity": + return ParityOnboarder(**kwargs) + + raise ValueError(f"Unknown onboarder type: {onboarder_type}") diff --git a/backend/onboarders/handler.py b/backend/onboarders/handler.py new file mode 100644 index 00000000..d66b5796 --- /dev/null +++ b/backend/onboarders/handler.py @@ -0,0 +1,50 @@ +import json +from pydantic import BaseModel, Field +from typing import Optional, Literal +from onboarders.factory import OnboarderFactory +from 
utils.logger import setup_logger + +logger = setup_logger() + + +class OnboardingEvent(BaseModel): + s3_uri: str = Field(..., description="S3 URI of the raw ARA input file") + system: Literal["parity", "generic"] = Field(..., description="Onboarding system identifier") + format: Literal["csv", "xlsx"] + sheet_name: Optional[str] = None + + +def handler(event, context): + """ + Lambda handler that triggers the model engine for each SQS message. + """ + for record in event.get("Records", []): + try: + event_body = json.loads(record["body"]) + # Sample input data + # event_body = { + # "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for " + # "Domna.xlsx", + # "system": "parity", + # "format": "xlsx", + # "sheet_name": "Sustainability" + # } + + logger.info("Processing record with body: %s", event_body) + + validated_event = OnboardingEvent(**event_body) + onboarder = OnboarderFactory.create_onboarder( + validated_event.system, + fileuri=validated_event.s3_uri, + format=validated_event.format, + sheet_name=validated_event.sheet_name, + file_format=validated_event.format + ) + + logger.info("Transforming data") + onboarder.transform() + logger.info(f"Writing data to {onboarder.output_file_name}, bucket: {onboarder.bucket_name}") + onboarder.write() + + except Exception as e: + logger.error(f"Failed to process record: {e}") diff --git a/backend/onboarders/mappings/age_band.py b/backend/onboarders/mappings/age_band.py deleted file mode 100644 index 2487c921..00000000 --- a/backend/onboarders/mappings/age_band.py +++ /dev/null @@ -1,14 +0,0 @@ -party_map = { - "Before 1900": 'England and Wales: before 1900', - "1900-1929": 'England and Wales: 1900-1929', - "1930-1949": 'England and Wales: 1930-1949', - "1950-1966": 'England and Wales: 1950-1966', - "1967-1975": 'England and Wales: 1967-1975', - "1976-1982": 'England and Wales: 1976-1982', - "1983-1990": 'England and Wales: 1983-1990', - "1991-1995": 'England and Wales: 
1991-1995', - "1996-2002": 'England and Wales: 1996-2002', - "2003-2006": 'England and Wales: 2003-2006', - "2007-2011": 'England and Wales: 2007-2011', - "2012 onwards": 'England and Wales: 2012-2021', -} diff --git a/backend/onboarders/mappings/built_form.py b/backend/onboarders/mappings/built_form.py deleted file mode 100644 index 23901fc6..00000000 --- a/backend/onboarders/mappings/built_form.py +++ /dev/null @@ -1,15 +0,0 @@ -parity_map = { - "MidTerrace": "Mid-Terrace", - "EndTerrace": "End-Terrace", - "Detached": "Detached", - "SemiDetached": "Semi-Detached", - "EnclosedMidTerrace": "Enclosed Mid-Terrace", - "EnclosedEndTerrace": "Enclosed End-Terrace", -} - -# MidTerrace 41462 -# EndTerrace 20910 -# Detached 16875 -# SemiDetached 14725 -# EnclosedMidTerrace 3176 -# EnclosedEndTerrace 2393 diff --git a/backend/onboarders/mappings/parity/age_band.py b/backend/onboarders/mappings/parity/age_band.py new file mode 100644 index 00000000..406d39c1 --- /dev/null +++ b/backend/onboarders/mappings/parity/age_band.py @@ -0,0 +1,19 @@ +from datatypes.epc.construction_age_band import EpcConstructionAgeBand + +parity_map = { + "Before 1900": EpcConstructionAgeBand.before_1900, + "1900-1929": EpcConstructionAgeBand.from_1900_to_1929, + "1930-1949": EpcConstructionAgeBand.from_1930_to_1949, + "1950-1966": EpcConstructionAgeBand.from_1950_to_1966, + "1967-1975": EpcConstructionAgeBand.from_1967_to_1975, + "1976-1982": EpcConstructionAgeBand.from_1976_to_1982, + "1983-1990": EpcConstructionAgeBand.from_1983_to_1990, + "1991-1995": EpcConstructionAgeBand.from_1991_to_1995, + "1996-2002": EpcConstructionAgeBand.from_1996_to_2002, + "2003-2006": EpcConstructionAgeBand.from_2003_to_2006, + "2007-2011": EpcConstructionAgeBand.from_2007_to_2011, + "2012 onwards": EpcConstructionAgeBand.from_2012_onwards, + # Newer age bands, under SAP10 + "2012-2022": EpcConstructionAgeBand.from_2012_to_2022, + "2023 onwards": EpcConstructionAgeBand.from_2023_onwards, +} diff --git 
a/backend/onboarders/mappings/parity/as_built_floor_classifiers.py b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py new file mode 100644 index 00000000..3af3c079 --- /dev/null +++ b/backend/onboarders/mappings/parity/as_built_floor_classifiers.py @@ -0,0 +1,60 @@ +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.floor import EpcFloorDescriptions + + +def unknown_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.solid_insulated_assumed + + if year >= 1996: + return EpcFloorDescriptions.solid_limited_insulation_assumed + + if year >= 1930: + return EpcFloorDescriptions.solid_no_insulation_assumed + + return EpcFloorDescriptions.suspended_no_insulation_assumed + + +def unknown_floor_retrofitted(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 1930: + return EpcFloorDescriptions.solid_insulated + + return EpcFloorDescriptions.suspended_insulated + + +def map_solid_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.solid_insulated_assumed + if year >= 1996: + return EpcFloorDescriptions.solid_limited_insulation_assumed + return EpcFloorDescriptions.solid_no_insulation_assumed + + +def map_suspended_floor_as_built(age_band: EpcConstructionAgeBand) -> EpcFloorDescriptions: + year = age_band.start_year() + + if year >= 2003: + return EpcFloorDescriptions.suspended_insulated_assumed + if year >= 1996: + return EpcFloorDescriptions.suspended_limited_insulation_assumed + + return EpcFloorDescriptions.suspended_no_insulation_assumed + + +as_built_floor_classifiers = { + "Solid": map_solid_floor_as_built, + "SuspendedTimber": map_suspended_floor_as_built, + "SuspendedNotTimber": map_suspended_floor_as_built, +} + +unknown_as_built_floor_classifiers = { + 
"RetroFitted": unknown_floor_retrofitted, + "AsBuilt": unknown_floor_as_built, + "Unknown": unknown_floor_as_built, +} diff --git a/backend/onboarders/mappings/parity/as_built_roof_classifiers.py b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py new file mode 100644 index 00000000..fcb554bd --- /dev/null +++ b/backend/onboarders/mappings/parity/as_built_roof_classifiers.py @@ -0,0 +1,56 @@ +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand + + +def map_flat_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: + """ + For a flat, as built roof, these are the breakdowns: + + 2023 onwards → Flat, insulated + 2003–2022 → Flat, insulated + 1983–2002 → Flat, insulated + 1976–1982 → Flat, limited insulation + 1967–1975 → Flat, limited insulation + 1950–1966 and earlier → Flat, no insulation + :param age_band: Input age band + :return: EpcRoofDescriptions + """ + + year = age_band.start_year() + + if year >= 1983: + return EpcRoofDescriptions.flat_insulated + + if year >= 1967: + return EpcRoofDescriptions.flat_limited_insulation + + return EpcRoofDescriptions.flat_no_insulation + + +def map_sloping_ceiling_roof(age_band: EpcConstructionAgeBand) -> EpcRoofDescriptions: + """ + For a sloping ceiling, as built roof, these are the breakdowns: + 2023 onwards → Sloping pitched, insulated + 2003–2022 → Sloping pitched, insulated + 1983–2002 → Sloping pitched, insulated + 1976–1982 → Sloping pitched, limited insulation + 1967–1975 and earlier → Sloping pitched, no insulation + :param age_band: Input age band + :return: EpcRoofDescriptions + """ + year = age_band.start_year() + + if year >= 1983: + return EpcRoofDescriptions.sloping_pitched_insulated + + if year >= 1976: + return EpcRoofDescriptions.sloping_pitched_limited_insulation + + return EpcRoofDescriptions.sloping_pitched_no_insulation + + +as_built_roof_classifiers = { + # Only need to apply this to flat and sloping ceiling 
roofs + "Flat": map_flat_roof, + "PitchedWithSlopingCeiling": map_sloping_ceiling_roof, +} diff --git a/backend/onboarders/mappings/parity/as_built_wall_classifiers.py b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py new file mode 100644 index 00000000..480a7e24 --- /dev/null +++ b/backend/onboarders/mappings/parity/as_built_wall_classifiers.py @@ -0,0 +1,113 @@ +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions + + +def map_cavity_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.cavity_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.cavity_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.cavity_insulated_assumed + + raise NotImplementedError(f"Age band {age_band} not handled for cavity wall as built insulation mapping") + + +def map_solid_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.solid_brick_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.solid_brick_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.solid_brick_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for solid wall insulation mapping" + ) + + +def map_timber_frame_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1950: + return EpcWallDescriptions.timber_frame_no_insulation_assumed + + if age_band.start_year() < 1976: + return EpcWallDescriptions.timber_frame_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1976): + return EpcWallDescriptions.timber_frame_insulated_assumed + + raise NotImplementedError( + 
f"Age band {age_band.value} not handled for timber frame wall insulation mapping" + ) + + +def map_system_build_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.system_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.system_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.system_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for system build wall insulation mapping" + ) + + +def map_granite_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.granite_whinstone_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.granite_whinstone_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.granite_whinestone_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for granite wall insulation mapping" + ) + + +def map_sandstone_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1976: + return EpcWallDescriptions.sandstone_limestone_no_insulation_assumed + + if age_band == EpcConstructionAgeBand.from_1976_to_1982: + return EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return EpcWallDescriptions.sandstone_limestone_insulated_assumed + + raise NotImplementedError( + f"Age band {age_band.value} not handled for sandstone wall insulation mapping" + ) + + +def map_cob_wall_insulation(age_band: EpcConstructionAgeBand): + if age_band.start_year() < 1983: + return EpcWallDescriptions.cob_as_built_average + + if age_band in EpcConstructionAgeBand.from_year_onwards(1983): + return 
EpcWallDescriptions.cob_as_built_good + + raise NotImplementedError( + f"Age band {age_band.value} not handled for cob wall insulation mapping" + ) + + +as_built_wall_classifiers = { + "Cavity": map_cavity_wall_insulation, + "Solid Brick": map_solid_wall_insulation, + "Timber Frame": map_timber_frame_wall_insulation, + "System": map_system_build_wall_insulation, + "Granite": map_granite_wall_insulation, + "Sandstone": map_sandstone_wall_insulation, + "Cob": map_cob_wall_insulation, +} diff --git a/backend/onboarders/mappings/parity/built_form.py b/backend/onboarders/mappings/parity/built_form.py new file mode 100644 index 00000000..12ae6360 --- /dev/null +++ b/backend/onboarders/mappings/parity/built_form.py @@ -0,0 +1,10 @@ +from datatypes.epc.property_type_built_form import BuiltForm + +parity_map = { + "MidTerrace": BuiltForm.mid_terrace, + "EndTerrace": BuiltForm.end_terrace, + "Detached": BuiltForm.detached, + "SemiDetached": BuiltForm.semi_detached, + "EnclosedMidTerrace": BuiltForm.enclosed_mid_terrace, + "EnclosedEndTerrace": BuiltForm.enclosed_end_terrace, +} diff --git a/backend/onboarders/mappings/parity/floor.py b/backend/onboarders/mappings/parity/floor.py new file mode 100644 index 00000000..653d4c68 --- /dev/null +++ b/backend/onboarders/mappings/parity/floor.py @@ -0,0 +1,26 @@ +from numpy import nan +from datatypes.epc.floor import EpcFloorDescriptions + +floor_map = { + # Solid floor + ('Solid', 'AsBuilt'): None, # Mapped + ('Solid', 'Unknown'): None, # Mapped + ('Solid', nan): None, # Mapped + ('Solid', 'RetroFitted'): EpcFloorDescriptions.solid_insulated, + + # Suspended floor + ('SuspendedTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + ('SuspendedTimber', 'RetroFitted'): EpcFloorDescriptions.suspended_insulated, + ('SuspendedTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'RetroFitted'): 
EpcFloorDescriptions.suspended_insulated, + ('SuspendedNotTimber', nan): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'Unknown'): None, # Mapped suspended_floor_as_built + ('SuspendedNotTimber', 'AsBuilt'): None, # Mapped suspended_floor_as_built + + # Unknown type - mapped on age + ('Unknown', 'Unknown'): None, # Mapped unknown_floor_as_built + ('Unknown', 'RetroFitted'): None, # Mapped unknown_floor_retrofitted + (nan, nan): None, # No actual information! + ('Unknown', 'AsBuilt'): None, # Mapped unknown_floor_as_built +} diff --git a/backend/onboarders/mappings/parity/glazing.py b/backend/onboarders/mappings/parity/glazing.py new file mode 100644 index 00000000..46c006bd --- /dev/null +++ b/backend/onboarders/mappings/parity/glazing.py @@ -0,0 +1,20 @@ +from datatypes.epc.efficiency import EpcEfficiency + +glazing_map = { + # (description, energy efficiency, multi_glaze_proportion, glazed_type, glazed_area) + # For SAP 10 assessments, the glazed type and glazed area are not populated in the EPC API data any more + "Double 2002 or later": ("Fully double glazed", EpcEfficiency.AVERAGE, 1, None, None), + "Double before 2002": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Double but age unknown": ("Fully double glazed", EpcEfficiency.POOR, 1, None, None), + "Single": ("Single glazed", EpcEfficiency.VERY_POOR, 0, None, None), + # For triple glazing, with age unknown, the performance is only average, whereas if it's a post-2022 + # installation, it's classed as high performance glazing with good efficiency. We'll need to be considerate as to + # how we make updates to the windows data. 
+ # Triple known data is high performance glazing with Good efficiency (at least) + "Triple": ("Fully triple glazed", EpcEfficiency.AVERAGE, 1, None, None), + # This is also classed as high performance glazing + "DoubleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), + # Under SAP 10, secondary glazing is classed as poor efficiency (whereas under SAP 2012 it was generally good) + "Secondary": ("Full secondary glazing", EpcEfficiency.POOR, 1, None, None), + "TripleKnownData": ("High performance glazing", EpcEfficiency.GOOD, 1, None, None), +} diff --git a/backend/onboarders/mappings/parity/heating.py b/backend/onboarders/mappings/parity/heating.py new file mode 100644 index 00000000..aa74834b --- /dev/null +++ b/backend/onboarders/mappings/parity/heating.py @@ -0,0 +1,330 @@ +from datatypes.epc.main_heating import EpcHeatingSystems +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.fuel import EpcFuel +from datatypes.epc.heating_controls import EpcHeatingControls +from datatypes.epc.hotwater import EpcHotWaterSystems + +heating_map = { + # 0 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 1 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 2 + ('Boilers', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + 
EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 3 + ('Boilers', 'A', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 4 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 5 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 6 + ('Boilers', 'A', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.VERY_GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 7 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 8 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 9 + ('Boilers', 'B', 'MainsGasNotCommunity', 'Top Spec'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 10 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 11 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 12 + ('Boilers', 'C', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 13 + ('Boilers', 'C', 'LPGNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 14 + ('Boilers', 'C', 'LPGNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_lpg, EpcEfficiency.POOR, EpcFuel.lpg_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 15 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + 
EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 16 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 17 + ('Boilers', 'C', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + ('Boilers', 'C', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 19 + ('Boilers', 'C', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 20 + ('Boilers', 'C', 'OilNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 21 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 22 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Sub Optimal'): 
( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 23 + ('Boilers', 'D', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 24 + ('Boilers', 'E', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_and_radiators_electric, EpcEfficiency.VERY_POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 25 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 26 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 27 + ('Boilers', 'E', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 28 + ('Boilers', 'E', 'OilNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, 
EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 29 + ('Boilers', 'E', 'OilNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_oil, EpcEfficiency.AVERAGE, EpcFuel.oil_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 30 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 31 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 32 + ('Boilers', 'F', 'MainsGasNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 33 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 34 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 35 + ('Boilers', 'G', 'MainsGasNotCommunity', 'Top Spec'): ( + 
EpcHeatingSystems.boiler_radiators_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 36 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 37 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 38 + ('Electric underfloor', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.electric_underfloor_heating, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 39 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 40 + ('Heat pumps (warm air)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.air_to_air_ashp, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 41 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + 
EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_room_thermostat_trvs, EpcEfficiency.GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 42 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmers_trvs_bypass, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 43 + ('Heat pumps (wet)', 'A', 'ElectricityNotCommunity', 'Top Spec'): ( + EpcHeatingSystems.ashp_radiators_electric, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.time_and_temperature_zone_control, EpcEfficiency.VERY_GOOD, + EpcHotWaterSystems.from_main_system, EpcEfficiency.AVERAGE + ), + # 44 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 45 + ('Room heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_electric, EpcEfficiency.POOR, EpcFuel.electricity_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 46 + ('Room heaters', 'C', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.AVERAGE, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + # 47 - water done from here + ('Room heaters', 'F', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, 
EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_mains_gas, EpcEfficiency.POOR, EpcFuel.mains_gas_not_community, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Room heaters', 'G', 'SmokelessCoal', 'Sub Optimal'): ( + EpcHeatingSystems.room_heaters_smokeless_fuel, EpcEfficiency.VERY_POOR, EpcFuel.smokeless_coal, + EpcHeatingControls.appliance_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.automatic_charge_control, EpcEfficiency.AVERAGE, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Storage heaters', 'A', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.electric_storage_heaters, EpcEfficiency.AVERAGE, EpcFuel.electricity_not_community, + EpcHeatingControls.manual_charge_control, EpcEfficiency.POOR, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'ElectricityNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_electricaire, EpcEfficiency.GOOD, EpcFuel.electricity_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + 
EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ), + ('Warm Air (not heat pump)', 'G', 'MainsGasNotCommunity', 'Sub Optimal'): ( + EpcHeatingSystems.warm_air_mains_gas, EpcEfficiency.GOOD, EpcFuel.mains_gas_not_community, + EpcHeatingControls.programmer_and_atleast_two_room_thermostats, EpcEfficiency.GOOD, + EpcHotWaterSystems.electric_immersion_off_peak, EpcEfficiency.AVERAGE + ) +} diff --git a/backend/onboarders/mappings/parity/property_type.py b/backend/onboarders/mappings/parity/property_type.py new file mode 100644 index 00000000..f91c0c88 --- /dev/null +++ b/backend/onboarders/mappings/parity/property_type.py @@ -0,0 +1,8 @@ +from datatypes.epc.property_type_built_form import PropertyType + +parity_map = { + "Flat": PropertyType.flat, + "Maisonette": PropertyType.maisonette, + "Bungalow": PropertyType.bungalow, + "House": PropertyType.house, +} diff --git a/backend/onboarders/mappings/parity/roof.py b/backend/onboarders/mappings/parity/roof.py new file mode 100644 index 00000000..02518c3e --- /dev/null +++ b/backend/onboarders/mappings/parity/roof.py @@ -0,0 +1,461 @@ +import pandas as pd +from numpy import nan +from typing import Union, Callable +from collections.abc import Mapping +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.efficiency import EpcEfficiency +from datatypes.epc.construction_age_band import EpcConstructionAgeBand + +roof_map = { + # Dwelling above + ('AnotherDwellingAbove', 'Another Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + ('SameDwellingAbove', 'Same Dwelling Above'): EpcRoofDescriptions.another_dwelling_above, + # Pitched, normal loft access, with a loft thickness + ('PitchedNormalLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalLoftAccess', 'mm100'): 
EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # Pitched, no loft access, with a loft thickness + ('PitchedNormalNoLoftAccess', 'mm25'): EpcRoofDescriptions.loft_25mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm50'): EpcRoofDescriptions.loft_50mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm75'): EpcRoofDescriptions.loft_75mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm100'): EpcRoofDescriptions.loft_100mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm150'): EpcRoofDescriptions.loft_150mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm200'): EpcRoofDescriptions.loft_200mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm250'): EpcRoofDescriptions.loft_250mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm270'): EpcRoofDescriptions.loft_270mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm300'): EpcRoofDescriptions.loft_300mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm350'): EpcRoofDescriptions.loft_350mm_insulation, + ('PitchedNormalNoLoftAccess', 'mm400'): EpcRoofDescriptions.loft_400mm_plus_insulation, + + # All pitched options with as-built or unknown insulation go to EpcRoofDescriptions.pitched_insulated_assumed + # With access + ('PitchedNormalLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + # 
No access + ('PitchedNormalNoLoftAccess', nan): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'AsBuilt'): EpcRoofDescriptions.pitched_insulated_assumed, + ('PitchedNormalNoLoftAccess', 'Unknown'): EpcRoofDescriptions.pitched_insulated_assumed, + + # Flat + ('Flat', 'NoInsulation'): EpcRoofDescriptions.flat_no_insulation, + # Flat - limited insulation + ('Flat', '12mm'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm25'): EpcRoofDescriptions.flat_limited_insulation, + ('Flat', 'mm50'): EpcRoofDescriptions.flat_limited_insulation, + # Flat insulated + ('Flat', 'mm75'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm100'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm150'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm200'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm250'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm300'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm350'): EpcRoofDescriptions.flat_insulated, + ('Flat', 'mm400'): EpcRoofDescriptions.flat_insulated, + # Flat - as built or unknown + ('Flat', 'AsBuilt'): None, # To be classified + ('Flat', nan): None, # To be classified + ('Flat', 'Unknown'): None, # To be classified + + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + + # Thatched + ('PitchedThatched', 'mm50'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm150'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'mm300'): EpcRoofDescriptions.thatched_with_additional_insulation, + ('PitchedThatched', 'Unknown'): EpcRoofDescriptions.thatched, # efficiency classified based on age + + # Sloping: + # Limited (12 very poor, 25-50 poor) + ('PitchedWithSlopingCeiling', 'mm12'): 
EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm25'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + ('PitchedWithSlopingCeiling', 'mm50'): EpcRoofDescriptions.sloping_pitched_limited_insulation, + # Insulated 75mm+ (75 - 125 average, 150 - 250 good, 270+ very good) + ('PitchedWithSlopingCeiling', 'mm75'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm100'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm150'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm200'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm250'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm270'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm300'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm350'): EpcRoofDescriptions.sloping_pitched_insulated, + ('PitchedWithSlopingCeiling', 'mm400'): EpcRoofDescriptions.sloping_pitched_insulated, + # As built/unknown + ('PitchedWithSlopingCeiling', 'AsBuilt'): None, # To be classified + ('PitchedWithSlopingCeiling', nan): None, # To be classified + ('PitchedWithSlopingCeiling', 'Unknown'): None, # +} + +roof_unknown_age_fallback = { + "Flat": EpcRoofDescriptions.flat_as_built_unknown, + "PitchedWithSlopingCeiling": EpcRoofDescriptions.sloping_pitched_as_built_unknown, + "PitchedThatched": EpcRoofDescriptions.thatched_as_built_unknown, + "PitchedNormalLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, + "PitchedNormalNoLoftAccess": EpcRoofDescriptions.loft_as_built_unknown, +} + +RoofEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + + +def flat_insulated_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + before 1900, 1900-1929, 1930-1949, 1950-1966, 1967-1975 -> 
Pitched, no insulation, Very Poor + 1976-1982 -> Pitched, limited insulation, Poor + 1983-1990, to 1996-2002 Pitched, insulated, Average + 2003 - 2006, 2012-2022 -> Pitched, insulated, Good + 2023 onwards -> Pitched, insulated, Very Good + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + + start_year = age_band.start_year() + if start_year >= 2023: + return EpcEfficiency.VERY_GOOD + + if start_year >= 2003: + return EpcEfficiency.GOOD + + if start_year >= 1983: + return EpcEfficiency.AVERAGE + + if start_year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_insulated_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 12mm -> Very Poor + 25mm - 50mm -> Poor + 75mm - 125mm -> Pitched, insulated, average + 150mm - 250mm -> good + 270mm+ -> very good + :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for flat insulated efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if 150 <= insulation_thickness <= 250: + return EpcEfficiency.GOOD + + if 75 <= insulation_thickness <= 125: + return EpcEfficiency.AVERAGE + + if 25 <= insulation_thickness <= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def flat_efficiency(insulation_thickness: int | None, age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine flat roof efficiency. 
+ :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return flat_insulated_efficiency_thickness(insulation_thickness) + + return flat_insulated_efficiency_age_band(age_band) + + +def loft_insulated_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + 2023 onwards -> Very Good + 2012-2022 -> Very Good + 2007-2011 -> Very Good + 2003-2006 -> Very Good + 1996-2002 -> Good + 1991-1995 -> Good + 1983-1990 -> Average + 1976-1982 -> Average + 1967-1975 -> Average + 1950-1966 -> Average + 1930-1949 -> Average + 1900-1929 -> Average + before 1900 -> Average + :param age_band: Input age band, EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2003: + return EpcEfficiency.VERY_GOOD + if year >= 1991: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps thatched roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for thatched efficiency calculation") + + if insulation_thickness >= 175: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 25: + return EpcEfficiency.GOOD + + return EpcEfficiency.AVERAGE + + +def thatched_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine thatched roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return thatched_efficiency_thickness(insulation_thickness) + + return thatched_efficiency_age_band(age_band) + + +def sloping_ceiling_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on construction age band. + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + Maps sloping ceiling roof efficiency based on insulation thickness. 
+ :param insulation_thickness: Insulation thickness in mm + :return: EpcEfficiency + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for sloping ceiling efficiency calculation") + + if insulation_thickness >= 270: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 150: + return EpcEfficiency.GOOD + + if insulation_thickness >= 75: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 25: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def sloping_ceiling_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine sloping ceiling roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return sloping_ceiling_efficiency_thickness(insulation_thickness) + + return sloping_ceiling_efficiency_age_band(age_band) + + +def loft_insulated_at_rafters_efficiency_thickness(insulation_thickness: int | None) -> EpcEfficiency: + """ + 400mm, 350mm = very good + 200-300mm = good + 125-175 = average + 50-100 = poor + 25 and below= very poor + :return: + """ + if insulation_thickness is None: + raise ValueError("Insulation thickness is required for loft insulated at rafters efficiency calculation") + + if insulation_thickness >= 350: + return EpcEfficiency.VERY_GOOD + + if insulation_thickness >= 200: + return EpcEfficiency.GOOD + + if insulation_thickness >= 125: + return EpcEfficiency.AVERAGE + + if insulation_thickness >= 50: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency_age_band(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """ + # 2023 onwards -> Very Good + # 2003-2006, 2012-2022 -> Good + # 1983 - 1990, 1996-2002 -> Average + # 1976-1982 -> Poor + # 1967-1975 and earlier bands -> 
Very Poor + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + year = age_band.start_year() + if year >= 2023: + return EpcEfficiency.VERY_GOOD + if year >= 2003: + return EpcEfficiency.GOOD + if year >= 1983: + return EpcEfficiency.AVERAGE + if year >= 1976: + return EpcEfficiency.POOR + + return EpcEfficiency.VERY_POOR + + +def loft_insulated_at_rafters_efficiency( + insulation_thickness: int | None, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Combines both age band and insulation thickness to determine loft insulated at rafters roof efficiency. + :param insulation_thickness: Insulation thickness in mm + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if insulation_thickness is not None: + return loft_insulated_at_rafters_efficiency_thickness(insulation_thickness) + + return loft_insulated_at_rafters_efficiency_age_band(age_band) + + +ROOF_DESCRIPTION_EFFICIENCIES: Mapping[EpcRoofDescriptions, RoofEfficiencyRule] = { + # Flat roof + EpcRoofDescriptions.flat_no_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.flat_limited_insulation: flat_efficiency, + EpcRoofDescriptions.flat_insulated: flat_efficiency, + + # Loft: + # value mappings + EpcRoofDescriptions.loft_12mm_insulation: EpcEfficiency.VERY_POOR, + EpcRoofDescriptions.loft_25mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_50mm_insulation: EpcEfficiency.POOR, + EpcRoofDescriptions.loft_75mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_100mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_125mm_insulation: EpcEfficiency.AVERAGE, + EpcRoofDescriptions.loft_150mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_175mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_200mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_250mm_insulation: EpcEfficiency.GOOD, + EpcRoofDescriptions.loft_270mm_insulation: EpcEfficiency.VERY_GOOD, + 
EpcRoofDescriptions.loft_300mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_350mm_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.loft_400mm_plus_insulation: EpcEfficiency.VERY_GOOD, + EpcRoofDescriptions.pitched_no_insulation: EpcEfficiency.VERY_POOR, + # function mappings + EpcRoofDescriptions.pitched_insulated_assumed: loft_insulated_efficiency, + + # Loft af rafters + EpcRoofDescriptions.loft_insulated_at_rafters: loft_insulated_at_rafters_efficiency, + + # Another dwelling above + EpcRoofDescriptions.another_dwelling_above: EpcEfficiency.NA, + + # Thatched + EpcRoofDescriptions.thatched: thatched_efficiency, + EpcRoofDescriptions.thatched_with_additional_insulation: thatched_efficiency, + + # Sloping ceiling + EpcRoofDescriptions.sloping_pitched_insulated: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_limited_insulation: sloping_ceiling_efficiency, + EpcRoofDescriptions.sloping_pitched_no_insulation: EpcEfficiency.VERY_POOR, + +} + + +def resolve_roof_efficiency( + description: EpcRoofDescriptions, + age_band: EpcConstructionAgeBand | None, + insulation_thickness: int | None, +) -> EpcEfficiency: + """ + Resolve roof efficiency from description + age band + insulation thickness. 
+ """ + + # Unknown / holding descriptions → efficiency unknown + if description in description.unknown_descriptions: + return EpcEfficiency.NA + + rule = ROOF_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + # Fixed efficiency + if isinstance(rule, EpcEfficiency): + return rule + + # Callable rule + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + try: + # Try (thickness, age_band) + return rule(insulation_thickness, age_band) + except TypeError: + # Fallback to (age_band) + return rule(age_band) diff --git a/backend/onboarders/mappings/parity/walls.py b/backend/onboarders/mappings/parity/walls.py new file mode 100644 index 00000000..0ad6d6e1 --- /dev/null +++ b/backend/onboarders/mappings/parity/walls.py @@ -0,0 +1,211 @@ +from typing import Callable, Union +from collections.abc import Mapping +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.efficiency import EpcEfficiency + +# Unique combinations +wall_map = { + # Cavity walls + ('Cavity', 'FilledCavity'): EpcWallDescriptions.cavity_filled_cavity, + ('Cavity', 'Internal'): EpcWallDescriptions.cavity_internal_insulation, + ('Cavity', 'External'): EpcWallDescriptions.cavity_external_insulation, + ('Cavity', 'FilledCavityPlusInternal'): EpcWallDescriptions.cavity_filled_plus_internal, + ('Cavity', 'FilledCavityPlusExternal'): EpcWallDescriptions.cavity_filled_plus_external, + ('Cavity', 'AsBuilt'): None, # To be classified + ('Cavity', 'Unknown'): None, # To be classified + + # System built walls + ('System', 'External'): EpcWallDescriptions.system_external_insulation, + ('System', 'Internal'): EpcWallDescriptions.system_internal_insulation, + ('System', 'AsBuilt'): None, # To be classified + ('System', 'Unknown'): None, + + # Timber Frame walls + ('Timber Frame', 'Internal'): EpcWallDescriptions.timber_frame_internal_insulation, + ('Timber Frame', 
'External'): EpcWallDescriptions.timber_frame_external_insulation, + ('Timber Frame', 'AsBuilt'): None, # To be classified + ('Timber Frame', 'Unknown'): None, + + # Solid Brick walls + ('Solid Brick', 'External'): EpcWallDescriptions.solid_brick_external_insulation, + ('Solid Brick', 'Internal'): EpcWallDescriptions.solid_brick_internal_insulation, + ('Solid Brick', 'AsBuilt'): None, # To be classified + ('Solid Brick', 'Unknown'): None, + + # Granite walls + ('Granite', 'External'): EpcWallDescriptions.granite_whinstone_external_insulation, + ("Granite", 'Internal'): EpcWallDescriptions.granite_whinstone_internal_insulation, + ('Granite', 'AsBuilt'): None, + ('Granite', 'Unknown'): None, + + # Sandstone walls + ('Sandstone', 'Internal'): EpcWallDescriptions.sandstone_limestone_internal_insulation, + ('Sandstone', 'External'): EpcWallDescriptions.sandstone_limestone_external_insulation, + ('Sandstone', 'Unknown'): None, + ('Sandstone', 'AsBuilt'): None, + + # Cob walls + ('Cob', 'AsBuilt'): None, +} + +wall_unknown_age_fallback = { + "Cavity": EpcWallDescriptions.cavity_as_built_unknown, + "Solid Brick": EpcWallDescriptions.solid_brick_as_built_unknown, + "Timber Frame": EpcWallDescriptions.timber_frame_as_built_unknown, + "System": EpcWallDescriptions.system_as_built_unknown, + "Granite": EpcWallDescriptions.granite_as_built_unknown, + "Sandstone": EpcWallDescriptions.sandstone_as_built_unknown, + "Cob": EpcWallDescriptions.cob_as_built_unknown, +} + + +def cavity_filled_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """" + Maps cavity filled to efficiency based on construction age band. 
+ :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_2023_onwards + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +def internal_external_insulation_efficiency( + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + """ + Maps: + - cavity unfilled with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + - solid brick with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + - system built with internal/external insulation to efficiency based on construction age band. We assumed + based on 100mm insulation + + All of these wall types have the same behaviour in elmhurst + :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_1983_to_1990, + EpcConstructionAgeBand.from_1991_to_1995, + EpcConstructionAgeBand.from_1996_to_2002, + EpcConstructionAgeBand.from_2003_to_2006, + EpcConstructionAgeBand.from_2007_to_2011, + EpcConstructionAgeBand.from_2012_to_2022, + EpcConstructionAgeBand.from_2023_onwards, + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +def timber_granite_sandstone_internal_external_efficiency(age_band: EpcConstructionAgeBand) -> EpcEfficiency: + """" + Maps: + - timber frame with internal/external wall insulation to efficiency based on construction age band. + - sandstone/limestone with internal/external wall insulation to efficiency based on construction age band. + - granite/whinstone with internal/external wall insulation to efficiency based on construction age band. 
+ :param age_band: EpcConstructionAgeBand + :return: EpcEfficiency + """ + if age_band in { + EpcConstructionAgeBand.from_2023_onwards + }: + return EpcEfficiency.VERY_GOOD + + return EpcEfficiency.GOOD + + +WallEfficiencyRule = Union[ + EpcEfficiency, + Callable[[EpcConstructionAgeBand, int | None], EpcEfficiency], +] + +WALL_DESCRIPTION_EFFICIENCIES: Mapping[EpcWallDescriptions, WallEfficiencyRule] = { + # Note: all function mappings have been defined based on Elmhurst + # Cavity + # value mappings + EpcWallDescriptions.cavity_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.cavity_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cavity_insulated_assumed: EpcEfficiency.GOOD, + EpcWallDescriptions.cavity_filled_plus_internal: EpcEfficiency.VERY_GOOD, + EpcWallDescriptions.cavity_filled_plus_external: EpcEfficiency.VERY_GOOD, + # function mappings + EpcWallDescriptions.cavity_filled_cavity: cavity_filled_efficiency, + EpcWallDescriptions.cavity_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.cavity_external_insulation: internal_external_insulation_efficiency, + + # Solid brick + # value mappings + EpcWallDescriptions.solid_brick_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.solid_brick_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.solid_brick_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.solid_brick_internal_insulation: internal_external_insulation_efficiency, + EpcWallDescriptions.solid_brick_external_insulation: internal_external_insulation_efficiency, + + # System + # value mappings + EpcWallDescriptions.system_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.system_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.system_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.system_internal_insulation: internal_external_insulation_efficiency, 
+ EpcWallDescriptions.system_external_insulation: internal_external_insulation_efficiency, + + # Timber frame + # value mappings + EpcWallDescriptions.timber_frame_no_insulation_assumed: EpcEfficiency.POOR, + EpcWallDescriptions.timber_frame_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.timber_frame_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.timber_frame_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.timber_frame_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Granite / whinstone + EpcWallDescriptions.granite_whinstone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.granite_whinstone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.granite_whinestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.granite_whinstone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.granite_whinstone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Sandstone / limestone + EpcWallDescriptions.sandstone_limestone_no_insulation_assumed: EpcEfficiency.VERY_POOR, + EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed: EpcEfficiency.AVERAGE, + EpcWallDescriptions.sandstone_limestone_insulated_assumed: EpcEfficiency.GOOD, + # function mappings + EpcWallDescriptions.sandstone_limestone_internal_insulation: timber_granite_sandstone_internal_external_efficiency, + EpcWallDescriptions.sandstone_limestone_external_insulation: timber_granite_sandstone_internal_external_efficiency, + + # Cob (special case) + EpcWallDescriptions.cob_as_built_average: EpcEfficiency.AVERAGE, + EpcWallDescriptions.cob_as_built_good: EpcEfficiency.GOOD, + + # Unknown mappings which are unhandled + EpcWallDescriptions.cavity_as_built_unknown: EpcEfficiency.NA, + 
EpcWallDescriptions.solid_brick_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.system_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.timber_frame_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.granite_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.sandstone_as_built_unknown: EpcEfficiency.NA, + EpcWallDescriptions.cob_as_built_unknown: EpcEfficiency.NA, + +} + + +def resolve_wall_efficiency( + description: EpcWallDescriptions, + age_band: EpcConstructionAgeBand, +) -> EpcEfficiency: + rule = WALL_DESCRIPTION_EFFICIENCIES[description] + + if isinstance(rule, EpcEfficiency): + return rule + + return rule(age_band) diff --git a/backend/onboarders/mappings/property_type.py b/backend/onboarders/mappings/property_type.py deleted file mode 100644 index 75deef04..00000000 --- a/backend/onboarders/mappings/property_type.py +++ /dev/null @@ -1,6 +0,0 @@ -parity_map = { - "Flat": "Flat", - "Maisonette": "Maisonette", - "Bungalow": "Bungalow", - "House": "House", -} diff --git a/backend/onboarders/mappings/walls.py b/backend/onboarders/mappings/walls.py deleted file mode 100644 index 9b70b49c..00000000 --- a/backend/onboarders/mappings/walls.py +++ /dev/null @@ -1,3 +0,0 @@ -parity_map = { - -} diff --git a/backend/onboarders/parity.py b/backend/onboarders/parity.py index 27244777..6c79d027 100644 --- a/backend/onboarders/parity.py +++ b/backend/onboarders/parity.py @@ -1,93 +1,371 @@ +import re +from tqdm import tqdm import pandas as pd -from etl.epc.DataProcessor import construction_age_bounds_map -from backend.onboarders.mappings.property_type import parity_map as property_map -from backend.onboarders.mappings.age_band import party_map as age_band_map -from backend.onboarders.mappings.built_form import parity_map as built_form_map - - -def check_nulls(data, original_column, mapped_column): - # We only allow nulls if the oroginal value was null - null_vals = data[pd.isnull(data[mapped_column])] - if null_vals.empty: - return True - 
# We make sure all original values were null - assert pd.isnull(null_vals[original_column]).all(), ( - f"Some values in {mapped_column} were not mapped, but original values were not null" - ) - - -# Sample input data - -data = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " - "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" +from backend.onboarders.base import OnboarderBase +# Parity mappings +from backend.onboarders.mappings.parity.property_type import parity_map as property_map +from backend.onboarders.mappings.parity.age_band import parity_map as age_band_map +from backend.onboarders.mappings.parity.built_form import parity_map as built_form_map +from backend.onboarders.mappings.parity.walls import wall_map, wall_unknown_age_fallback, WALL_DESCRIPTION_EFFICIENCIES +from onboarders.mappings.parity.roof import roof_map, roof_unknown_age_fallback, resolve_roof_efficiency +from onboarders.mappings.parity.floor import floor_map +from onboarders.mappings.parity.heating import heating_map +from onboarders.mappings.parity.glazing import glazing_map +from backend.onboarders.mappings.parity.as_built_wall_classifiers import as_built_wall_classifiers +from backend.onboarders.mappings.parity.as_built_roof_classifiers import as_built_roof_classifiers +from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( + as_built_floor_classifiers, unknown_as_built_floor_classifiers ) +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.floor import EpcFloorDescriptions +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.efficiency import EpcEfficiency -# We want to map the parity fields to standard EPC references. 
This will allow us to -# 1) Estimate EPCs, more accurately -# 2) Patch incorrect EPCs with ease -# 3) Indicate already installed measures - -# ------------ construction_age_band ------------ -# Map to EPC age bands -# def construction_date_to_band(year): -# if pd.isnull(year): -# return None -# # Get the year from the date which is numpy datetime format -# for label, ranges in construction_age_bounds_map.items(): -# if ranges["l"] <= year <= ranges["u"]: -# return label -# raise NotImplementedError("year out of bounds") -# -# -# data["construction_age_band"] = pd.to_datetime(data["Construction Date"]).dt.year.apply(construction_date_to_band) - -data["construction_age_band"] = data["Construction Years"].map(age_band_map) - -check_nulls(data, "Construction Years", "construction_age_band") - -# ------------ property_type ------------ -data["property_type"] = data["Type"].map(property_map) - -assert pd.isnull(data["property_type"]).sum() == 0, "Some property types were not mapped" - -# ------------ built_form ------------ -data["built_form"] = data["Attachment"].map(built_form_map) - -assert pd.isnull(data["built_form"]).sum() == 0, "Some built forms were not mapped" - -# ------------ Wall Construction ------------ - -data["walls_combined"] = data["Wall Construction"] + "+" + data["Wall Insulation"].fillna("Unknown Insulation") - -data["Wall Insulation"].value_counts() -data["Wall Construction"].value_counts() - -as_built_map = { - "Cavity": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Solid Brick": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "System": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Timber Frame": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Sandstone": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Granite": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, - "Cob": {"insulated_age_bands": [], "partial_insulated_age_bands": []}, -} 
+tqdm.pandas() -def map_wall_construction(wall_constuction, wall_insulation, construction_age_band): - if wall_insulation == "AsBuilt": - # Deduce based on wall construction and age band - bands = as_built_map.get(wall_constuction, None) - if bands is None: - raise NotImplementedError(f"Wall construction {wall_constuction} not in as built map") +class ParityOnboarder(OnboarderBase): - # We check if the age band is in insulated or partial insulated, and if neither, we assume uninsulated + def __init__( + self, + fileuri: str, + file_format: str, + **kwargs + ): + # Extract bucket, and filekey; Will be in the format s3://bucket/key + self.bucket_name = fileuri.split("/")[2] + self.input_file_name = "/".join(fileuri.split("/")[3:]) + # Also prepare output file name + self.output_file_name = self.input_file_name.replace("." + file_format, "") + "_transformed.csv" -# Variables we want to map -# 'Org Ref', 'Address 1', 'Address 2', 'Address 3', 'Postcode', 'Type', -# 'Attachment', 'Construction Years', 'Wall Construction', -# 'Wall Insulation', 'Roof Construction', 'Roof Insulation', -# 'Floor Construction', 'Floor Insulation', 'Glazing', 'Heating', -# 'Boiler Efficiency', 'Main Fuel', 'Controls Adequacy', 'UPRN', -# 'Total Floor Area (m2)' + self.read_s3(file_format=file_format, **kwargs) + pass + + def map_construction_age_band(self): + self.data[self.landlord_construction_age_band] = self.data["Construction Years"].map(age_band_map) + self.assert_nulls_only_from_source_nulls( + self.data, "Construction Years", self.landlord_construction_age_band + ) + + def map_property_type(self): + self.data[self.landlord_property_type] = self.data["Type"].map(property_map) + self.assert_no_nulls(self.data, self.landlord_property_type) + + def map_built_form(self): + self.data[self.landlord_built_form] = self.data["Attachment"].map(built_form_map) + self.assert_no_nulls(self.data, self.landlord_built_form) + + @staticmethod + def _fill_wall_as_built(row: pd.Series) -> 
EpcWallDescriptions | None: + """ + Utility function, used by map_wall_construction in parity transformation module + :param row: row of input sustainability data, being transformed + :return: EpcWallDescriptions, the as built wall description for the input row, based on the wall construction + type and age band + """ + # Already resolved via direct mapping + if row.landlord_wall_construction is not None: + return row.landlord_wall_construction + + wall_type = row["Wall Construction"] + + # Missing construction age → conservative fallback + if pd.isnull(row.landlord_construction_age_band): + return wall_unknown_age_fallback.get(wall_type) + + classifier = as_built_wall_classifiers.get(wall_type) + if classifier is None: + return None + + return classifier(row.landlord_construction_age_band) + + @staticmethod + def _resolve_wall_efficiency( + description: EpcWallDescriptions, + age_band: EpcConstructionAgeBand | None, + ) -> EpcEfficiency: + # Unknown / holding descriptions → efficiency unknown + if "unknown insulation" in description.value.lower(): + return EpcEfficiency.NA + + rule = WALL_DESCRIPTION_EFFICIENCIES.get(description) + + if rule is None: + return EpcEfficiency.NA + + if isinstance(rule, EpcEfficiency): + return rule + + # Rule needs age band but we don't have one + if age_band is None or pd.isnull(age_band): + return EpcEfficiency.NA + + return rule(age_band) + + def map_wall_construction(self): + self.data[self.landlord_wall_construction] = ( + self.data[["Wall Construction", "Wall Insulation"]] + .apply(tuple, axis=1) + .map(wall_map) + ) + + self.data[self.landlord_wall_construction] = self.data.progress_apply(self._fill_wall_as_built, axis=1) + + # Sanity check + self.assert_no_nulls(self.data, self.landlord_wall_construction) + + self.data[self.landlord_wall_efficiency] = self.data.progress_apply( + lambda row: self._resolve_wall_efficiency( + row.landlord_wall_construction, + row.landlord_construction_age_band, + ), + axis=1, + ) + # Additional 
santify check + self.assert_no_nulls(self.data, self.landlord_wall_efficiency) + + @staticmethod + def _fill_roof_as_built(row: pd.Series) -> EpcRoofDescriptions | None: + # Already resolved + if not pd.isnull(row.landlord_roof_construction): + return row.landlord_roof_construction + + roof_type = row["Roof Construction"] + + classifier = as_built_roof_classifiers.get(roof_type) + if classifier is None: + raise NotImplementedError(f"No roof classifier for roof type '{roof_type}'") + + if pd.isnull(row.landlord_construction_age_band): + return roof_unknown_age_fallback.get(roof_type) + + output = classifier(row.landlord_construction_age_band) + if output is None: + raise NotImplementedError( + f"Roof classification returned None for roof type '{roof_type}'" + ) + + return output + + @staticmethod + def _extract_insulation_thickness(value: str | None) -> int | None: + """ + Extract insulation thickness in mm from a string like 'mm150'. + Returns None if not present or not parseable. + """ + if value is None or pd.isnull(value): + return None + + match = re.search(r"(\d+)", str(value)) + if not match: + return None + + return int(match.group(1)) + + def map_roof_construction(self): + self.data[self.landlord_roof_construction] = ( + self.data[["Roof Construction", "Roof Insulation"]] + .progress_apply(tuple, axis=1) + .map(roof_map) + ) + + self.data[self.landlord_roof_construction] = self.data.progress_apply( + self._fill_roof_as_built, + axis=1, + ) + + # sanity check + self.assert_no_nulls(self.data, self.landlord_roof_construction) + + self.data["roof_insulation_thickness_mm"] = self.data["Roof Insulation"].apply( + self._extract_insulation_thickness + ) + + self.data[self.landlord_roof_efficiency] = self.data.progress_apply( + lambda row: resolve_roof_efficiency( + description=row.landlord_roof_construction, + age_band=row.landlord_construction_age_band, + insulation_thickness=row.roof_insulation_thickness_mm, + ), + axis=1, + ) + # sanity check + 
self.assert_no_nulls(self.data, self.landlord_roof_efficiency) + + # Flag sloping ceiling + self.data[self.landlord_has_sloping_ceiling] = self.data["Roof Construction"].apply( + lambda x: x == "PitchedWithSlopingCeiling" + ) + + @staticmethod + def _fill_floor_as_built(row: pd.Series): + # 1. Already resolved + if row.landlord_floor_construction is not None: + return row.landlord_floor_construction + + age_band = row.landlord_construction_age_band + floor_type = row["Floor Construction"] + insulation = row["Floor Insulation"] + + # 2. Missing age band → conservative fallback + if pd.isnull(age_band): + return EpcFloorDescriptions.unknown + + # 3. Known floor types + if floor_type in ["Solid", "SuspendedTimber", "SuspendedNotTimber"]: + classifier = as_built_floor_classifiers[floor_type] + return classifier(age_band) + + # 4. Unknown floor type + if floor_type == "Unknown": + classifier = unknown_as_built_floor_classifiers[insulation] + return classifier(age_band) + + # 5. Truly missing / garbage input + return EpcFloorDescriptions.unknown + + def map_floor_construction(self): + self.data[self.landlord_floor_construction] = ( + self.data[["Floor Construction", "Floor Insulation"]] + .progress_apply(tuple, axis=1) + .map(floor_map) + ) + + self.data[self.landlord_floor_construction] = self.data.progress_apply( + self._fill_floor_as_built, + axis=1, + ) + + self.assert_no_nulls(self.data, self.landlord_floor_construction) + + def map_glazing(self): + # TODO: probably doesn't make sense to store multi glazed proportion, glazed type or glazed area. 
+ # There is maybe an argument for landlord_multi_glaze_proportion as this could be variable, + # however + self.data[ + [ + self.landlord_windows_type, + self.landlord_windows_efficiency, + self.landlord_multi_glaze_proportion, + self.landlord_glazed_type, + self.landlord_glazed_area + ] + ] = self.data["Glazing"].map(glazing_map).progress_apply(pd.Series) + + def map_heating(self): + # TODO - when mapping heating controls, we should check the existing heating controls and the efficiency rating + # For sub optimal heating controls, we're going to make an assumption as to what the heating controls are + # and the energy efficiency rating we prescribe here may not be accurate. We therefore use this as an + # upper limit + # as opposed to a guaranteed efficiency rating. To stress, this is only relevant for sub optimal heating + # controls. E.g. it may be programmer and room thermostat + self.data[ + [ + self.landlord_heating_construction, + self.landlord_heating_efficiency, + self.landlord_fuel_type, + self.landlord_heating_controls, + self.landlord_heating_controls_efficiency, + self.landlord_hot_water_system, + self.landlord_hot_water_efficiency + ] + ] = self.data[ + [ + "Heating", + "Boiler Efficiency", + "Main Fuel", + "Controls Adequacy" + ] + ].progress_apply(tuple, axis=1).map(heating_map).progress_apply(pd.Series) + + def map_floor_area(self): + # This is just a rename + self.data = self.data.rename( + columns={"Total Floor Area (m2)": self.landlord_total_floor_area_m2} + ) + + def select_columns(self): + self.data = self.data[ + [ + "Org Ref", + "UPRN", + "Address 1", + "Address 2", + "Address 3", + "Postcode", + self.landlord_total_floor_area_m2, + self.landlord_construction_age_band, + self.landlord_property_type, + self.landlord_built_form, + self.landlord_wall_construction, + self.landlord_wall_efficiency, + self.landlord_roof_construction, + self.landlord_roof_efficiency, + self.landlord_has_sloping_ceiling, + self.landlord_floor_construction, + 
self.landlord_windows_type, + self.landlord_windows_efficiency, + self.landlord_multi_glaze_proportion, + self.landlord_glazed_type, + self.landlord_glazed_area, + self.landlord_heating_construction, + self.landlord_heating_efficiency, + self.landlord_fuel_type, + self.landlord_heating_controls, + self.landlord_heating_controls_efficiency, + self.landlord_hot_water_system, + self.landlord_hot_water_efficiency + ] + ].rename( + columns={ # keys must match the selected source headers exactly; rename() silently skips unknown keys + "Org Ref": "landlord_property_id", + "Address 1": "address1", + "Address 2": "address2", + "Address 3": "address3", + "Postcode": "postcode", + } + ) + + def extract_values(self): + for columns in [ + self.landlord_construction_age_band, self.landlord_property_type, self.landlord_built_form, + self.landlord_wall_construction, self.landlord_wall_efficiency, self.landlord_roof_construction, + self.landlord_roof_efficiency, self.landlord_floor_construction, self.landlord_windows_type, + self.landlord_windows_efficiency, self.landlord_heating_construction, self.landlord_heating_efficiency, + self.landlord_fuel_type, self.landlord_heating_controls, self.landlord_heating_controls_efficiency, + self.landlord_hot_water_system, self.landlord_hot_water_efficiency + ]: + self.data[columns] = self.data[columns].progress_apply(lambda x: x.value if hasattr(x, "value") else x) + + def transform(self): + # ------------ construction_age_band ------------ + self.map_construction_age_band() + + # ------------ property_type ------------ + self.map_property_type() + + # ------------ built_form ------------ + self.map_built_form() + + # ------------ Wall Construction ------------ + self.map_wall_construction() + + # ------------ Roof Construction ------------ + self.map_roof_construction() + + # ------------ Floor Construction ------------ + self.map_floor_construction() + + # ------------ Glazing ------------ + self.map_glazing() + + # ------------ Heating, fuel, controls & hot water ------------ + self.map_heating() + + # ------------ Floor Area 
------------ + self.map_floor_area() + + # ------------ Formatting ------------ + self.select_columns() + self.extract_values() diff --git a/backend/onboarders/requirements.txt b/backend/onboarders/requirements.txt new file mode 100644 index 00000000..907cb877 --- /dev/null +++ b/backend/onboarders/requirements.txt @@ -0,0 +1,6 @@ +boto3 +numpy==2.1.2 +pandas==2.2.3 +tqdm==4.66.5 +pydantic==2.9.2 +openpyxl==3.1.2 \ No newline at end of file diff --git a/backend/onboarders/tests/test_floor_remapping.py b/backend/onboarders/tests/test_floor_remapping.py new file mode 100644 index 00000000..c20372b7 --- /dev/null +++ b/backend/onboarders/tests/test_floor_remapping.py @@ -0,0 +1,97 @@ +import pytest + +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.floor import EpcFloorDescriptions + +from backend.onboarders.mappings.parity.as_built_floor_classifiers import ( + unknown_floor_as_built, + unknown_floor_retrofitted, + map_solid_floor_as_built, + map_suspended_floor_as_built, +) + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # Before 1900 / 1900–1929 → suspended, no insulation + (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_no_insulation_assumed), + (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_no_insulation_assumed), + + # 1930–1995 → solid, no insulation + (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1967_to_1975, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed), + + # 1996–2002 → solid, limited insulation + 
(EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed), + + # 2003+ → solid, insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed), + ], +) +def test_unknown_floor_as_built(age_band, expected): + assert unknown_floor_as_built(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # Pre-1930 → suspended, insulated + (EpcConstructionAgeBand.before_1900, EpcFloorDescriptions.suspended_insulated), + (EpcConstructionAgeBand.from_1900_to_1929, EpcFloorDescriptions.suspended_insulated), + + # 1930+ → solid, insulated + (EpcConstructionAgeBand.from_1930_to_1949, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_1950_to_1966, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_1976_to_1982, EpcFloorDescriptions.solid_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated), + ], +) +def test_unknown_floor_retrofitted(age_band, expected): + assert unknown_floor_retrofitted(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # 1983–1995 → no insulation + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.solid_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.solid_no_insulation_assumed), + + # 1996–2002 → limited insulation + (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.solid_limited_insulation_assumed), + + # 2003+ → insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.solid_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.solid_insulated_assumed), + ], 
+) +def test_solid_floor_as_built(age_band, expected): + assert map_solid_floor_as_built(age_band) == expected + + +@pytest.mark.parametrize( + "age_band,expected", + [ + # 1983–1995 → no insulation + (EpcConstructionAgeBand.from_1983_to_1990, EpcFloorDescriptions.suspended_no_insulation_assumed), + (EpcConstructionAgeBand.from_1991_to_1995, EpcFloorDescriptions.suspended_no_insulation_assumed), + + # 1996–2002 → limited insulation + (EpcConstructionAgeBand.from_1996_to_2002, EpcFloorDescriptions.suspended_limited_insulation_assumed), + + # 2003+ → insulated + (EpcConstructionAgeBand.from_2003_to_2006, EpcFloorDescriptions.suspended_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcFloorDescriptions.suspended_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcFloorDescriptions.suspended_insulated_assumed), + ], +) +def test_suspended_floor_as_built(age_band, expected): + assert map_suspended_floor_as_built(age_band) == expected diff --git a/backend/onboarders/tests/test_roof_remapping.py b/backend/onboarders/tests/test_roof_remapping.py new file mode 100644 index 00000000..cc19e057 --- /dev/null +++ b/backend/onboarders/tests/test_roof_remapping.py @@ -0,0 +1,173 @@ +import pytest + +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.roof import EpcRoofDescriptions +from datatypes.epc.efficiency import EpcEfficiency + +from backend.onboarders.mappings.parity.as_built_roof_classifiers import ( + map_flat_roof, + map_sloping_ceiling_roof, +) +from backend.onboarders.mappings.parity.roof import resolve_roof_efficiency + + +# --------------------------------------------------------------------- +# As-built roof description classification +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.flat_no_insulation), + (EpcConstructionAgeBand.from_1950_to_1966, 
EpcRoofDescriptions.flat_no_insulation), + (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.flat_limited_insulation), + (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.flat_limited_insulation), + (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.flat_insulated), + (EpcConstructionAgeBand.from_2007_to_2011, EpcRoofDescriptions.flat_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.flat_insulated), + ], +) +def test_classify_flat_roof(age_band, expected): + assert map_flat_roof(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcRoofDescriptions.sloping_pitched_no_insulation), + (EpcConstructionAgeBand.from_1967_to_1975, EpcRoofDescriptions.sloping_pitched_no_insulation), + (EpcConstructionAgeBand.from_1976_to_1982, EpcRoofDescriptions.sloping_pitched_limited_insulation), + (EpcConstructionAgeBand.from_1983_to_1990, EpcRoofDescriptions.sloping_pitched_insulated), + (EpcConstructionAgeBand.from_2012_to_2022, EpcRoofDescriptions.sloping_pitched_insulated), + (EpcConstructionAgeBand.from_2023_onwards, EpcRoofDescriptions.sloping_pitched_insulated), + ], +) +def test_classify_sloping_ceiling_roof(age_band, expected): + assert map_sloping_ceiling_roof(age_band) == expected + + +# --------------------------------------------------------------------- +# Roof efficiency — fixed & age-band driven +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + # Flat roof, no insulation + (EpcRoofDescriptions.flat_no_insulation, EpcConstructionAgeBand.before_1900, EpcEfficiency.VERY_POOR), + + # Flat roof, limited insulation (age-band driven) + (EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1976_to_1982, EpcEfficiency.POOR), + ( + EpcRoofDescriptions.flat_limited_insulation, EpcConstructionAgeBand.from_1967_to_1975, + 
EpcEfficiency.VERY_POOR), + + # Flat roof, insulated (age-band driven) + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_1983_to_1990, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD), + (EpcRoofDescriptions.flat_insulated, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD), + + # Pitched, insulated assumed (loft) + (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_1996_to_2002, EpcEfficiency.GOOD), + (EpcRoofDescriptions.pitched_insulated_assumed, EpcConstructionAgeBand.from_2007_to_2011, + EpcEfficiency.VERY_GOOD), + ], +) +def test_roof_efficiency_age_band_only(description, age_band, expected): + assert resolve_roof_efficiency( + description=description, + age_band=age_band, + insulation_thickness=None, + ) == expected + + +# --------------------------------------------------------------------- +# Roof efficiency — insulation thickness driven +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, thickness, expected", + [ + # Loft insulation + (EpcRoofDescriptions.loft_12mm_insulation, 12, EpcEfficiency.VERY_POOR), + (EpcRoofDescriptions.loft_25mm_insulation, 25, EpcEfficiency.POOR), + (EpcRoofDescriptions.loft_75mm_insulation, 75, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.loft_150mm_insulation, 150, EpcEfficiency.GOOD), + (EpcRoofDescriptions.loft_300mm_insulation, 300, EpcEfficiency.VERY_GOOD), + + # Flat insulated — thickness overrides age band + (EpcRoofDescriptions.flat_insulated, 50, EpcEfficiency.POOR), + (EpcRoofDescriptions.flat_insulated, 100, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.flat_insulated, 200, EpcEfficiency.GOOD), + (EpcRoofDescriptions.flat_insulated, 300, EpcEfficiency.VERY_GOOD), + + # Sloping ceiling + (EpcRoofDescriptions.sloping_pitched_insulated, 75, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.sloping_pitched_insulated, 150, 
EpcEfficiency.GOOD), + (EpcRoofDescriptions.sloping_pitched_insulated, 350, EpcEfficiency.VERY_GOOD), + ], +) +def test_roof_efficiency_thickness_based(description, thickness, expected): + assert resolve_roof_efficiency( + description=description, + age_band=EpcConstructionAgeBand.before_1900, # should be ignored + insulation_thickness=thickness, + ) == expected + + +# --------------------------------------------------------------------- +# Thatched roofs +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.before_1900, EpcEfficiency.AVERAGE), + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2003_to_2006, EpcEfficiency.GOOD), + (EpcRoofDescriptions.thatched, EpcConstructionAgeBand.from_2023_onwards, EpcEfficiency.VERY_GOOD), + ], +) +def test_thatched_efficiency_age_band(description, age_band, expected): + assert resolve_roof_efficiency( + description=description, + age_band=age_band, + insulation_thickness=None, + ) == expected + + +@pytest.mark.parametrize( + "thickness, expected", + [ + (12, EpcEfficiency.AVERAGE), + (50, EpcEfficiency.GOOD), + (150, EpcEfficiency.GOOD), + (200, EpcEfficiency.VERY_GOOD), + ], +) +def test_thatched_efficiency_thickness(thickness, expected): + assert resolve_roof_efficiency( + description=EpcRoofDescriptions.thatched_with_additional_insulation, + age_band=EpcConstructionAgeBand.before_1900, + insulation_thickness=thickness, + ) == expected + + +# --------------------------------------------------------------------- +# Unknown / holding descriptions +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description", + [ + EpcRoofDescriptions.flat_as_built_unknown, + EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ], +) +def 
test_unknown_roof_descriptions_return_na(description): + assert resolve_roof_efficiency( + description=description, + age_band=None, + insulation_thickness=None, + ) == EpcEfficiency.NA diff --git a/backend/onboarders/tests/test_wall_remapping.py b/backend/onboarders/tests/test_wall_remapping.py new file mode 100644 index 00000000..c9476211 --- /dev/null +++ b/backend/onboarders/tests/test_wall_remapping.py @@ -0,0 +1,161 @@ +import pytest + +from datatypes.epc.construction_age_band import EpcConstructionAgeBand +from datatypes.epc.walls import EpcWallDescriptions +from datatypes.epc.efficiency import EpcEfficiency + +from backend.onboarders.mappings.parity.walls import resolve_wall_efficiency +from backend.onboarders.mappings.parity.as_built_wall_classifiers import ( + map_cavity_wall_insulation, + map_solid_wall_insulation, + map_timber_frame_wall_insulation, + map_system_build_wall_insulation, + map_granite_wall_insulation, + map_sandstone_wall_insulation, + map_cob_wall_insulation, +) + + +# --------------------------------------------------------------------- +# As-built wall description classification +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cavity_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.cavity_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cavity_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cavity_insulated_assumed), + (EpcConstructionAgeBand.from_2023_onwards, EpcWallDescriptions.cavity_insulated_assumed), + ], +) +def test_map_cavity_wall_insulation(age_band, expected): + assert map_cavity_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, 
EpcWallDescriptions.solid_brick_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.solid_brick_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1996_to_2002, EpcWallDescriptions.solid_brick_insulated_assumed), + ], +) +def test_map_solid_wall_insulation(age_band, expected): + assert map_solid_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.timber_frame_no_insulation_assumed), + (EpcConstructionAgeBand.from_1950_to_1966, EpcWallDescriptions.timber_frame_partial_insulated_assumed), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.timber_frame_insulated_assumed), + ], +) +def test_map_timber_frame_wall_insulation(age_band, expected): + assert map_timber_frame_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.system_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.system_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2003_to_2006, EpcWallDescriptions.system_insulated_assumed), + ], +) +def test_map_system_wall_insulation(age_band, expected): + assert map_system_build_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.granite_whinstone_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.granite_whinstone_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2012_to_2022, EpcWallDescriptions.granite_whinestone_insulated_assumed), + ], +) +def test_map_granite_wall_insulation(age_band, expected): + assert map_granite_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, 
EpcWallDescriptions.sandstone_limestone_no_insulation_assumed), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.sandstone_limestone_partial_insulated_assumed), + (EpcConstructionAgeBand.from_2007_to_2011, EpcWallDescriptions.sandstone_limestone_insulated_assumed), + ], +) +def test_map_sandstone_wall_insulation(age_band, expected): + assert map_sandstone_wall_insulation(age_band) == expected + + +@pytest.mark.parametrize( + "age_band, expected", + [ + (EpcConstructionAgeBand.before_1900, EpcWallDescriptions.cob_as_built_average), + (EpcConstructionAgeBand.from_1976_to_1982, EpcWallDescriptions.cob_as_built_average), + (EpcConstructionAgeBand.from_1983_to_1990, EpcWallDescriptions.cob_as_built_good), + ], +) +def test_map_cob_wall_insulation(age_band, expected): + assert map_cob_wall_insulation(age_band) == expected + + +# --------------------------------------------------------------------- +# Wall efficiency resolution +# --------------------------------------------------------------------- + +@pytest.mark.parametrize( + "description, age_band, expected", + [ + # Fixed efficiencies + (EpcWallDescriptions.cavity_no_insulation_assumed, None, EpcEfficiency.POOR), + (EpcWallDescriptions.cavity_partial_insulated_assumed, None, EpcEfficiency.AVERAGE), + (EpcWallDescriptions.cavity_insulated_assumed, None, EpcEfficiency.GOOD), + + # Function-based efficiencies + ( + EpcWallDescriptions.cavity_filled_cavity, + EpcConstructionAgeBand.from_2023_onwards, + EpcEfficiency.VERY_GOOD, + ), + ( + EpcWallDescriptions.cavity_filled_cavity, + EpcConstructionAgeBand.from_1991_to_1995, + EpcEfficiency.GOOD, + ), + ( + EpcWallDescriptions.solid_brick_internal_insulation, + EpcConstructionAgeBand.from_2003_to_2006, + EpcEfficiency.VERY_GOOD, + ), + ( + EpcWallDescriptions.solid_brick_internal_insulation, + EpcConstructionAgeBand.from_1950_to_1966, + EpcEfficiency.GOOD, + ), + ], +) +def test_resolve_wall_efficiency(description, age_band, expected): + assert 
resolve_wall_efficiency(description, age_band) == expected + + +@pytest.mark.parametrize( + "description", + [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ], +) +def test_unknown_wall_descriptions_return_na(description): + assert resolve_wall_efficiency(description, None) == EpcEfficiency.NA diff --git a/backend/postcode_splitter/hackney.xlsx b/backend/postcode_splitter/hackney.xlsx deleted file mode 100644 index 64892f3a..00000000 Binary files a/backend/postcode_splitter/hackney.xlsx and /dev/null differ diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile new file mode 100644 index 00000000..7c1a7989 --- /dev/null +++ b/backend/postcode_splitter/handler/Dockerfile @@ -0,0 +1,9 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["main.handler"] diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d417c8f1..d55f618a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,10 +1,12 @@ import pandas as pd import requests -from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode +from backend.address2UPRN.main import ( + resolve_uprns_for_postcode_group, + get_epc_data_with_postcode, +) from tqdm import tqdm - def sanitise_postcode(postcode: str) -> str | None: """ Normalise postcode for grouping. 
@@ -51,11 +53,7 @@ def main(): # --- validate AFTER grouping (save API calls) --- # Get unique, non-null postcodes - unique_postcodes = ( - df["postcode_clean"] - .dropna() - .unique() - ) + unique_postcodes = df["postcode_clean"].dropna().unique() # Validate each postcode once, TODOadd a progress bar postcode_validity = { @@ -66,7 +64,6 @@ def main(): # Map validity back onto dataframe df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) - results = [] for postcode, group_df in tqdm( @@ -98,17 +95,33 @@ def main(): results.append(tmp) final_df = pd.concat(results, ignore_index=True) - a = final_df[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] # add levi score to viewing - b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing - b = b[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] + a = final_df[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] # add levi score to viewing + b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing + b = b[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] + + +def handler(event, context): + print("hello Postcode splitter world") + return {"statusCode": 200, "body": "hello world"} + if __name__ == "__main__": main() diff --git a/datatypes/epc/__init__.py b/datatypes/epc/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/datatypes/epc/construction_age_band.py b/datatypes/epc/construction_age_band.py new file mode 100644 index 00000000..c5e7a03b --- /dev/null +++ b/datatypes/epc/construction_age_band.py @@ -0,0 +1,45 @@ +import re +from enum import Enum +from typing import List + + +class EpcConstructionAgeBand(Enum): + before_1900: str = 'England and Wales: before 1900' + from_1900_to_1929: 
str = 'England and Wales: 1900-1929' + from_1930_to_1949: str = 'England and Wales: 1930-1949' + from_1950_to_1966: str = 'England and Wales: 1950-1966' + from_1967_to_1975: str = 'England and Wales: 1967-1975' + from_1976_to_1982: str = 'England and Wales: 1976-1982' + from_1983_to_1990: str = 'England and Wales: 1983-1990' + from_1991_to_1995: str = 'England and Wales: 1991-1995' + from_1996_to_2002: str = 'England and Wales: 1996-2002' + from_2003_to_2006: str = 'England and Wales: 2003-2006' + from_2007_to_2011: str = 'England and Wales: 2007-2011' + from_2012_onwards: str = 'England and Wales: 2012-onwards' + from_2012_to_2022: str = 'England and Wales: 2012-2022' + from_2023_onwards: str = 'England and Wales: 2023 onwards' + + def start_year(self) -> int: + """ + Extract the starting year of the age band. + """ + value = self.value.lower() + + if 'before' in value: + return 0 + match = re.search(r'(\d{4})', value) + if not match: + raise ValueError(f"Cannot determine start year from '{self.value}'") + + return int(match.group(1)) + + @classmethod + def from_year_onwards(cls, year: int) -> List["EpcConstructionAgeBand"]: + """ + Return all age bands whose starting year is >= the given year. 
+ """ + return [ + band + for band in cls + if band.start_year() >= year + ] diff --git a/datatypes/epc/efficiency.py b/datatypes/epc/efficiency.py new file mode 100644 index 00000000..0417f49e --- /dev/null +++ b/datatypes/epc/efficiency.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EpcEfficiency(Enum): + VERY_POOR: str = "Very Poor" + POOR: str = "Poor" + AVERAGE: str = "Average" + GOOD: str = "Good" + VERY_GOOD: str = "Very Good" + NA: str = "N/A" diff --git a/datatypes/epc/floor.py b/datatypes/epc/floor.py new file mode 100644 index 00000000..41786101 --- /dev/null +++ b/datatypes/epc/floor.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class EpcFloorDescriptions(Enum): + # Solid floor + solid_insulated = "Solid, insulated" + solid_insulated_assumed = "Solid, insulated (assumed)" + solid_no_insulation_assumed = "Solid, no insulation (assumed)" + solid_limited_insulation_assumed = "Solid, limited insulation (assumed)" + + # Suspended floor + suspended_insulated = "Suspended, insulated" + suspended_insulated_assumed = "Suspended, insulated (assumed)" + suspended_no_insulation_assumed = "Suspended, no insulation (assumed)" + suspended_limited_insulation_assumed = "Suspended, limited insulation (assumed)" + + unknown = None # We don't resolve anything diff --git a/datatypes/epc/fuel.py b/datatypes/epc/fuel.py new file mode 100644 index 00000000..0d1e455c --- /dev/null +++ b/datatypes/epc/fuel.py @@ -0,0 +1,10 @@ +from enum import Enum + + +class EpcFuel(Enum): + electricity_not_community = "electricity (not community)" + lpg_not_community = "LPG (not community)" + mains_gas_not_community = "mains gas (not community)" + oil_not_community = "oil (not community)" + manufactured_smokeless_fuel = "Solid fuel: manufactured smokeless fuel" + smokeless_coal = "smokeless coal" diff --git a/datatypes/epc/heating_controls.py b/datatypes/epc/heating_controls.py new file mode 100644 index 00000000..48538bff --- /dev/null +++ b/datatypes/epc/heating_controls.py @@ -0,0 
+1,18 @@ +from enum import Enum + + +class EpcHeatingControls(Enum): + programmer_room_thermostat_trvs = "Programmer, room thermostat and TRVs" + programmers_trvs_bypass = "Programmer, TRVs and bypass" + time_and_temperature_zone_control = "Time and temperature zone control" + + # Room heaters + programmer_and_appliance_thermostats = "Programmer and appliance thermostats" + appliance_thermostats = "Appliance thermostats" + + # Storage heaters + automatic_charge_control = "Automatic charge control" + manual_charge_control = "Manual charge control" + + # Warm air + programmer_and_atleast_two_room_thermostats = "Programmer and at least two room thermostats" diff --git a/datatypes/epc/hotwater.py b/datatypes/epc/hotwater.py new file mode 100644 index 00000000..96af2be3 --- /dev/null +++ b/datatypes/epc/hotwater.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class EpcHotWaterSystems(Enum): + # from primary heating system + from_main_system = "From main system" + # Common for heater-based systems, e.g. 
room heaters or storage heaters + electric_immersion_off_peak = "Electric immersion, off-peak" diff --git a/datatypes/epc/main_heating.py b/datatypes/epc/main_heating.py new file mode 100644 index 00000000..663ada99 --- /dev/null +++ b/datatypes/epc/main_heating.py @@ -0,0 +1,24 @@ +from enum import Enum + + +class EpcHeatingSystems(Enum): + # boiler and radiators + boiler_and_radiators_electric = "Boiler and radiators, electric" + boiler_and_radiators_lpg = "Boiler and radiators, LPG" + boiler_radiators_mains_gas = "Boiler and radiators, mains gas" + boiler_radiators_oil = "Boiler and radiators, oil" + # underfloor + electric_underfloor_heating = "Electric underfloor heating" + # ashp + air_to_air_ashp = "Air source heat pump, warm air, electric" + ashp_radiators_electric = "Air source heat pump, radiators, electric" + # Room heaters + room_heaters_electric = "Room heaters, electric" + room_heaters_mains_gas = "Room heaters, mains gas" + room_heaters_smokeless_fuel = "Room heaters, smokeless fuel" + room_heaters_coal = "Room heaters, coal" + # Storage heaters + electric_storage_heaters = "Electric storage heaters" + # Warm air + warm_air_electricaire = "Warm air, Electricaire" + warm_air_mains_gas = "Warm air, mains gas" diff --git a/datatypes/epc/property_type_built_form.py b/datatypes/epc/property_type_built_form.py new file mode 100644 index 00000000..2fd59ddf --- /dev/null +++ b/datatypes/epc/property_type_built_form.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class PropertyType(Enum): + flat = "Flat" + maisonette = "Maisonette" + bungalow = "Bungalow" + house = "House" + + +class BuiltForm(Enum): + mid_terrace = "Mid-Terrace" + end_terrace = "End-Terrace" + detached = "Detached" + semi_detached = "Semi-Detached" + enclosed_mid_terrace = "Enclosed Mid-Terrace" + enclosed_end_terrace = "Enclosed End-Terrace" diff --git a/datatypes/epc/roof.py b/datatypes/epc/roof.py new file mode 100644 index 00000000..9cdaac96 --- /dev/null +++ b/datatypes/epc/roof.py @@ 
-0,0 +1,86 @@ +from enum import Enum +from typing import List + + +class EpcRoofDescriptions(Enum): + # Loft + # Assumed options + pitched_insulated_assumed: str = "Pitched, insulated (assumed)" + pitched_no_insulation: str = "Pitched, no insulation" + # Insulation thickness options + loft_12mm_insulation: str = "Pitched, 12 mm loft insulation" + loft_25mm_insulation: str = "Pitched, 25 mm loft insulation" + loft_50mm_insulation: str = "Pitched, 50 mm loft insulation" + loft_75mm_insulation: str = "Pitched, 75 mm loft insulation" + loft_100mm_insulation: str = "Pitched, 100 mm loft insulation" + loft_125mm_insulation: str = "Pitched, 125 mm loft insulation" + loft_150mm_insulation: str = "Pitched, 150 mm loft insulation" + loft_175mm_insulation: str = "Pitched, 175 mm loft insulation" + loft_200mm_insulation: str = "Pitched, 200 mm loft insulation" + loft_250mm_insulation: str = "Pitched, 250 mm loft insulation" + loft_270mm_insulation: str = "Pitched, 270 mm loft insulation" + loft_300mm_insulation: str = "Pitched, 300 mm loft insulation" + loft_350mm_insulation: str = "Pitched, 350 mm loft insulation" + loft_400mm_plus_insulation: str = "Pitched, 400+ mm loft insulation" + # Insulated at rafters "Pitched, insulated at rafters" + # Rafters + # 400mm, 350mm = very good + # 200-300mm = good + # 125-175 = average + # 50-100 = poor + # 25 and below= very poor + loft_insulated_at_rafters: str = "Pitched, insulated at rafters" + # another dwelling above + another_dwelling_above: str = "(another dwelling above)" + # flat roof, which if there is observed insulation is just "flat, insulated", however there is a + # different efficiency rating depending on insulation thickness + # categories: + # 12mm = very poor & has limited insulation description + # 25, 50 = poor & has limited insulation description + # 75, 100, 125mm = average (Flat, insulated) + # 150, 175, 200, 225, 250mm = good (Flat, insulated) + # 270mm+ = very good (Flat, insulated) + # As built 2023 = Flat, 
insulated, Very good + # 2003 - 2006, up to 2012-2022 = Flat insulated, Good + # 1983-1990, 1996-2002 = Flat, insulated, Average + # 1976-1982 = Flat, limited insulation, poor + # 1967 - 1975 = Flat, limited insulation, Very Poor + # 1950-1966 and earlier bands = flat, no insulation, very poor + + flat_insulated: str = "Flat, insulated" + flat_limited_insulation: str = "Flat, limited insulation" + flat_no_insulation: str = "Flat, no insulation" + + # Thatched roof descriptions + # With Loft insulation at joists + # Thatched + 12mm = thatched, with additional insulation, average + # Thatched + 25, 50, 100, 150mm = thatched, with additional insulation, good + # Thatched + 175mm+ = thatched, with additional insulation, very good + # With loft insulation at rafters [out of scope atm] + # Unknown insulation + # Pre 1900, 1930-1949, 1967-1975, 1983-1990, 1996-2002 = "Thatched", Average + # 2003-2006, 2012-2022 = "Thatched", Good + # 2023 onwards = "Thatched", Very Good + thatched: str = "Thatched" # We see this for no insulation, has average performance + thatched_with_additional_insulation: str = "Thatched, with additional insulation" + + # Sloping ceiling + # For sloping ceiling tags, we don't use any (assumed) tags so that it's unambiguous that the roof is sloped. NOTE(review): "Pitched, no insulation" is also the value of pitched_no_insulation, so Enum treats sloping_pitched_no_insulation as an alias of that member rather than a distinct one - confirm this is intended + sloping_pitched_no_insulation: str = "Pitched, no insulation" + sloping_pitched_limited_insulation: str = "Pitched, limited insulation" + sloping_pitched_insulated: str = "Pitched, insulated" + + # Unknown descriptions which may get mapped later or handled via fallback + flat_as_built_unknown: str = "Flat, as built, unknown insulation" + loft_as_built_unknown: str = "Loft, as built, unknown insulation" + thatched_as_built_unknown: str = "Thatched, as built, unknown insulation" + sloping_pitched_as_built_unknown: str = "Pitched, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcRoofDescriptions"]: + return [ + EpcRoofDescriptions.flat_as_built_unknown, +
EpcRoofDescriptions.loft_as_built_unknown, + EpcRoofDescriptions.thatched_as_built_unknown, + EpcRoofDescriptions.sloping_pitched_as_built_unknown, + ] diff --git a/datatypes/epc/walls.py b/datatypes/epc/walls.py new file mode 100644 index 00000000..44ca7e49 --- /dev/null +++ b/datatypes/epc/walls.py @@ -0,0 +1,74 @@ +from enum import Enum +from typing import List + + +class EpcWallDescriptions(Enum): + # Cavity wall descriptions + cavity_insulated_assumed: str = "Cavity wall, as built, insulated (assumed)" + cavity_partial_insulated_assumed: str = "Cavity wall, as built, partial insulation (assumed)" + cavity_no_insulation_assumed: str = "Cavity wall, as built, no insulation (assumed)" + cavity_filled_cavity: str = "Cavity wall, filled cavity" + cavity_internal_insulation: str = "Cavity wall, with internal insulation" + cavity_external_insulation: str = "Cavity wall, with external insulation" + cavity_filled_plus_internal: str = "Cavity wall, filled cavity and internal insulation" + cavity_filled_plus_external: str = "Cavity wall, filled cavity and external insulation" + + # Solid wall descriptions + solid_brick_internal_insulation: str = "Solid brick, with internal insulation" + solid_brick_external_insulation: str = "Solid brick, with external insulation" + solid_brick_no_insulation_assumed: str = 'Solid brick, as built, no insulation (assumed)' + solid_brick_partial_insulated_assumed: str = 'Solid brick, as built, partial insulation (assumed)' + solid_brick_insulated_assumed: str = 'Solid brick, as built, insulated (assumed)' + + # System + system_external_insulation: str = "System built, with external insulation" + system_internal_insulation: str = "System built, with internal insulation" + system_no_insulation_assumed: str = "System built, as built, no insulation (assumed)" + system_partial_insulated_assumed: str = "System built, as built, partial insulation (assumed)" + system_insulated_assumed: str = "System built, as built, insulated (assumed)" + + # 
Timber + timber_frame_internal_insulation: str = "Timber frame, with internal insulation" + timber_frame_external_insulation: str = "Timber frame, with external insulation" + timber_frame_no_insulation_assumed: str = "Timber frame, as built, no insulation (assumed)" + timber_frame_partial_insulated_assumed: str = "Timber frame, as built, partial insulation (assumed)" + timber_frame_insulated_assumed: str = "Timber frame, as built, insulated (assumed)" + + # Granite/whinstone (NOTE(review): the last member below is spelled "whinestone", unlike its siblings - confirm before renaming) + granite_whinstone_external_insulation: str = "Granite or whin, with external insulation" + granite_whinstone_internal_insulation: str = "Granite or whin, with internal insulation" + granite_whinstone_no_insulation_assumed: str = "Granite or whin, as built, no insulation (assumed)" + granite_whinstone_partial_insulated_assumed: str = "Granite or whin, as built, partial insulation (assumed)" + granite_whinestone_insulated_assumed: str = "Granite or whin, as built, insulated (assumed)" + + # Sandstone/limestone + sandstone_limestone_internal_insulation: str = "Sandstone, with internal insulation" + sandstone_limestone_external_insulation: str = "Sandstone, with external insulation" + sandstone_limestone_no_insulation_assumed: str = "Sandstone, as built, no insulation (assumed)" + sandstone_limestone_partial_insulated_assumed: str = "Sandstone, as built, partial insulation (assumed)" + sandstone_limestone_insulated_assumed: str = "Sandstone, as built, insulated (assumed)" + + # Cob - NOTE(review): both members below share the value "Cob, as built", so Enum makes cob_as_built_good an alias of cob_as_built_average; they cannot be told apart by value + cob_as_built_average: str = "Cob, as built" + cob_as_built_good: str = "Cob, as built" + + # unknown descriptions which may get mapped later or handled via fallback + cavity_as_built_unknown: str = "Cavity wall, as built, unknown insulation" + solid_brick_as_built_unknown: str = "Solid brick, as built, unknown insulation" + system_as_built_unknown: str = "System built, as built, unknown insulation" + timber_frame_as_built_unknown: str = "Timber frame, as built, unknown insulation" + granite_as_built_unknown: str =
"Granite or whin, as built, unknown insulation" + sandstone_as_built_unknown: str = "Sandstone, as built, unknown insulation" + cob_as_built_unknown: str = "Cob, as built, unknown insulation" + + @property + def unknown_descriptions(self) -> List["EpcWallDescriptions"]: + return [ + EpcWallDescriptions.cavity_as_built_unknown, + EpcWallDescriptions.solid_brick_as_built_unknown, + EpcWallDescriptions.system_as_built_unknown, + EpcWallDescriptions.timber_frame_as_built_unknown, + EpcWallDescriptions.granite_as_built_unknown, + EpcWallDescriptions.sandstone_as_built_unknown, + EpcWallDescriptions.cob_as_built_unknown, + ] diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py index 3291e909..b4bb979d 100644 --- a/etl/bill_savings/KwhData.py +++ b/etl/bill_savings/KwhData.py @@ -196,6 +196,10 @@ class KwhData: if save and self.bucket is None: raise Exception("bucket not set, cannot save data") + if data.empty: + # If we have no data + return data + # TODO: New is a temporary parameter, which will transform the epc descriptions to their transformed features # in anticipation of the new model diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/infrastructure/terraform/lambda/condition-etl/main.tf new file mode 100644 index 00000000..4219f209 --- /dev/null +++ b/infrastructure/terraform/lambda/condition-etl/main.tf @@ -0,0 +1,43 @@ +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this + region = "eu-west-2" + } +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + + +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "condition-etl" + stage = var.stage + + image_uri = local.image_uri + timeout = 180 
+ + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + }, + ) + +} + +resource "aws_iam_role_policy_attachment" "attach_condition_etl_s3_read" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/condition-etl/provider.tf b/infrastructure/terraform/lambda/condition-etl/provider.tf new file mode 100644 index 00000000..c633d238 --- /dev/null +++ b/infrastructure/terraform/lambda/condition-etl/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "condition-etl-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/condition-etl/variables.tf b/infrastructure/terraform/lambda/condition-etl/variables.tf new file mode 100644 index 00000000..e4bab243 --- /dev/null +++ b/infrastructure/terraform/lambda/condition-etl/variables.tf @@ -0,0 +1,27 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. 
dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf index 3816c206..065fb790 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf @@ -6,6 +6,10 @@ module "role" { name = "${var.name}-lambda-${var.stage}" } +output "role_name" { + value = module.role.role_name +} + ############################################ # SQS queue + DLQ ############################################ diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf new file mode 100644 index 00000000..ebbdbfdc --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -0,0 +1,14 @@ +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "postcode-splitter" + stage = var.stage + + image_uri = local.image_uri + + + environment = { + STAGE = var.stage + LOG_LEVEL = "info" + } +} diff --git a/infrastructure/terraform/lambda/postcodeSplitter/provider.tf b/infrastructure/terraform/lambda/postcodeSplitter/provider.tf new file mode 100644 index 00000000..dbe323f2 --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "postcode-splitter-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git 
a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf new file mode 100644 index 00000000..9ce45fa5 --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -0,0 +1,26 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 3ba78ef3..b1474055 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -84,7 +84,7 @@ resource "aws_db_instance" "default" { # Temporary to enfore immediate change apply_immediately = true # Set up storage type to gp3 for better performance - storage_type = "gp3" + storage_type = "gp3" } # Set up the bucket that recieve the csv uploads of epc to be retrofit @@ -298,10 +298,6 @@ module "address2uprn_state_bucket" { } -output "address2uprn_state_bucket_name" { - value = module.address2uprn_state_bucket.bucket_name -} - module "address2uprn_registry" { source = "../modules/container_registry" name = "address2uprn" @@ -309,6 +305,62 @@ module "address2uprn_registry" { } -output "address2uprn_repository_url" { - value = module.address2uprn_registry.repository_url +################################################ +# Condition ETL – Lambda ECR +################################################ +module "condition_etl_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "condition-etl-terraform-state" + +} + +module 
"condition_etl_registry" { + source = "../modules/container_registry" + name = "condition-etl" + stage = var.stage + +} + +################################################ +# Postcode Splitter – Lambda ECR +################################################ +module "postcode_splitter_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "postcode-splitter-terraform-state" + +} + +module "postcode_splitter_registry" { + source = "../modules/container_registry" + name = "postcode_splitter" + stage = var.stage + +} + +################################################ +# Condition data – S3 bucket +################################################ +module "condition_data_bucket" { + source = "../modules/s3" + bucketname = "condition-data-${var.stage}" + allowed_origins = var.allowed_origins +} + +resource "aws_iam_policy" "condition_etl_s3_read" { + name = "ConditionETLReadS3" + description = "Allow Lambda to read objects from condition-data-${var.stage}" + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = ["s3:GetObject"] + Resource = "arn:aws:s3:::condition-data-${var.stage}/*" + } + ] + }) +} + +output "condition_etl_s3_read_arn" { + value = aws_iam_policy.condition_etl_s3_read.arn } \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index 0a0bbf73..ee203d46 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = .
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index c6fea3b6..e470c1a3 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -1090,6 +1090,7 @@ class Recommendations: ashp_cop = ashp_cop if ashp_cop else assumptions.AVERAGE_ASHP_EFFICIENCY + # kwh_impact_table = kwh_simulation_predictions["heating_kwh_predictions"][ kwh_simulation_predictions["heating_kwh_predictions"]["property_id"] == str(property_instance.id) ].merge( diff --git a/recommendations/RoofRecommendations.py b/recommendations/RoofRecommendations.py index 71e47ba6..f88a672b 100644 --- a/recommendations/RoofRecommendations.py +++ b/recommendations/RoofRecommendations.py @@ -331,18 +331,18 @@ class RoofRecommendations: """ # Can a non-primary part satisfy loft insulation? 
- primary_needs_loft = component_needs[1]["needs_loft_insulation"] + primary_needs_loft = component_needs[0]["needs_loft_insulation"] secondary_needs_loft = any( - p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 1 + p['needs_loft_insulation'] for idx, p in component_needs.items() if idx != 0 ) if primary_needs_loft and not secondary_needs_loft: # Only option is loft return "loft" - primary_needs_sloping = component_needs[1]["needs_sloping_ceiling"] + primary_needs_sloping = component_needs[0]["needs_sloping_ceiling"] secondary_needs_sloping = any( - p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 1 + p['needs_sloping_ceiling'] for idx, p in component_needs.items() if idx != 0 ) if primary_needs_sloping and not secondary_needs_sloping: @@ -418,11 +418,13 @@ class RoofRecommendations: return needs_sloping, not needs_loft # Indicates that the property needs sloping ceiling as we only run # this in that case + roof_components = [x for x in find_my_epc_components if x["component_name"] == "Roof"] + extracted_roof_descriptions = { idx: { "description": component["description"], **RoofAttributes(component["description"]).process() - } for idx, component in enumerate(find_my_epc_components) if component["component_name"] == "Roof" + } for idx, component in enumerate(roof_components) } component_needs = {} diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index ae807654..a65509d5 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -2,6 +2,10 @@ This script prepares the data for the financial model """ +from dotenv import load_dotenv + +load_dotenv(".env.local") + import pandas as pd import numpy as np from backend.app.utils import sap_to_epc @@ -24,12 +28,12 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 502 # Peabody +PORTFOLIO_ID = 524 SCENARIOS = [ - 986, + 1009, ] scenario_names = { - 986: "EPC 
C", + 1009: "EPC C; Most Economic", } diff --git a/utils/s3.py b/utils/s3.py index e70669d0..2e67d4f0 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -264,6 +264,7 @@ def save_excel_to_s3(df, bucket_name, file_key): def read_csv_from_s3(bucket_name, filepath): + logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'") s3 = boto3.client('s3') # Get the object from s3