diff --git a/.devcontainer/asset_list/Dockerfile b/.devcontainer/asset_list/Dockerfile index 512ab109..72a5de53 100644 --- a/.devcontainer/asset_list/Dockerfile +++ b/.devcontainer/asset_list/Dockerfile @@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ # # 4) Python deps - if you want to run assest list ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -ADD asset_list/requirements.txt requirements.txt -RUN pip install -r requirements.txt +ADD .devcontainer/asset_list/requirements.txt requirements2.txt +ADD asset_list/requirements.txt requirements1.txt +RUN cat requirements1.txt requirements2.txt >> requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/asset_list/requirements.txt b/.devcontainer/asset_list/requirements.txt index 0640f2c9..fe536a81 100644 --- a/.devcontainer/asset_list/requirements.txt +++ b/.devcontainer/asset_list/requirements.txt @@ -15,10 +15,9 @@ uvicorn[standard] pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 -pydantic-settings<2 pyyaml>=6.0.1 -pydantic>=1.10.7,<2 sqlmodel # Formatting black==26.1.0 dotenv +pydantic-settings \ No newline at end of file diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 6b6c4994..408c0319 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -13,6 +13,9 @@ on: required: false default: "." type: string + build_args: + required: false + type: string outputs: image_digest: @@ -29,11 +32,22 @@ on: required: true AWS_REGION: required: true + DEV_DB_HOST: + required: false + DEV_DB_PORT: + required: false + DEV_DB_NAME: + required: false jobs: build: runs-on: ubuntu-latest + env: + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + outputs: image_digest: ${{ steps.digest.outputs.image_digest }} ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }} @@ -64,7 +78,22 @@ jobs: - name: Build & push image run: | IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}" - docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }} + + # Writes build args and removes line breaks + BUILD_ARGS="" + while IFS= read -r line; do + # skip empty lines + [ -n "$line" ] || continue + temp=$(eval echo "$line") + BUILD_ARGS="$BUILD_ARGS --build-arg $temp" + done <<< "${{ inputs.build_args }}" + + docker build \ + -f ${{ inputs.dockerfile_path }} \ + $BUILD_ARGS \ + -t $IMAGE_URI \ + ${{ inputs.build_context }} + docker push $IMAGE_URI - name: Resolve image digest diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 41a551c4..4ac08e41 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -16,6 +16,7 @@ jobs: id: set-stage shell: bash run: | + env BRANCH="${GITHUB_REF_NAME}" if [[ "$BRANCH" == "prod" ]]; then @@ -73,8 +74,8 @@ jobs: uses: ./.github/workflows/_build_image.yml with: ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} - dockerfile_path: backend/address2UPRN/Dockerfile - build_context: backend/address2UPRN + dockerfile_path: backend/address2UPRN/handler/Dockerfile + build_context: . secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -96,3 +97,76 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + + # ============================================================ + # 2️⃣ Build Postcode Splitter image and Push + # ============================================================ + postcodeSplitter_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/postcode_splitter/handler/Dockerfile + build_context: . + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # 3️⃣ Deploy Postcode Splitter Lambda + # ============================================================ + postcodeSplitter_lambda: + needs: [postcodeSplitter_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: postcodeSplitter + lambda_path: infrastructure/terraform/lambda/postcodeSplitter + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # Condition ETL image and Push + # ============================================================ + condition_etl_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/condition/handler/Dockerfile + build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + + # ============================================================ + # Deploy Condition ETL Lambda + # ============================================================ + condition_etl_lambda: + needs: [condition_etl_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: condition-etl + lambda_path: infrastructure/terraform/lambda/condition-etl + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + diff --git a/backend/.env.test b/backend/.env.test index a05c93a3..5b77f243 100644 --- a/backend/.env.test +++ b/backend/.env.test @@ -6,17 +6,17 @@ DB_PASSWORD=makingwarmerhomes #not used -GOOGLE_SOLAR_API_KEY="test" -SAP_PREDICTIONS_BUCKET="test" -CARBON_PREDICTIONS_BUCKET="test" -HEAT_PREDICTIONS_BUCKET="test" -HEATING_KWH_PREDICTIONS_BUCKET="test" -HOTWATER_KWH_PREDICTIONS_BUCKET="test" -API_KEY="test" -ENVIRONMENT="test" -SECRET_KEY="test" -PLAN_TRIGGER_BUCKET="test" -DATA_BUCKET="test" -EPC_AUTH_TOKEN="test" -ENGINE_SQS_URL="test" -ENERGY_ASSESSMENTS_BUCKET="test" \ No newline at end of file +GOOGLE_SOLAR_API_KEY=test +SAP_PREDICTIONS_BUCKET=test +CARBON_PREDICTIONS_BUCKET=test +HEAT_PREDICTIONS_BUCKET=test +HEATING_KWH_PREDICTIONS_BUCKET=test +HOTWATER_KWH_PREDICTIONS_BUCKET=test +API_KEY=test +ENVIRONMENT=test +SECRET_KEY=test +PLAN_TRIGGER_BUCKET=test +DATA_BUCKET=test +EPC_AUTH_TOKEN=test +ENGINE_SQS_URL=test +ENERGY_ASSESSMENTS_BUCKET=test \ No newline at end of file diff --git a/backend/address2UPRN/Dockerfile b/backend/address2UPRN/Dockerfile deleted file mode 100644 index ac6af2a5..00000000 --- a/backend/address2UPRN/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.10 - -# Copy function code -COPY main.py . - -# Set the handler -CMD ["main.handler"] diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile new file mode 100644 index 00000000..5a09bd44 --- /dev/null +++ b/backend/address2UPRN/handler/Dockerfile @@ -0,0 +1,23 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/address2UPRN/handler/requirements.txt . + +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +COPY backend/address2UPRN/main.py . + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["main.handler"] diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt new file mode 100644 index 00000000..bc753841 --- /dev/null +++ b/backend/address2UPRN/handler/requirements.txt @@ -0,0 +1,3 @@ +epc-api-python==1.0.2 +tqdm +pandas \ No newline at end of file diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 9d27a5ce..ba386e0a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3): method="get", params={"postcode": postcode}, ) + if not search_resp or "rows" not in search_resp: + return pd.DataFrame() results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"]) @@ -298,7 +300,7 @@ def get_uprn_candidates( ) -def get_uprn(user_inputed_address: str, postcode: str): +def get_uprn(user_inputed_address: str, postcode: str, return_address=False): """ Return uprn (str) Return False if failed to find a sensible matching epc @@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str): if found_uprn == "": return None + if return_address: + return found_uprn, address return found_uprn diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index bd8f8017..a71b5827 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -1,17 +1,24 @@ import pandas as pd +from tqdm import tqdm +from backend.address2UPRN.main import get_uprn + +# Enable tqdm for pandas +tqdm.pandas() + +df = pd.read_excel("address2.xlsx") -# use Address 1 -junte_df = pd.read_excel("hackney_uprn_failures.xlsx") +def extract_uprn(row): + print(row["User Input"], row["Postcode"]) + result = get_uprn(row["User Input"], row["Postcode"], return_address=True) + + if result is None: + return pd.Series([None, None]) + + uprn, found_address = result + return pd.Series([uprn, found_address]) -# use domna_address_1 -khalim_df = pd.read_excel("khalim_standard.xlsx") - - -combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1') - -# Find the row in khalim_df that does not app - -result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])] +df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1) +df.to_excel("outputs2.xlsx", index=False) diff --git a/backend/condition/condition_trigger_request.py b/backend/condition/condition_trigger_request.py new file mode 100644 index 00000000..03bd6ad1 --- /dev/null +++ b/backend/condition/condition_trigger_request.py @@ -0,0 +1,33 @@ +from enum import Enum +from typing import Optional +from pydantic import BaseModel + + +class ConditionFileType(Enum): + LBWF = "LBWF" + Peabody = "Peabody" + # TODO: make these asset management systems rather than client names + + +class ConditionTriggerRequest(BaseModel): + file_type: ConditionFileType + trigger_file_bucket: str # TODO: get this from settings + trigger_file_key: str + + uprn_lookup_file_bucket: Optional[str] = None # TODO: get this from settings + uprn_lookup_file_key: Optional[str] = None + + +# { +# "file_type": "Peabody", +# "trigger_file_bucket": "condition-data-dev", +# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx", +# "uprn_lookup_file_bucket": "condition-data-dev", +# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv" +# } + +# { +# "file_type": "LBWF", +# "trigger_file_bucket": "condition-data-dev", +# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx", +# } diff --git a/backend/condition/domain/mapping/lbwf/lbwf_mapper.py b/backend/condition/domain/mapping/lbwf/lbwf_mapper.py index 60c8b1ac..9dbfcb17 100644 --- a/backend/condition/domain/mapping/lbwf/lbwf_mapper.py +++ b/backend/condition/domain/mapping/lbwf/lbwf_mapper.py @@ -1,4 +1,4 @@ -from typing import Any, Dict, List, Optional, Tuple +from typing import Any, Dict, Optional, Tuple from datetime import date from backend.condition.domain.aspect_condition import AspectCondition diff --git a/backend/condition/file_type.py b/backend/condition/file_type.py deleted file mode 100644 index e0736814..00000000 --- a/backend/condition/file_type.py +++ /dev/null @@ -1,16 +0,0 @@ -from enum import Enum - -class FileType(Enum): - LBWF = "lbwf" - Peabody = "peabody" - -def detect_file_type(filepath: str) -> FileType: - path = filepath.lower() - - if "lbwf" in path: - return FileType.LBWF - - if "peabody" in path: - return FileType.Peabody - - raise ValueError("Unrecognised file path") \ No newline at end of file diff --git a/backend/condition/handler.py b/backend/condition/handler.py deleted file mode 100644 index 5279b029..00000000 --- a/backend/condition/handler.py +++ /dev/null @@ -1,16 +0,0 @@ -from typing import Mapping, Any -from io import BytesIO - -from utils.logger import setup_logger -from backend.condition.processor import process_file - - -logger = setup_logger() - -def handler(event: Mapping[str, Any], context: Any) -> None: - # Temporary stub for PoC wiring - dummy_stream = BytesIO(b"") - - source_key = event.get("source_key", "unknown-source") - - process_file(dummy_stream, source_key) \ No newline at end of file diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile new file mode 100644 index 00000000..031d981e --- /dev/null +++ b/backend/condition/handler/Dockerfile @@ -0,0 +1,48 @@ +FROM public.ecr.aws/lambda/python:3.11 +# For local running: +# FROM python:3.11.10-bullseye + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# Environment +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} + +COPY backend/.env.local backend/.env.local + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/condition/handler/requirements.txt . + +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +COPY backend/condition/ backend/condition/ + +COPY backend/app/db/models/condition.py backend/app/db/models/condition.py +COPY backend/app/db/connection.py backend/app/db/connection.py +COPY backend/app/config.py backend/app/config.py + +COPY backend/__init__.py backend/__init__.py +COPY backend/app/__init__.py backend/app/__init__.py +COPY backend/app/db/__init__.py backend/app/db/__init__.py + + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["backend/condition/handler/handler.handler"] +# For local running +# CMD ["python", "-m", "backend.condition.handler.handler"] diff --git a/backend/condition/handler/handler.py b/backend/condition/handler/handler.py new file mode 100644 index 00000000..2f3616a4 --- /dev/null +++ b/backend/condition/handler/handler.py @@ -0,0 +1,51 @@ +import json +from typing import Mapping, Any +from io import BytesIO + +from backend.condition.condition_trigger_request import ConditionTriggerRequest +from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3 +from backend.condition.processor import process_file +from utils.logger import setup_logger +from utils.s3 import read_io_from_s3 + + +logger = setup_logger() + + +def handler(event: Mapping[str, Any], context: Any) -> None: + + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + logger.debug("Validating request body") + payload = ConditionTriggerRequest.model_validate(body_dict) + + logger.debug("Successfully validated request body") + + if payload.uprn_lookup_file_bucket and payload.uprn_lookup_file_key: + logger.debug("Getting UPRN lookup file from s3") + uprn_lookup = UprnLookupS3( + bucket=payload.uprn_lookup_file_bucket, + key=payload.uprn_lookup_file_key, + ) # TODO: replace with postgres implementation + logger.debug("Successfully got UPRN lookup file from s3") + else: + uprn_lookup = None + + logger.debug("Getting conditions data from s3") + file_bytes: BytesIO = read_io_from_s3( + bucket_name=payload.trigger_file_bucket, + file_key=payload.trigger_file_key, + ) + logger.debug( + "Successfully got conditions data from s3. Moving on to process file..." + ) + + process_file( + file_stream=file_bytes, + file_type=payload.file_type, + uprn_lookup=uprn_lookup, + ) + + except Exception as e: + logger.error(f"Failed to process record: {e}") diff --git a/backend/condition/handler/requirements.txt b/backend/condition/handler/requirements.txt new file mode 100644 index 00000000..1e259a95 --- /dev/null +++ b/backend/condition/handler/requirements.txt @@ -0,0 +1,9 @@ +openpyxl +sqlmodel +pydantic-settings +psycopg2-binary==2.9.10 + +# pandas isn't used, but needed for importing from utils.s3 +pandas==2.2.2 +numpy==1.26.4 +openpyxl diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py index e39d38c7..4595b93b 100644 --- a/backend/condition/local_runner.py +++ b/backend/condition/local_runner.py @@ -1,5 +1,7 @@ from pathlib import Path +from backend.condition.condition_trigger_request import ConditionFileType +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.processor import process_file @@ -20,15 +22,27 @@ def main() -> None: / "peabody" / "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx" ) - filepaths = [lbwf_path, peabody_path] - # filepaths = [lbwf_path] + peabody_uprn_lookup_path: Path = ( + path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv" + ) + # filepaths = [lbwf_path, peabody_path] + filepaths = [lbwf_path] # filepaths = [peabody_path] + uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix()) + + def get_file_type(file_path: str) -> ConditionFileType: + if "peabody" in file_path: + return ConditionFileType.Peabody + if "lbwf" in file_path: + return ConditionFileType.LBWF + for fp in filepaths: with fp.open("rb") as f: process_file( file_stream=f, - source_key=fp.as_posix(), + file_type=get_file_type(fp.as_posix()), + uprn_lookup=uprn_lookup, ) diff --git a/backend/condition/lookups/uprn_lookup.py b/backend/condition/lookups/uprn_lookup.py new file mode 100644 index 00000000..0f6e78fd --- /dev/null +++ b/backend/condition/lookups/uprn_lookup.py @@ -0,0 +1,8 @@ +from abc import ABC, abstractmethod +from typing import BinaryIO, Dict + + +class UprnLookup(ABC): + @abstractmethod + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + pass diff --git a/backend/condition/lookups/uprn_lookup_csv.py b/backend/condition/lookups/uprn_lookup_csv.py new file mode 100644 index 00000000..8b1c21a2 --- /dev/null +++ b/backend/condition/lookups/uprn_lookup_csv.py @@ -0,0 +1,23 @@ +import csv +from io import TextIOWrapper +from typing import BinaryIO, Dict, TextIO +from backend.condition.lookups.uprn_lookup import UprnLookup + + +class UprnLookupLocal(UprnLookup): + def __init__(self, csv_path: str): + self.csv_path = csv_path + + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + with open(self.csv_path, "rb") as f: + return self.parse_csv(f) + + def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]: + text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8") + mapping: Dict[str, int] = {} + reader = csv.DictReader(text_stream) + for row in reader: + if not row["reference"] or not row["out_uprn"]: + continue + mapping[row["reference"].strip()] = int(row["out_uprn"].strip()) + return mapping diff --git a/backend/condition/lookups/uprn_lookup_s3.py b/backend/condition/lookups/uprn_lookup_s3.py new file mode 100644 index 00000000..da725a2f --- /dev/null +++ b/backend/condition/lookups/uprn_lookup_s3.py @@ -0,0 +1,29 @@ +import csv +from io import BytesIO, TextIOWrapper +from typing import BinaryIO, Dict, TextIO + +from backend.condition.lookups.uprn_lookup import UprnLookup +from utils.s3 import read_io_from_s3 + + +class UprnLookupS3(UprnLookup): + def __init__(self, bucket: str = "", key: str = ""): + self.bucket = bucket + self.key = key + + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + file_bytes: BytesIO = read_io_from_s3( + bucket_name=self.bucket, file_key=self.key + ) + + return self._parse_csv_bytes(file_bytes) + + def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]: + text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8") + mapping: Dict[str, int] = {} + reader = csv.DictReader(text_stream) + for row in reader: + if not row["reference"] or not row["out_uprn"]: + continue + mapping[row["reference"].strip()] = int(row["out_uprn"].strip()) + return mapping diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index 68ca0292..b5d28e18 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,27 +1,35 @@ +from typing import Optional +from backend.condition.condition_trigger_request import ConditionFileType from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper -from backend.condition.file_type import FileType +from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.parsing.parser import Parser from backend.condition.parsing.lbwf_parser import LbwfParser from backend.condition.parsing.peabody_parser import PeabodyParser -def select_parser(file_type: FileType) -> Parser: - if file_type is FileType.LBWF: +def select_parser( + file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None +) -> Parser: + if file_type is ConditionFileType.LBWF: return LbwfParser() - if file_type is FileType.Peabody: - return PeabodyParser() + if file_type is ConditionFileType.Peabody: + if not uprn_lookup: + raise ValueError( + "Cannot instantiate Peabody Parser without UPRN lookup being provided" + ) + return PeabodyParser(uprn_lookup=uprn_lookup) raise ValueError("Unrecognised file type, unable to instantiate Parser") -def select_mapper(file_type: FileType) -> Mapper: - if file_type is FileType.LBWF: +def select_mapper(file_type: ConditionFileType) -> Mapper: + if file_type is ConditionFileType.LBWF: return LbwfMapper() - if file_type is FileType.Peabody: + if file_type is ConditionFileType.Peabody: return PeabodyMapper() raise ValueError("Unrecognised file type, unable to instantiate Mapper") diff --git a/backend/condition/parsing/lbwf_parser.py b/backend/condition/parsing/lbwf_parser.py index 3a23d028..a713b1ef 100644 --- a/backend/condition/parsing/lbwf_parser.py +++ b/backend/condition/parsing/lbwf_parser.py @@ -18,7 +18,6 @@ class LbwfParser(Parser): def parse( self, file_stream: BinaryIO, - location_ref_to_uprn_map: Optional[Dict[str, int]] = None, ) -> Any: wb: Workbook = load_workbook(file_stream) address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict( diff --git a/backend/condition/parsing/parser.py b/backend/condition/parsing/parser.py index 825abcd5..b160b217 100644 --- a/backend/condition/parsing/parser.py +++ b/backend/condition/parsing/parser.py @@ -8,6 +8,5 @@ class Parser(ABC): def parse( self, file_stream: BinaryIO, - location_ref_to_uprn_map: Optional[Dict[str, int]] = None, ) -> Any: pass diff --git a/backend/condition/parsing/peabody_parser.py b/backend/condition/parsing/peabody_parser.py index c53fd6d1..4620ba82 100644 --- a/backend/condition/parsing/peabody_parser.py +++ b/backend/condition/parsing/peabody_parser.py @@ -4,6 +4,7 @@ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict from openpyxl import Workbook, load_workbook from collections import defaultdict +from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.parsing.parser import Parser from backend.condition.parsing.records.peabody.peabody_asset_condition import ( PeabodyAssetCondition, @@ -15,42 +16,29 @@ logger = setup_logger() class PeabodyParser(Parser): + def __init__(self, uprn_lookup: UprnLookup): + self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC? + def parse( self, file_stream: BinaryIO, - location_ref_to_uprn_map: Optional[Dict[str, int]] = None, ) -> Any: - wb: Workbook = load_workbook(file_stream) - - if location_ref_to_uprn_map is None: - location_ref_to_uprn_map: Dict[str, int] = ( - PeabodyParser._build_location_ref_to_uprn_map() - ) - + file_stream.seek(0) + logger.debug("[PeabodyParser] Loading workbook...") + wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True) + logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...") assets = PeabodyParser._parse_assets(wb) + logger.debug( + "[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..." + ) + location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup() + logger.debug("[PeabodyParser] Successfully parsed UPRN lookup") return PeabodyParser._group_assets_into_properties( assets=assets, location_ref_to_uprn_map=location_ref_to_uprn_map, ) - @staticmethod - def _build_location_ref_to_uprn_map() -> Dict[str, int]: - location_ref_to_uprn_filepath: Path = ( - Path(__file__).resolve().parents[1] - / "sample_data" - / "peabody" - / "PeabodyPropertymatched_Dec25_propref_UPRN.csv" - ) - location_ref_to_uprn_map: Dict[str, int] = {} - - with location_ref_to_uprn_filepath.open(newline="") as f: - reader: Any = csv.DictReader(f) - for row in reader: - location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"]) - - return location_ref_to_uprn_map - @staticmethod def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]: assets_sheet = wb["Survey Records - D & Lower"] @@ -67,7 +55,7 @@ class PeabodyParser(Parser): ) if not asset.is_block_level: # Block-level condition surveys are out of scope for now - # until we have a wider think on how to handle block + # until we have a wider think on how to handle blocks assets.append(asset) # TODO: handle block-level assets except Exception as e: @@ -92,13 +80,14 @@ class PeabodyParser(Parser): assets_by_location_reference[asset.lo_reference].append(asset) properties: List[PeabodyProperty] = [] + failed_mappings_count = 0 for location_ref, grouped_assets in assets_by_location_reference.items(): uprn = location_ref_to_uprn_map.get(location_ref) if uprn is None: - logger.warning(f"No UPRN found for Location Reference: {location_ref}") + failed_mappings_count += 1 continue properties.append( @@ -108,6 +97,7 @@ class PeabodyParser(Parser): ) ) + logger.warning(f"No UPRN found for {failed_mappings_count} Location References") return properties @staticmethod diff --git a/backend/condition/persistence/condition_postgres.py b/backend/condition/persistence/condition_postgres.py index 9d7895f0..e83df540 100644 --- a/backend/condition/persistence/condition_postgres.py +++ b/backend/condition/persistence/condition_postgres.py @@ -19,18 +19,19 @@ class ConditionPostgres: def bulk_insert_surveys( self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100 ) -> None: - logger.info( - f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..." + logger.debug( + f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..." ) survey_models: List[PropertyConditionSurveyModel] = [ ConditionPostgres.map_survey_to_model(s) for s in surveys ] total: int = len(survey_models) - logger.info( - f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..." + logger.debug( + f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {batch_size}..." ) with db_session() as session: + logger.info("[ConditionPostgres] Successfully made connection to database") for start in range(0, total, batch_size): end = min(start + batch_size, total) batch = survey_models[start:end] diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 4d8f16cf..ad5b4232 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,26 +1,31 @@ -from typing import Any, BinaryIO, List +from typing import Any, BinaryIO, List, Optional from datetime import datetime +from backend.condition.condition_trigger_request import ConditionFileType +from backend.condition.lookups.uprn_lookup import UprnLookup from utils.logger import setup_logger from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.property_condition_survey import PropertyConditionSurvey from backend.condition.parsing.parser import Parser from backend.condition.persistence.condition_postgres import ConditionPostgres -from backend.condition.file_type import FileType, detect_file_type from backend.condition.parsing.factory import select_parser, select_mapper logger = setup_logger() -def process_file(file_stream: BinaryIO, source_key: str) -> None: - logger.info(f"[processor] Received file: {source_key}") - +def process_file( + file_stream: BinaryIO, + file_type: ConditionFileType, + uprn_lookup: Optional[UprnLookup], +) -> None: # Instantiation - file_type: FileType = detect_file_type(source_key) - parser: Parser = select_parser(file_type) + logger.debug(f"[processor] Instantiating classes...") + parser: Parser = select_parser(file_type, uprn_lookup) mapper: Mapper = select_mapper(file_type) persistence = ConditionPostgres() + logger.debug(f"[processor] Finished instantiating classes. Calling Parser...") + # Orchestration raw_properties: List[Any] = parser.parse(file_stream) diff --git a/backend/condition/tests/lookups/test_uprn_lookup_csv.py b/backend/condition/tests/lookups/test_uprn_lookup_csv.py new file mode 100644 index 00000000..d01c52c2 --- /dev/null +++ b/backend/condition/tests/lookups/test_uprn_lookup_csv.py @@ -0,0 +1,34 @@ +import pytest +from typing import Dict +from tempfile import NamedTemporaryFile + +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal + + +@pytest.fixture +def prop_ref_uprn_csv_file() -> str: + csv_content = """reference,out_uprn + ABC123,10000000001 + DEF456,10000000002 + GHI789,10000000003 + """ + with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp: + tmp.write(csv_content) + tmp.flush() + return tmp.name + + +def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None: + # arrange + uprn_lookup = UprnLookupLocal(prop_ref_uprn_csv_file) + expected_map: Dict[str, int] = { + "ABC123": 10000000001, + "DEF456": 10000000002, + "GHI789": 10000000003, + } + + # act + actual_map: Dict[str, int] = uprn_lookup.get_property_ref_to_uprn_lookup() + + # assert + assert actual_map == expected_map diff --git a/backend/condition/tests/parsing/test_parsing_factory.py b/backend/condition/tests/parsing/test_parsing_factory.py index e2b478ff..df01eaad 100644 --- a/backend/condition/tests/parsing/test_parsing_factory.py +++ b/backend/condition/tests/parsing/test_parsing_factory.py @@ -1,11 +1,13 @@ import pytest +from backend.condition.condition_trigger_request import ConditionFileType +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.parsing.factory import select_parser -from backend.condition.file_type import FileType + def test_selects_lbwf_parser(): # arrange - file_type = FileType.LBWF + file_type = ConditionFileType.LBWF expected_class_name = "LbwfParser" # act @@ -14,13 +16,15 @@ def test_selects_lbwf_parser(): # assert assert expected_class_name == actual_class_name + def test_selects_peabody_parser(): # arrange - file_type = FileType.Peabody + file_type = ConditionFileType.Peabody expected_class_name = "PeabodyParser" + uprn_lookup = UprnLookupLocal(csv_path="test") # act - actual_class_name = select_parser(file_type).__class__.__name__ + actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__ # assert - assert expected_class_name == actual_class_name \ No newline at end of file + assert expected_class_name == actual_class_name diff --git a/backend/condition/tests/parsing/test_peabody_parser.py b/backend/condition/tests/parsing/test_peabody_parser.py index 20f7a28e..5fb42204 100644 --- a/backend/condition/tests/parsing/test_peabody_parser.py +++ b/backend/condition/tests/parsing/test_peabody_parser.py @@ -1,9 +1,11 @@ +from tempfile import NamedTemporaryFile import pytest from typing import Any, Dict from io import BytesIO from openpyxl import Workbook from datetime import datetime +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.parsing.peabody_parser import PeabodyParser from backend.condition.parsing.records.peabody.peabody_asset_condition import ( PeabodyAssetCondition, @@ -145,23 +147,28 @@ def peabody_assets_xlsx_bytes() -> BytesIO: @pytest.fixture -def location_ref_to_uprn_map() -> Dict[str, int]: - return { - "B000RAND": 1, - "B000BLOCK": 2, - "B000FAKE": 3, - "B000MIS": 4, - } +def prop_ref_uprn_csv_file() -> str: + csv_content = """reference,out_uprn + B000RAND,1 + B000BLOCK,2 + B000FAKE,3 + B000MIS,4 + """ + with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp: + tmp.write(csv_content) + tmp.flush() + return tmp.name def test_peabody_parser_parses_conditions( - peabody_assets_xlsx_bytes, location_ref_to_uprn_map + peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file ): # arrange - parser = PeabodyParser() + uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file) + parser = PeabodyParser(uprn_lookup=uprn_lookup) # act - result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map) + result: Any = parser.parse(peabody_assets_xlsx_bytes) # assert assert len(result) == 3 diff --git a/backend/condition/tests/test_detect_file_type.py b/backend/condition/tests/test_detect_file_type.py deleted file mode 100644 index fecf22c1..00000000 --- a/backend/condition/tests/test_detect_file_type.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from backend.condition.file_type import FileType, detect_file_type - -def test_detects_lbwf_file_type(): - # arrange - file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx" - expected_file_type = FileType.LBWF - - # act - actual_file_type: FileType = detect_file_type(file_path_str) - - # assert - assert expected_file_type == actual_file_type - -def test_unknown_filepath_raises_value_error(): - # arrange - file_path_str = "unknown/Example Asset Data.xlsx" - - # act + assert - with pytest.raises(ValueError): - detect_file_type(file_path_str) \ No newline at end of file diff --git a/backend/postcode_splitter/hackney.xlsx b/backend/postcode_splitter/hackney.xlsx deleted file mode 100644 index 64892f3a..00000000 Binary files a/backend/postcode_splitter/hackney.xlsx and /dev/null differ diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile new file mode 100644 index 00000000..7c1a7989 --- /dev/null +++ b/backend/postcode_splitter/handler/Dockerfile @@ -0,0 +1,9 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["main.handler"] diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d417c8f1..d55f618a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,10 +1,12 @@ import pandas as pd import requests -from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode +from backend.address2UPRN.main import ( + resolve_uprns_for_postcode_group, + get_epc_data_with_postcode, +) from tqdm import tqdm - def sanitise_postcode(postcode: str) -> str | None: """ Normalise postcode for grouping. @@ -51,11 +53,7 @@ def main(): # --- validate AFTER grouping (save API calls) --- # Get unique, non-null postcodes - unique_postcodes = ( - df["postcode_clean"] - .dropna() - .unique() - ) + unique_postcodes = df["postcode_clean"].dropna().unique() # Validate each postcode once, TODOadd a progress bar postcode_validity = { @@ -66,7 +64,6 @@ def main(): # Map validity back onto dataframe df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) - results = [] for postcode, group_df in tqdm( @@ -98,17 +95,33 @@ def main(): results.append(tmp) final_df = pd.concat(results, ignore_index=True) - a = final_df[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] # add levi score to viewing - b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing - b = b[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] + a = final_df[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] # add levi score to viewing + b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing + b = b[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] + + +def handler(event, context): + print("hello Postcode splitter world") + return {"statusCode": 200, "body": "hello world"} + if __name__ == "__main__": main() diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/infrastructure/terraform/lambda/condition-etl/main.tf new file mode 100644 index 00000000..4219f209 --- /dev/null +++ b/infrastructure/terraform/lambda/condition-etl/main.tf @@ -0,0 +1,43 @@ +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this + region = "eu-west-2" + } +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + + +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "condition-etl" + stage = var.stage + + image_uri = local.image_uri + timeout = 180 + + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + }, + ) + +} + +resource "aws_iam_role_policy_attachment" "attach_condition_etl_s3_read" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/condition-etl/provider.tf b/infrastructure/terraform/lambda/condition-etl/provider.tf new file mode 100644 index 00000000..c633d238 --- /dev/null +++ b/infrastructure/terraform/lambda/condition-etl/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "condition-etl-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/condition-etl/variables.tf b/infrastructure/terraform/lambda/condition-etl/variables.tf new file mode 100644 index 00000000..e4bab243 --- /dev/null +++ b/infrastructure/terraform/lambda/condition-etl/variables.tf @@ -0,0 +1,27 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf index 3816c206..065fb790 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf @@ -6,6 +6,10 @@ module "role" { name = "${var.name}-lambda-${var.stage}" } +output "role_name" { + value = module.role.role_name +} + ############################################ # SQS queue + DLQ ############################################ diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf new file mode 100644 index 00000000..ebbdbfdc --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -0,0 +1,14 @@ +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "postcode-splitter" + stage = var.stage + + image_uri = local.image_uri + + + environment = { + STAGE = var.stage + LOG_LEVEL = "info" + } +} diff --git a/infrastructure/terraform/lambda/postcodeSplitter/provider.tf b/infrastructure/terraform/lambda/postcodeSplitter/provider.tf new file mode 100644 index 00000000..dbe323f2 --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "postcode-splitter-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf new file mode 100644 index 00000000..9ce45fa5 --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -0,0 +1,26 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index a19c4e21..b1474055 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -298,10 +298,6 @@ module "address2uprn_state_bucket" { } -output "address2uprn_state_bucket_name" { - value = module.address2uprn_state_bucket.bucket_name -} - module "address2uprn_registry" { source = "../modules/container_registry" name = "address2uprn" @@ -309,6 +305,62 @@ module "address2uprn_registry" { } -output "address2uprn_repository_url" { - value = module.address2uprn_registry.repository_url +################################################ +# Condition ETL – Lambda ECR +################################################ +module "condition_etl_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "condition-etl-terraform-state" + +} + +module "condition_etl_registry" { + source = "../modules/container_registry" + name = "condition-etl" + stage = var.stage + +} + +################################################ +# Postcode Splitter – Lambda ECR +################################################ +module "postcode_splitter_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "postcode-splitter-terraform-state" + +} + +module "postcode_splitter_registry" { + source = "../modules/container_registry" + name = "postcode_splitter" + stage = var.stage + +} + +################################################ +# Conidition data – S3 bucket +################################################ +module "condition_data_bucket" { + source = "../modules/s3" + bucketname = "condition-data-${var.stage}" + allowed_origins = var.allowed_origins +} + +resource "aws_iam_policy" "condition_etl_s3_read" { + name = "ConditionETLReadS3" + description = "Allow Lambda to read objects from condition-data-${var.stage}" + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = ["s3:GetObject"] + Resource = "arn:aws:s3:::condition-data-${var.stage}/*" + } + ] + }) +} + +output "condition_etl_s3_read_arn" { + value = aws_iam_policy.condition_etl_s3_read.arn } \ No newline at end of file diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index ae807654..a65509d5 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -2,6 +2,10 @@ This script prepares the data for the financial model """ +from dotenv import load_dotenv + +load_dotenv(".env.local") + import pandas as pd import numpy as np from backend.app.utils import sap_to_epc @@ -24,12 +28,12 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 502 # Peabody +PORTFOLIO_ID = 524 SCENARIOS = [ - 986, + 1009, ] scenario_names = { - 986: "EPC C", + 1009: "EPC C; Most Economic", }