mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge branch 'main' of https://github.com/Hestia-Homes/Model into debugging-ara-runs
This commit is contained in:
commit
50256d4cff
44 changed files with 754 additions and 189 deletions
|
|
@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
|
|||
|
||||
# # 4) Python deps - if you want to run assest list
|
||||
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
ADD asset_list/requirements.txt requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
ADD .devcontainer/asset_list/requirements.txt requirements2.txt
|
||||
ADD asset_list/requirements.txt requirements1.txt
|
||||
RUN cat requirements1.txt requirements2.txt >> requirements.txt
|
||||
|
||||
RUN pip install -r requirements.txt
|
||||
# 5) Workdir
|
||||
|
|
|
|||
|
|
@ -15,10 +15,9 @@ uvicorn[standard]
|
|||
pytest==9.0.2
|
||||
pytest-cov==7.0.0
|
||||
ipykernel>=6.25,<7
|
||||
pydantic-settings<2
|
||||
pyyaml>=6.0.1
|
||||
pydantic>=1.10.7,<2
|
||||
sqlmodel
|
||||
# Formatting
|
||||
black==26.1.0
|
||||
dotenv
|
||||
pydantic-settings
|
||||
31
.github/workflows/_build_image.yml
vendored
31
.github/workflows/_build_image.yml
vendored
|
|
@ -13,6 +13,9 @@ on:
|
|||
required: false
|
||||
default: "."
|
||||
type: string
|
||||
build_args:
|
||||
required: false
|
||||
type: string
|
||||
|
||||
outputs:
|
||||
image_digest:
|
||||
|
|
@ -29,11 +32,22 @@ on:
|
|||
required: true
|
||||
AWS_REGION:
|
||||
required: true
|
||||
DEV_DB_HOST:
|
||||
required: false
|
||||
DEV_DB_PORT:
|
||||
required: false
|
||||
DEV_DB_NAME:
|
||||
required: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
runs-on: ubuntu-latest
|
||||
|
||||
env:
|
||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
|
||||
outputs:
|
||||
image_digest: ${{ steps.digest.outputs.image_digest }}
|
||||
ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
|
||||
|
|
@ -64,7 +78,22 @@ jobs:
|
|||
- name: Build & push image
|
||||
run: |
|
||||
IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}"
|
||||
docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }}
|
||||
|
||||
# Writes build args and removes line breaks
|
||||
BUILD_ARGS=""
|
||||
while IFS= read -r line; do
|
||||
# skip empty lines
|
||||
[ -n "$line" ] || continue
|
||||
temp=$(eval echo "$line")
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
|
||||
done <<< "${{ inputs.build_args }}"
|
||||
|
||||
docker build \
|
||||
-f ${{ inputs.dockerfile_path }} \
|
||||
$BUILD_ARGS \
|
||||
-t $IMAGE_URI \
|
||||
${{ inputs.build_context }}
|
||||
|
||||
docker push $IMAGE_URI
|
||||
|
||||
- name: Resolve image digest
|
||||
|
|
|
|||
78
.github/workflows/deploy_terraform.yml
vendored
78
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -16,6 +16,7 @@ jobs:
|
|||
id: set-stage
|
||||
shell: bash
|
||||
run: |
|
||||
env
|
||||
BRANCH="${GITHUB_REF_NAME}"
|
||||
|
||||
if [[ "$BRANCH" == "prod" ]]; then
|
||||
|
|
@ -73,8 +74,8 @@ jobs:
|
|||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/address2UPRN/Dockerfile
|
||||
build_context: backend/address2UPRN
|
||||
dockerfile_path: backend/address2UPRN/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
|
|
@ -96,3 +97,76 @@ jobs:
|
|||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2️⃣ Build Postcode Splitter image and Push
|
||||
# ============================================================
|
||||
postcodeSplitter_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy Postcode Splitter Lambda
|
||||
# ============================================================
|
||||
postcodeSplitter_lambda:
|
||||
needs: [postcodeSplitter_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: postcodeSplitter
|
||||
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# Condition ETL image and Push
|
||||
# ============================================================
|
||||
condition_etl_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/condition/handler/Dockerfile
|
||||
build_context: .
|
||||
build_args: |
|
||||
DEV_DB_HOST=$DEV_DB_HOST
|
||||
DEV_DB_PORT=$DEV_DB_PORT
|
||||
DEV_DB_NAME=$DEV_DB_NAME
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
|
||||
# ============================================================
|
||||
# Deploy Condition ETL Lambda
|
||||
# ============================================================
|
||||
condition_etl_lambda:
|
||||
needs: [condition_etl_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: condition-etl
|
||||
lambda_path: infrastructure/terraform/lambda/condition-etl
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.condition_etl_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,17 +6,17 @@ DB_PASSWORD=makingwarmerhomes
|
|||
|
||||
|
||||
#not used
|
||||
GOOGLE_SOLAR_API_KEY="test"
|
||||
SAP_PREDICTIONS_BUCKET="test"
|
||||
CARBON_PREDICTIONS_BUCKET="test"
|
||||
HEAT_PREDICTIONS_BUCKET="test"
|
||||
HEATING_KWH_PREDICTIONS_BUCKET="test"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
|
||||
API_KEY="test"
|
||||
ENVIRONMENT="test"
|
||||
SECRET_KEY="test"
|
||||
PLAN_TRIGGER_BUCKET="test"
|
||||
DATA_BUCKET="test"
|
||||
EPC_AUTH_TOKEN="test"
|
||||
ENGINE_SQS_URL="test"
|
||||
ENERGY_ASSESSMENTS_BUCKET="test"
|
||||
GOOGLE_SOLAR_API_KEY=test
|
||||
SAP_PREDICTIONS_BUCKET=test
|
||||
CARBON_PREDICTIONS_BUCKET=test
|
||||
HEAT_PREDICTIONS_BUCKET=test
|
||||
HEATING_KWH_PREDICTIONS_BUCKET=test
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET=test
|
||||
API_KEY=test
|
||||
ENVIRONMENT=test
|
||||
SECRET_KEY=test
|
||||
PLAN_TRIGGER_BUCKET=test
|
||||
DATA_BUCKET=test
|
||||
EPC_AUTH_TOKEN=test
|
||||
ENGINE_SQS_URL=test
|
||||
ENERGY_ASSESSMENTS_BUCKET=test
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
|
||||
# Copy function code
|
||||
COPY main.py .
|
||||
|
||||
# Set the handler
|
||||
CMD ["main.handler"]
|
||||
23
backend/address2UPRN/handler/Dockerfile
Normal file
23
backend/address2UPRN/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# -----------------------------
|
||||
# Copy requirements FIRST (for Docker layer caching)
|
||||
# -----------------------------
|
||||
COPY backend/address2UPRN/handler/requirements.txt .
|
||||
|
||||
# Install dependencies into Lambda runtime
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# -----------------------------
|
||||
# Copy application code
|
||||
# -----------------------------
|
||||
COPY utils/ utils/
|
||||
COPY backend/address2UPRN/main.py .
|
||||
|
||||
# -----------------------------
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["main.handler"]
|
||||
3
backend/address2UPRN/handler/requirements.txt
Normal file
3
backend/address2UPRN/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
epc-api-python==1.0.2
|
||||
tqdm
|
||||
pandas
|
||||
|
|
@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
|
|||
method="get",
|
||||
params={"postcode": postcode},
|
||||
)
|
||||
if not search_resp or "rows" not in search_resp:
|
||||
return pd.DataFrame()
|
||||
|
||||
results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
|
||||
|
||||
|
|
@ -298,7 +300,7 @@ def get_uprn_candidates(
|
|||
)
|
||||
|
||||
|
||||
def get_uprn(user_inputed_address: str, postcode: str):
|
||||
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
|
||||
"""
|
||||
Return uprn (str)
|
||||
Return False if failed to find a sensible matching epc
|
||||
|
|
@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str):
|
|||
if found_uprn == "":
|
||||
return None
|
||||
|
||||
if return_address:
|
||||
return found_uprn, address
|
||||
return found_uprn
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +1,24 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
# Enable tqdm for pandas
|
||||
tqdm.pandas()
|
||||
|
||||
df = pd.read_excel("address2.xlsx")
|
||||
|
||||
|
||||
# use Address 1
|
||||
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")
|
||||
def extract_uprn(row):
|
||||
print(row["User Input"], row["Postcode"])
|
||||
result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
|
||||
|
||||
if result is None:
|
||||
return pd.Series([None, None])
|
||||
|
||||
uprn, found_address = result
|
||||
return pd.Series([uprn, found_address])
|
||||
|
||||
|
||||
# use domna_address_1
|
||||
khalim_df = pd.read_excel("khalim_standard.xlsx")
|
||||
|
||||
|
||||
combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
|
||||
|
||||
# Find the row in khalim_df that does not app
|
||||
|
||||
result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
|
||||
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
|
||||
|
||||
df.to_excel("outputs2.xlsx", index=False)
|
||||
|
|
|
|||
33
backend/condition/condition_trigger_request.py
Normal file
33
backend/condition/condition_trigger_request.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from enum import Enum
|
||||
from typing import Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class ConditionFileType(Enum):
|
||||
LBWF = "LBWF"
|
||||
Peabody = "Peabody"
|
||||
# TODO: make these asset management systems rather than client names
|
||||
|
||||
|
||||
class ConditionTriggerRequest(BaseModel):
|
||||
file_type: ConditionFileType
|
||||
trigger_file_bucket: str # TODO: get this from settings
|
||||
trigger_file_key: str
|
||||
|
||||
uprn_lookup_file_bucket: Optional[str] = None # TODO: get this from settings
|
||||
uprn_lookup_file_key: Optional[str] = None
|
||||
|
||||
|
||||
# {
|
||||
# "file_type": "Peabody",
|
||||
# "trigger_file_bucket": "condition-data-dev",
|
||||
# "trigger_file_key": "input/peabody/2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx",
|
||||
# "uprn_lookup_file_bucket": "condition-data-dev",
|
||||
# "uprn_lookup_file_key": "input/peabody/uprn-lookup/PeabodyPropertymatched_Dec25_propref_UPRN.csv"
|
||||
# }
|
||||
|
||||
# {
|
||||
# "file_type": "LBWF",
|
||||
# "trigger_file_bucket": "condition-data-dev",
|
||||
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx",
|
||||
# }
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from typing import Any, Dict, List, Optional, Tuple
|
||||
from typing import Any, Dict, Optional, Tuple
|
||||
from datetime import date
|
||||
|
||||
from backend.condition.domain.aspect_condition import AspectCondition
|
||||
|
|
|
|||
|
|
@ -1,16 +0,0 @@
|
|||
from enum import Enum
|
||||
|
||||
class FileType(Enum):
|
||||
LBWF = "lbwf"
|
||||
Peabody = "peabody"
|
||||
|
||||
def detect_file_type(filepath: str) -> FileType:
|
||||
path = filepath.lower()
|
||||
|
||||
if "lbwf" in path:
|
||||
return FileType.LBWF
|
||||
|
||||
if "peabody" in path:
|
||||
return FileType.Peabody
|
||||
|
||||
raise ValueError("Unrecognised file path")
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
from typing import Mapping, Any
|
||||
from io import BytesIO
|
||||
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.processor import process_file
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
|
||||
# Temporary stub for PoC wiring
|
||||
dummy_stream = BytesIO(b"")
|
||||
|
||||
source_key = event.get("source_key", "unknown-source")
|
||||
|
||||
process_file(dummy_stream, source_key)
|
||||
48
backend/condition/handler/Dockerfile
Normal file
48
backend/condition/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,48 @@
|
|||
FROM public.ecr.aws/lambda/python:3.11
|
||||
# For local running:
|
||||
# FROM python:3.11.10-bullseye
|
||||
|
||||
ARG DEV_DB_HOST
|
||||
ARG DEV_DB_PORT
|
||||
ARG DEV_DB_NAME
|
||||
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# Environment
|
||||
ENV DB_HOST=${DEV_DB_HOST}
|
||||
ENV DB_PORT=${DEV_DB_PORT}
|
||||
ENV DB_NAME=${DEV_DB_NAME}
|
||||
|
||||
COPY backend/.env.local backend/.env.local
|
||||
|
||||
# -----------------------------
|
||||
# Copy requirements FIRST (for Docker layer caching)
|
||||
# -----------------------------
|
||||
COPY backend/condition/handler/requirements.txt .
|
||||
|
||||
# Install dependencies into Lambda runtime
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# -----------------------------
|
||||
# Copy application code
|
||||
# -----------------------------
|
||||
COPY utils/ utils/
|
||||
COPY backend/condition/ backend/condition/
|
||||
|
||||
COPY backend/app/db/models/condition.py backend/app/db/models/condition.py
|
||||
COPY backend/app/db/connection.py backend/app/db/connection.py
|
||||
COPY backend/app/config.py backend/app/config.py
|
||||
|
||||
COPY backend/__init__.py backend/__init__.py
|
||||
COPY backend/app/__init__.py backend/app/__init__.py
|
||||
COPY backend/app/db/__init__.py backend/app/db/__init__.py
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["backend/condition/handler/handler.handler"]
|
||||
# For local running
|
||||
# CMD ["python", "-m", "backend.condition.handler.handler"]
|
||||
51
backend/condition/handler/handler.py
Normal file
51
backend/condition/handler/handler.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
import json
|
||||
from typing import Mapping, Any
|
||||
from io import BytesIO
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionTriggerRequest
|
||||
from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3
|
||||
from backend.condition.processor import process_file
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_io_from_s3
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
|
||||
|
||||
for record in event.get("Records", []):
|
||||
try:
|
||||
body_dict = json.loads(record["body"])
|
||||
logger.debug("Validating request body")
|
||||
payload = ConditionTriggerRequest.model_validate(body_dict)
|
||||
|
||||
logger.debug("Successfully validated request body")
|
||||
|
||||
if payload.uprn_lookup_file_bucket and payload.uprn_lookup_file_key:
|
||||
logger.debug("Getting UPRN lookup file from s3")
|
||||
uprn_lookup = UprnLookupS3(
|
||||
bucket=payload.uprn_lookup_file_bucket,
|
||||
key=payload.uprn_lookup_file_key,
|
||||
) # TODO: replace with postgres implementation
|
||||
logger.debug("Successfully got UPRN lookup file from s3")
|
||||
else:
|
||||
uprn_lookup = None
|
||||
|
||||
logger.debug("Getting conditions data from s3")
|
||||
file_bytes: BytesIO = read_io_from_s3(
|
||||
bucket_name=payload.trigger_file_bucket,
|
||||
file_key=payload.trigger_file_key,
|
||||
)
|
||||
logger.debug(
|
||||
"Successfully got conditions data from s3. Moving on to process file..."
|
||||
)
|
||||
|
||||
process_file(
|
||||
file_stream=file_bytes,
|
||||
file_type=payload.file_type,
|
||||
uprn_lookup=uprn_lookup,
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to process record: {e}")
|
||||
9
backend/condition/handler/requirements.txt
Normal file
9
backend/condition/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
openpyxl
|
||||
sqlmodel
|
||||
pydantic-settings
|
||||
psycopg2-binary==2.9.10
|
||||
|
||||
# pandas isn't used, but needed for importing from utils.s3
|
||||
pandas==2.2.2
|
||||
numpy==1.26.4
|
||||
openpyxl
|
||||
|
|
@ -1,5 +1,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
from backend.condition.processor import process_file
|
||||
|
||||
|
||||
|
|
@ -20,15 +22,27 @@ def main() -> None:
|
|||
/ "peabody"
|
||||
/ "2026_01_06 - Peabody - Stock Condition Data - Survey Records - D Lower.xlsx"
|
||||
)
|
||||
filepaths = [lbwf_path, peabody_path]
|
||||
# filepaths = [lbwf_path]
|
||||
peabody_uprn_lookup_path: Path = (
|
||||
path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
|
||||
)
|
||||
# filepaths = [lbwf_path, peabody_path]
|
||||
filepaths = [lbwf_path]
|
||||
# filepaths = [peabody_path]
|
||||
|
||||
uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix())
|
||||
|
||||
def get_file_type(file_path: str) -> ConditionFileType:
|
||||
if "peabody" in file_path:
|
||||
return ConditionFileType.Peabody
|
||||
if "lbwf" in file_path:
|
||||
return ConditionFileType.LBWF
|
||||
|
||||
for fp in filepaths:
|
||||
with fp.open("rb") as f:
|
||||
process_file(
|
||||
file_stream=f,
|
||||
source_key=fp.as_posix(),
|
||||
file_type=get_file_type(fp.as_posix()),
|
||||
uprn_lookup=uprn_lookup,
|
||||
)
|
||||
|
||||
|
||||
|
|
|
|||
8
backend/condition/lookups/uprn_lookup.py
Normal file
8
backend/condition/lookups/uprn_lookup.py
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
from abc import ABC, abstractmethod
|
||||
from typing import BinaryIO, Dict
|
||||
|
||||
|
||||
class UprnLookup(ABC):
|
||||
@abstractmethod
|
||||
def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
|
||||
pass
|
||||
23
backend/condition/lookups/uprn_lookup_csv.py
Normal file
23
backend/condition/lookups/uprn_lookup_csv.py
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
import csv
|
||||
from io import TextIOWrapper
|
||||
from typing import BinaryIO, Dict, TextIO
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
|
||||
|
||||
class UprnLookupLocal(UprnLookup):
|
||||
def __init__(self, csv_path: str):
|
||||
self.csv_path = csv_path
|
||||
|
||||
def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
|
||||
with open(self.csv_path, "rb") as f:
|
||||
return self.parse_csv(f)
|
||||
|
||||
def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]:
|
||||
text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8")
|
||||
mapping: Dict[str, int] = {}
|
||||
reader = csv.DictReader(text_stream)
|
||||
for row in reader:
|
||||
if not row["reference"] or not row["out_uprn"]:
|
||||
continue
|
||||
mapping[row["reference"].strip()] = int(row["out_uprn"].strip())
|
||||
return mapping
|
||||
29
backend/condition/lookups/uprn_lookup_s3.py
Normal file
29
backend/condition/lookups/uprn_lookup_s3.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import csv
|
||||
from io import BytesIO, TextIOWrapper
|
||||
from typing import BinaryIO, Dict, TextIO
|
||||
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from utils.s3 import read_io_from_s3
|
||||
|
||||
|
||||
class UprnLookupS3(UprnLookup):
|
||||
def __init__(self, bucket: str = "", key: str = ""):
|
||||
self.bucket = bucket
|
||||
self.key = key
|
||||
|
||||
def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]:
|
||||
file_bytes: BytesIO = read_io_from_s3(
|
||||
bucket_name=self.bucket, file_key=self.key
|
||||
)
|
||||
|
||||
return self._parse_csv_bytes(file_bytes)
|
||||
|
||||
def _parse_csv_bytes(self, file_stream: BinaryIO) -> Dict[str, int]:
|
||||
text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8")
|
||||
mapping: Dict[str, int] = {}
|
||||
reader = csv.DictReader(text_stream)
|
||||
for row in reader:
|
||||
if not row["reference"] or not row["out_uprn"]:
|
||||
continue
|
||||
mapping[row["reference"].strip()] = int(row["out_uprn"].strip())
|
||||
return mapping
|
||||
|
|
@ -1,27 +1,35 @@
|
|||
from typing import Optional
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper
|
||||
from backend.condition.domain.mapping.mapper import Mapper
|
||||
from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper
|
||||
from backend.condition.file_type import FileType
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from backend.condition.parsing.lbwf_parser import LbwfParser
|
||||
from backend.condition.parsing.peabody_parser import PeabodyParser
|
||||
|
||||
|
||||
def select_parser(file_type: FileType) -> Parser:
|
||||
if file_type is FileType.LBWF:
|
||||
def select_parser(
|
||||
file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None
|
||||
) -> Parser:
|
||||
if file_type is ConditionFileType.LBWF:
|
||||
return LbwfParser()
|
||||
|
||||
if file_type is FileType.Peabody:
|
||||
return PeabodyParser()
|
||||
if file_type is ConditionFileType.Peabody:
|
||||
if not uprn_lookup:
|
||||
raise ValueError(
|
||||
"Cannot instantiate Peabody Parser without UPRN lookup being provided"
|
||||
)
|
||||
return PeabodyParser(uprn_lookup=uprn_lookup)
|
||||
|
||||
raise ValueError("Unrecognised file type, unable to instantiate Parser")
|
||||
|
||||
|
||||
def select_mapper(file_type: FileType) -> Mapper:
|
||||
if file_type is FileType.LBWF:
|
||||
def select_mapper(file_type: ConditionFileType) -> Mapper:
|
||||
if file_type is ConditionFileType.LBWF:
|
||||
return LbwfMapper()
|
||||
|
||||
if file_type is FileType.Peabody:
|
||||
if file_type is ConditionFileType.Peabody:
|
||||
return PeabodyMapper()
|
||||
|
||||
raise ValueError("Unrecognised file type, unable to instantiate Mapper")
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ class LbwfParser(Parser):
|
|||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
|
||||
) -> Any:
|
||||
wb: Workbook = load_workbook(file_stream)
|
||||
address_to_uprn_map: Dict[str, int] = LbwfParser._generate_address_to_uprn_dict(
|
||||
|
|
|
|||
|
|
@ -8,6 +8,5 @@ class Parser(ABC):
|
|||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
|
||||
) -> Any:
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@ from typing import Any, BinaryIO, Dict, List, Optional, Tuple, DefaultDict
|
|||
from openpyxl import Workbook, load_workbook
|
||||
from collections import defaultdict
|
||||
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
|
||||
PeabodyAssetCondition,
|
||||
|
|
@ -15,42 +16,29 @@ logger = setup_logger()
|
|||
|
||||
|
||||
class PeabodyParser(Parser):
|
||||
def __init__(self, uprn_lookup: UprnLookup):
|
||||
self.uprn_lookup: UprnLookup = uprn_lookup # TODO: move this to the ABC?
|
||||
|
||||
def parse(
|
||||
self,
|
||||
file_stream: BinaryIO,
|
||||
location_ref_to_uprn_map: Optional[Dict[str, int]] = None,
|
||||
) -> Any:
|
||||
wb: Workbook = load_workbook(file_stream)
|
||||
|
||||
if location_ref_to_uprn_map is None:
|
||||
location_ref_to_uprn_map: Dict[str, int] = (
|
||||
PeabodyParser._build_location_ref_to_uprn_map()
|
||||
)
|
||||
|
||||
file_stream.seek(0)
|
||||
logger.debug("[PeabodyParser] Loading workbook...")
|
||||
wb: Workbook = load_workbook(file_stream, read_only=True, data_only=True)
|
||||
logger.debug("[PeabodyParser] Successfully loaded workbook. Parsing assets...")
|
||||
assets = PeabodyParser._parse_assets(wb)
|
||||
logger.debug(
|
||||
"[PeabodyParser] Successfully parsed assets. Parsing UPRN lookup..."
|
||||
)
|
||||
|
||||
location_ref_to_uprn_map = self.uprn_lookup.get_property_ref_to_uprn_lookup()
|
||||
logger.debug("[PeabodyParser] Successfully parsed UPRN lookup")
|
||||
return PeabodyParser._group_assets_into_properties(
|
||||
assets=assets,
|
||||
location_ref_to_uprn_map=location_ref_to_uprn_map,
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _build_location_ref_to_uprn_map() -> Dict[str, int]:
|
||||
location_ref_to_uprn_filepath: Path = (
|
||||
Path(__file__).resolve().parents[1]
|
||||
/ "sample_data"
|
||||
/ "peabody"
|
||||
/ "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
|
||||
)
|
||||
location_ref_to_uprn_map: Dict[str, int] = {}
|
||||
|
||||
with location_ref_to_uprn_filepath.open(newline="") as f:
|
||||
reader: Any = csv.DictReader(f)
|
||||
for row in reader:
|
||||
location_ref_to_uprn_map[row["reference"]] = int(row["out_uprn"])
|
||||
|
||||
return location_ref_to_uprn_map
|
||||
|
||||
@staticmethod
|
||||
def _parse_assets(wb: Workbook) -> List[PeabodyAssetCondition]:
|
||||
assets_sheet = wb["Survey Records - D & Lower"]
|
||||
|
|
@ -67,7 +55,7 @@ class PeabodyParser(Parser):
|
|||
)
|
||||
if not asset.is_block_level:
|
||||
# Block-level condition surveys are out of scope for now
|
||||
# until we have a wider think on how to handle block
|
||||
# until we have a wider think on how to handle blocks
|
||||
assets.append(asset) # TODO: handle block-level assets
|
||||
|
||||
except Exception as e:
|
||||
|
|
@ -92,13 +80,14 @@ class PeabodyParser(Parser):
|
|||
assets_by_location_reference[asset.lo_reference].append(asset)
|
||||
|
||||
properties: List[PeabodyProperty] = []
|
||||
failed_mappings_count = 0
|
||||
|
||||
for location_ref, grouped_assets in assets_by_location_reference.items():
|
||||
|
||||
uprn = location_ref_to_uprn_map.get(location_ref)
|
||||
|
||||
if uprn is None:
|
||||
logger.warning(f"No UPRN found for Location Reference: {location_ref}")
|
||||
failed_mappings_count += 1
|
||||
continue
|
||||
|
||||
properties.append(
|
||||
|
|
@ -108,6 +97,7 @@ class PeabodyParser(Parser):
|
|||
)
|
||||
)
|
||||
|
||||
logger.warning(f"No UPRN found for {failed_mappings_count} Location References")
|
||||
return properties
|
||||
|
||||
@staticmethod
|
||||
|
|
|
|||
|
|
@ -19,18 +19,19 @@ class ConditionPostgres:
|
|||
def bulk_insert_surveys(
|
||||
self, surveys: List[PropertyConditionSurvey], batch_size: Optional[int] = 100
|
||||
) -> None:
|
||||
logger.info(
|
||||
f"Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
|
||||
logger.debug(
|
||||
f"[ConditionPostgres] Preparing to load {len(surveys)} property surveys to Postgres. Mapping to SQLModel objects..."
|
||||
)
|
||||
survey_models: List[PropertyConditionSurveyModel] = [
|
||||
ConditionPostgres.map_survey_to_model(s) for s in surveys
|
||||
]
|
||||
total: int = len(survey_models)
|
||||
logger.info(
|
||||
f"Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
|
||||
logger.debug(
|
||||
f"[ConditionPostgres] Finished mapping {total} surveys. Writing to database in batches of {batch_size}..."
|
||||
)
|
||||
|
||||
with db_session() as session:
|
||||
logger.info("[ConditionPostgres] Successfully made connection to database")
|
||||
for start in range(0, total, batch_size):
|
||||
end = min(start + batch_size, total)
|
||||
batch = survey_models[start:end]
|
||||
|
|
|
|||
|
|
@ -1,26 +1,31 @@
|
|||
from typing import Any, BinaryIO, List
|
||||
from typing import Any, BinaryIO, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.lookups.uprn_lookup import UprnLookup
|
||||
from utils.logger import setup_logger
|
||||
from backend.condition.domain.mapping.mapper import Mapper
|
||||
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
|
||||
from backend.condition.parsing.parser import Parser
|
||||
from backend.condition.persistence.condition_postgres import ConditionPostgres
|
||||
from backend.condition.file_type import FileType, detect_file_type
|
||||
from backend.condition.parsing.factory import select_parser, select_mapper
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def process_file(file_stream: BinaryIO, source_key: str) -> None:
|
||||
logger.info(f"[processor] Received file: {source_key}")
|
||||
|
||||
def process_file(
|
||||
file_stream: BinaryIO,
|
||||
file_type: ConditionFileType,
|
||||
uprn_lookup: Optional[UprnLookup],
|
||||
) -> None:
|
||||
# Instantiation
|
||||
file_type: FileType = detect_file_type(source_key)
|
||||
parser: Parser = select_parser(file_type)
|
||||
logger.debug(f"[processor] Instantiating classes...")
|
||||
parser: Parser = select_parser(file_type, uprn_lookup)
|
||||
mapper: Mapper = select_mapper(file_type)
|
||||
persistence = ConditionPostgres()
|
||||
|
||||
logger.debug(f"[processor] Finished instantiating classes. Calling Parser...")
|
||||
|
||||
# Orchestration
|
||||
raw_properties: List[Any] = parser.parse(file_stream)
|
||||
|
||||
|
|
|
|||
34
backend/condition/tests/lookups/test_uprn_lookup_csv.py
Normal file
34
backend/condition/tests/lookups/test_uprn_lookup_csv.py
Normal file
|
|
@ -0,0 +1,34 @@
|
|||
import pytest
|
||||
from typing import Dict
|
||||
from tempfile import NamedTemporaryFile
|
||||
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
|
||||
|
||||
@pytest.fixture
|
||||
def prop_ref_uprn_csv_file() -> str:
|
||||
csv_content = """reference,out_uprn
|
||||
ABC123,10000000001
|
||||
DEF456,10000000002
|
||||
GHI789,10000000003
|
||||
"""
|
||||
with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp:
|
||||
tmp.write(csv_content)
|
||||
tmp.flush()
|
||||
return tmp.name
|
||||
|
||||
|
||||
def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None:
|
||||
# arrange
|
||||
uprn_lookup = UprnLookupLocal(prop_ref_uprn_csv_file)
|
||||
expected_map: Dict[str, int] = {
|
||||
"ABC123": 10000000001,
|
||||
"DEF456": 10000000002,
|
||||
"GHI789": 10000000003,
|
||||
}
|
||||
|
||||
# act
|
||||
actual_map: Dict[str, int] = uprn_lookup.get_property_ref_to_uprn_lookup()
|
||||
|
||||
# assert
|
||||
assert actual_map == expected_map
|
||||
|
|
@ -1,11 +1,13 @@
|
|||
import pytest
|
||||
|
||||
from backend.condition.condition_trigger_request import ConditionFileType
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
from backend.condition.parsing.factory import select_parser
|
||||
from backend.condition.file_type import FileType
|
||||
|
||||
|
||||
def test_selects_lbwf_parser():
|
||||
# arrange
|
||||
file_type = FileType.LBWF
|
||||
file_type = ConditionFileType.LBWF
|
||||
expected_class_name = "LbwfParser"
|
||||
|
||||
# act
|
||||
|
|
@ -14,13 +16,15 @@ def test_selects_lbwf_parser():
|
|||
# assert
|
||||
assert expected_class_name == actual_class_name
|
||||
|
||||
|
||||
def test_selects_peabody_parser():
|
||||
# arrange
|
||||
file_type = FileType.Peabody
|
||||
file_type = ConditionFileType.Peabody
|
||||
expected_class_name = "PeabodyParser"
|
||||
uprn_lookup = UprnLookupLocal(csv_path="test")
|
||||
|
||||
# act
|
||||
actual_class_name = select_parser(file_type).__class__.__name__
|
||||
actual_class_name = select_parser(file_type, uprn_lookup).__class__.__name__
|
||||
|
||||
# assert
|
||||
assert expected_class_name == actual_class_name
|
||||
assert expected_class_name == actual_class_name
|
||||
|
|
|
|||
|
|
@ -1,9 +1,11 @@
|
|||
from tempfile import NamedTemporaryFile
|
||||
import pytest
|
||||
from typing import Any, Dict
|
||||
from io import BytesIO
|
||||
from openpyxl import Workbook
|
||||
from datetime import datetime
|
||||
|
||||
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
|
||||
from backend.condition.parsing.peabody_parser import PeabodyParser
|
||||
from backend.condition.parsing.records.peabody.peabody_asset_condition import (
|
||||
PeabodyAssetCondition,
|
||||
|
|
@ -145,23 +147,28 @@ def peabody_assets_xlsx_bytes() -> BytesIO:
|
|||
|
||||
|
||||
@pytest.fixture
|
||||
def location_ref_to_uprn_map() -> Dict[str, int]:
|
||||
return {
|
||||
"B000RAND": 1,
|
||||
"B000BLOCK": 2,
|
||||
"B000FAKE": 3,
|
||||
"B000MIS": 4,
|
||||
}
|
||||
def prop_ref_uprn_csv_file() -> str:
|
||||
csv_content = """reference,out_uprn
|
||||
B000RAND,1
|
||||
B000BLOCK,2
|
||||
B000FAKE,3
|
||||
B000MIS,4
|
||||
"""
|
||||
with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp:
|
||||
tmp.write(csv_content)
|
||||
tmp.flush()
|
||||
return tmp.name
|
||||
|
||||
|
||||
def test_peabody_parser_parses_conditions(
|
||||
peabody_assets_xlsx_bytes, location_ref_to_uprn_map
|
||||
peabody_assets_xlsx_bytes, prop_ref_uprn_csv_file
|
||||
):
|
||||
# arrange
|
||||
parser = PeabodyParser()
|
||||
uprn_lookup = UprnLookupLocal(csv_path=prop_ref_uprn_csv_file)
|
||||
parser = PeabodyParser(uprn_lookup=uprn_lookup)
|
||||
|
||||
# act
|
||||
result: Any = parser.parse(peabody_assets_xlsx_bytes, location_ref_to_uprn_map)
|
||||
result: Any = parser.parse(peabody_assets_xlsx_bytes)
|
||||
|
||||
# assert
|
||||
assert len(result) == 3
|
||||
|
|
|
|||
|
|
@ -1,22 +0,0 @@
|
|||
import pytest
|
||||
|
||||
from backend.condition.file_type import FileType, detect_file_type
|
||||
|
||||
def test_detects_lbwf_file_type():
|
||||
# arrange
|
||||
file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx"
|
||||
expected_file_type = FileType.LBWF
|
||||
|
||||
# act
|
||||
actual_file_type: FileType = detect_file_type(file_path_str)
|
||||
|
||||
# assert
|
||||
assert expected_file_type == actual_file_type
|
||||
|
||||
def test_unknown_filepath_raises_value_error():
|
||||
# arrange
|
||||
file_path_str = "unknown/Example Asset Data.xlsx"
|
||||
|
||||
# act + assert
|
||||
with pytest.raises(ValueError):
|
||||
detect_file_type(file_path_str)
|
||||
Binary file not shown.
9
backend/postcode_splitter/handler/Dockerfile
Normal file
9
backend/postcode_splitter/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# -----------------------------
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["main.handler"]
|
||||
0
backend/postcode_splitter/handler/requirements.txt
Normal file
0
backend/postcode_splitter/handler/requirements.txt
Normal file
|
|
@ -1,10 +1,12 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
|
||||
from backend.address2UPRN.main import (
|
||||
resolve_uprns_for_postcode_group,
|
||||
get_epc_data_with_postcode,
|
||||
)
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
|
||||
def sanitise_postcode(postcode: str) -> str | None:
|
||||
"""
|
||||
Normalise postcode for grouping.
|
||||
|
|
@ -51,11 +53,7 @@ def main():
|
|||
# --- validate AFTER grouping (save API calls) ---
|
||||
|
||||
# Get unique, non-null postcodes
|
||||
unique_postcodes = (
|
||||
df["postcode_clean"]
|
||||
.dropna()
|
||||
.unique()
|
||||
)
|
||||
unique_postcodes = df["postcode_clean"].dropna().unique()
|
||||
|
||||
# Validate each postcode once, TODOadd a progress bar
|
||||
postcode_validity = {
|
||||
|
|
@ -66,7 +64,6 @@ def main():
|
|||
# Map validity back onto dataframe
|
||||
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
|
||||
|
||||
|
||||
results = []
|
||||
|
||||
for postcode, group_df in tqdm(
|
||||
|
|
@ -98,17 +95,33 @@ def main():
|
|||
results.append(tmp)
|
||||
|
||||
final_df = pd.concat(results, ignore_index=True)
|
||||
a = final_df[[
|
||||
"best_match_lexiscore","Address 1",
|
||||
"best_match_address", "Postcode",
|
||||
"UPRN", "best_match_uprn"
|
||||
]] # add levi score to viewing
|
||||
b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing
|
||||
b = b[[
|
||||
"best_match_lexiscore","Address 1",
|
||||
"best_match_address", "Postcode",
|
||||
"UPRN", "best_match_uprn"
|
||||
]]
|
||||
a = final_df[
|
||||
[
|
||||
"best_match_lexiscore",
|
||||
"Address 1",
|
||||
"best_match_address",
|
||||
"Postcode",
|
||||
"UPRN",
|
||||
"best_match_uprn",
|
||||
]
|
||||
] # add levi score to viewing
|
||||
b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
|
||||
b = b[
|
||||
[
|
||||
"best_match_lexiscore",
|
||||
"Address 1",
|
||||
"best_match_address",
|
||||
"Postcode",
|
||||
"UPRN",
|
||||
"best_match_uprn",
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
print("hello Postcode splitter world")
|
||||
return {"statusCode": 200, "body": "hello world"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
43
infrastructure/terraform/lambda/condition-etl/main.tf
Normal file
43
infrastructure/terraform/lambda/condition-etl/main.tf
Normal file
|
|
@ -0,0 +1,43 @@
|
|||
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||
}
|
||||
|
||||
data "terraform_remote_state" "shared" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "assessment-model-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
|
||||
locals {
|
||||
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||
}
|
||||
|
||||
|
||||
module "lambda" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
name = "condition-etl"
|
||||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
timeout = 180
|
||||
|
||||
|
||||
environment = merge(
|
||||
{
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||
},
|
||||
)
|
||||
|
||||
}
|
||||
|
||||
resource "aws_iam_role_policy_attachment" "attach_condition_etl_s3_read" {
|
||||
role = module.lambda.role_name
|
||||
policy_arn = data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
|
||||
}
|
||||
16
infrastructure/terraform/lambda/condition-etl/provider.tf
Normal file
16
infrastructure/terraform/lambda/condition-etl/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.16"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = "condition-etl-terraform-state"
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
27
infrastructure/terraform/lambda/condition-etl/variables.tf
Normal file
27
infrastructure/terraform/lambda/condition-etl/variables.tf
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
||||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
|
|
@ -6,6 +6,10 @@ module "role" {
|
|||
name = "${var.name}-lambda-${var.stage}"
|
||||
}
|
||||
|
||||
output "role_name" {
|
||||
value = module.role.role_name
|
||||
}
|
||||
|
||||
############################################
|
||||
# SQS queue + DLQ
|
||||
############################################
|
||||
|
|
|
|||
14
infrastructure/terraform/lambda/postcodeSplitter/main.tf
Normal file
14
infrastructure/terraform/lambda/postcodeSplitter/main.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
module "lambda" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
name = "postcode-splitter"
|
||||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
}
|
||||
}
|
||||
16
infrastructure/terraform/lambda/postcodeSplitter/provider.tf
Normal file
16
infrastructure/terraform/lambda/postcodeSplitter/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.16"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = "postcode-splitter-terraform-state"
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
||||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
|
|
@ -298,10 +298,6 @@ module "address2uprn_state_bucket" {
|
|||
|
||||
}
|
||||
|
||||
output "address2uprn_state_bucket_name" {
|
||||
value = module.address2uprn_state_bucket.bucket_name
|
||||
}
|
||||
|
||||
module "address2uprn_registry" {
|
||||
source = "../modules/container_registry"
|
||||
name = "address2uprn"
|
||||
|
|
@ -309,6 +305,62 @@ module "address2uprn_registry" {
|
|||
|
||||
}
|
||||
|
||||
output "address2uprn_repository_url" {
|
||||
value = module.address2uprn_registry.repository_url
|
||||
################################################
|
||||
# Condition ETL – Lambda ECR
|
||||
################################################
|
||||
module "condition_etl_state_bucket" {
|
||||
source = "../modules/tf_state_bucket"
|
||||
bucket_name = "condition-etl-terraform-state"
|
||||
|
||||
}
|
||||
|
||||
module "condition_etl_registry" {
|
||||
source = "../modules/container_registry"
|
||||
name = "condition-etl"
|
||||
stage = var.stage
|
||||
|
||||
}
|
||||
|
||||
################################################
|
||||
# Postcode Splitter – Lambda ECR
|
||||
################################################
|
||||
module "postcode_splitter_state_bucket" {
|
||||
source = "../modules/tf_state_bucket"
|
||||
bucket_name = "postcode-splitter-terraform-state"
|
||||
|
||||
}
|
||||
|
||||
module "postcode_splitter_registry" {
|
||||
source = "../modules/container_registry"
|
||||
name = "postcode_splitter"
|
||||
stage = var.stage
|
||||
|
||||
}
|
||||
|
||||
################################################
|
||||
# Conidition data – S3 bucket
|
||||
################################################
|
||||
module "condition_data_bucket" {
|
||||
source = "../modules/s3"
|
||||
bucketname = "condition-data-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
resource "aws_iam_policy" "condition_etl_s3_read" {
|
||||
name = "ConditionETLReadS3"
|
||||
description = "Allow Lambda to read objects from condition-data-${var.stage}"
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
{
|
||||
Effect = "Allow"
|
||||
Action = ["s3:GetObject"]
|
||||
Resource = "arn:aws:s3:::condition-data-${var.stage}/*"
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
output "condition_etl_s3_read_arn" {
|
||||
value = aws_iam_policy.condition_etl_s3_read.arn
|
||||
}
|
||||
|
|
@ -2,6 +2,10 @@
|
|||
This script prepares the data for the financial model
|
||||
"""
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(".env.local")
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
|
@ -24,12 +28,12 @@ from sqlalchemy import func
|
|||
|
||||
# PORTFOLIO_ID = 206
|
||||
# SCENARIOS = [389]
|
||||
PORTFOLIO_ID = 502 # Peabody
|
||||
PORTFOLIO_ID = 524
|
||||
SCENARIOS = [
|
||||
986,
|
||||
1009,
|
||||
]
|
||||
scenario_names = {
|
||||
986: "EPC C",
|
||||
1009: "EPC C; Most Economic",
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue