Merge branch 'feautre/postcode_splitter_address_uprn_automator_2000' into deploy-condition-processor

This commit is contained in:
Daniel Roth 2026-02-05 09:22:25 +00:00
commit 7f32f95f71
18 changed files with 231 additions and 47 deletions

View file

@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
# # 4) Python deps - if you want to run asset list
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
ADD asset_list/requirements.txt requirements.txt
RUN pip install -r requirements.txt
ADD .devcontainer/asset_list/requirements.txt requirements2.txt
ADD asset_list/requirements.txt requirements1.txt
RUN cat requirements1.txt requirements2.txt >> requirements.txt
RUN pip install -r requirements.txt
# 5) Workdir

View file

@ -15,10 +15,9 @@ uvicorn[standard]
pytest==9.0.2
pytest-cov==7.0.0
ipykernel>=6.25,<7
pydantic-settings<2
pyyaml>=6.0.1
pydantic>=1.10.7,<2
sqlmodel
# Formatting
black==26.1.0
dotenv
pydantic-settings

View file

@ -73,8 +73,8 @@ jobs:
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/Dockerfile
build_context: backend/address2UPRN
dockerfile_path: backend/address2UPRN/handler/Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
@ -96,3 +96,38 @@ jobs:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 2⃣ Build Postcode Splitter image and Push
# ============================================================
postcodeSplitter_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
build_context: .
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
# ============================================================
# 3⃣ Deploy Postcode Splitter Lambda
# ============================================================
postcodeSplitter_lambda:
needs: [postcodeSplitter_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: postcodeSplitter
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -19,4 +19,22 @@ PLAN_TRIGGER_BUCKET="test"
DATA_BUCKET="test"
EPC_AUTH_TOKEN="test"
ENGINE_SQS_URL="test"
ENERGY_ASSESSMENTS_BUCKET="test"
API_KEY="test"
SECRET_KEY="test"
ENVIRONMENT="test"
DATA_BUCKET="test"
PLAN_TRIGGER_BUCKET="test"
ENGINE_SQS_URL="test"
GOOGLE_SOLAR_API_KEY="test"
DB_HOST="test"
DB_PASSWORD="test"
DB_USERNAME="test"
DB_PORT="test"
DB_NAME="test"
SAP_PREDICTIONS_BUCKET="test"
CARBON_PREDICTIONS_BUCKET="test"
HEAT_PREDICTIONS_BUCKET="test"
HEATING_KWH_PREDICTIONS_BUCKET="test"
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
ENERGY_ASSESSMENTS_BUCKET="test"

View file

@ -1,7 +0,0 @@
FROM public.ecr.aws/lambda/python:3.10
# Copy function code
COPY main.py .
# Set the handler
CMD ["main.handler"]

View file

@ -0,0 +1,26 @@
FROM public.ecr.aws/lambda/python:3.10
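# TEMPORARY: this is not going to be permanent - only until we solve for env variables in live prod.
# SECURITY: a value set with ENV is baked into the image and is readable by anyone who can pull or
# inspect it. Supply this token through the Lambda environment configuration or a secrets manager
# instead, and rotate the token committed here.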
ENV EPC_AUTH_TOKEN=a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzAg
# Set working directory (Lambda task root)
WORKDIR /var/task
# -----------------------------
# Copy requirements FIRST (for Docker layer caching)
# -----------------------------
# COPY sources are resolved relative to the build context; the paths used in
# this file (backend/..., utils/) presumably require the repository root as
# the context - confirm against the CI build settings.
COPY backend/address2UPRN/handler/requirements.txt .
# Install dependencies into Lambda runtime
# (--no-cache-dir keeps pip's download cache out of the image layer)
RUN pip install --no-cache-dir -r requirements.txt
# -----------------------------
# Copy application code
# -----------------------------
# Copied after the dependency install so that code-only changes can reuse the
# cached pip layer above.
COPY utils/ utils/
COPY backend/address2UPRN/main.py .
# -----------------------------
# Lambda handler
# -----------------------------
# "main.handler" refers to the `handler` callable in the main.py copied above.
CMD ["main.handler"]

View file

@ -0,0 +1,3 @@
epc-api-python==1.0.2
tqdm
pandas

View file

@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
method="get",
params={"postcode": postcode},
)
if not search_resp or "rows" not in search_resp:
return pd.DataFrame()
results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
@ -298,7 +300,7 @@ def get_uprn_candidates(
)
def get_uprn(user_inputed_address: str, postcode: str):
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
"""
Return uprn (str)
Return False if failed to find a sensible matching epc
@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str):
if found_uprn == "":
return None
if return_address:
return found_uprn, address
return found_uprn

View file

@ -1,17 +1,24 @@
import pandas as pd
from tqdm import tqdm
from backend.address2UPRN.main import get_uprn
# Enable tqdm for pandas
tqdm.pandas()
df = pd.read_excel("address2.xlsx")
# use Address 1
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")
def extract_uprn(row):
    """
    Look up the UPRN and matched address for one spreadsheet row.

    Reads the "User Input" and "Postcode" entries of ``row`` and passes them
    to ``get_uprn(..., return_address=True)``.

    Returns a two-element ``pd.Series``: ``[uprn, found_address]`` when a
    match is found, or ``[None, None]`` when the lookup fails.
    """
    print(row["User Input"], row["Postcode"])
    result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
    # get_uprn signals failure with a falsy value: None in the visible code
    # path, and its docstring also documents False. Neither can be unpacked
    # into (uprn, address), so treat every falsy result as "no match".
    if not result:
        return pd.Series([None, None])
    uprn, found_address = result
    return pd.Series([uprn, found_address])
# use domna_address_1
khalim_df = pd.read_excel("khalim_standard.xlsx")
combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
# Find the row in khalim_df that does not app
result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
df.to_excel("outputs2.xlsx", index=False)

View file

@ -0,0 +1,9 @@
FROM public.ecr.aws/lambda/python:3.10
# Set working directory (Lambda task root)
WORKDIR /var/task
# NOTE(review): this Dockerfile has no COPY or pip install step, so the image
# contains only what the base image provides. The CMD below names
# main.handler, which presumably needs a main.py (plus its dependencies) to be
# present in /var/task - confirm whether this is an intentional placeholder.
# -----------------------------
# Lambda handler
# -----------------------------
CMD ["main.handler"]

View file

@ -1,10 +1,12 @@
import pandas as pd
import requests
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
from backend.address2UPRN.main import (
resolve_uprns_for_postcode_group,
get_epc_data_with_postcode,
)
from tqdm import tqdm
def sanitise_postcode(postcode: str) -> str | None:
"""
Normalise postcode for grouping.
@ -51,11 +53,7 @@ def main():
# --- validate AFTER grouping (save API calls) ---
# Get unique, non-null postcodes
unique_postcodes = (
df["postcode_clean"]
.dropna()
.unique()
)
unique_postcodes = df["postcode_clean"].dropna().unique()
# Validate each postcode once. TODO: add a progress bar
postcode_validity = {
@ -66,7 +64,6 @@ def main():
# Map validity back onto dataframe
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
results = []
for postcode, group_df in tqdm(
@ -98,17 +95,33 @@ def main():
results.append(tmp)
final_df = pd.concat(results, ignore_index=True)
a = final_df[[
"best_match_lexiscore","Address 1",
"best_match_address", "Postcode",
"UPRN", "best_match_uprn"
]] # add levi score to viewing
b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing
b = b[[
"best_match_lexiscore","Address 1",
"best_match_address", "Postcode",
"UPRN", "best_match_uprn"
]]
a = final_df[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
] # add levi score to viewing
b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
b = b[
[
"best_match_lexiscore",
"Address 1",
"best_match_address",
"Postcode",
"UPRN",
"best_match_uprn",
]
]
def handler(event, context):
    """
    Placeholder entry point for the postcode splitter.

    Neither ``event`` nor ``context`` is inspected: the function prints a
    greeting and returns a fixed 200 response.
    """
    print("hello Postcode splitter world")
    response = dict(statusCode=200, body="hello world")
    return response
if __name__ == "__main__":
main()

View file

@ -0,0 +1,14 @@
# Instantiates the lambda_with_sqs module (relative path ../modules) for the
# postcode splitter.
module "lambda" {
source = "../modules/lambda_with_sqs"
# NOTE(review): the name is hard-coded here rather than taken from a variable - confirm this is intended.
name = "postcode-splitter"
stage = var.stage
# local.image_uri is not defined in this block; it must be declared elsewhere in this configuration.
image_uri = local.image_uri
# Key/value pairs handed to the module's `environment` input.
environment = {
STAGE = var.stage
LOG_LEVEL = "info"
}
}

View file

@ -0,0 +1,16 @@
# Terraform settings for this configuration: provider requirements, remote
# state backend and minimum CLI version.
terraform {
required_providers {
aws = {
source = "hashicorp/aws"
# "~> 4.16" accepts 4.16 and later 4.x releases, but not 5.0 or above.
version = "~> 4.16"
}
}
# State is stored in S3; the bucket named below has to exist before `terraform init` can use it.
backend "s3" {
bucket = "postcode-splitter-terraform-state"
key = "terraform.tfstate"
region = "eu-west-2"
}
required_version = ">= 1.2.0"
}

View file

@ -0,0 +1,26 @@
# Input variables, derived locals and outputs for the lambda deployment.
variable "lambda_name" {
type = string
description = "Logical name of the lambda (e.g. address2uprn)"
}
variable "stage" {
description = "Deployment stage (e.g. dev, prod)"
type = string
}
variable "ecr_repo_url" {
type = string
description = "ECR repository URL (no tag, no digest)"
}
variable "image_digest" {
type = string
description = "Image digest (sha256:...)"
}
# Builds a digest-pinned image reference of the form <repo_url>@<digest>.
locals {
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}
# Exposes the computed image reference as an output.
output "resolved_image_uri" {
value = local.image_uri
}

View file

@ -319,4 +319,20 @@ module "condition_etl_registry" {
name = "condition-etl"
stage = var.stage
################################################
# Postcode Splitter Lambda ECR
################################################
# Terraform state bucket for the postcode splitter.
# NOTE(review): presumably this name has to match the `bucket` of the postcode
# splitter's S3 backend configuration - confirm.
module "postcode_splitter_state_bucket" {
source = "../modules/tf_state_bucket"
bucket_name = "postcode-splitter-terraform-state"
}
# Container registry for the postcode splitter image, created via the
# container_registry module with the given name and stage.
module "postcode_splitter_registry" {
source = "../modules/container_registry"
name = "postcode_splitter"
stage = var.stage
}

View file

@ -2,6 +2,10 @@
This script prepares the data for the financial model
"""
from dotenv import load_dotenv
load_dotenv(".env.local")
import pandas as pd
import numpy as np
from backend.app.utils import sap_to_epc
@ -24,12 +28,12 @@ from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 502 # Peabody
PORTFOLIO_ID = 524
SCENARIOS = [
986,
1009,
]
scenario_names = {
986: "EPC C",
1009: "EPC C; Most Economic",
}