mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge branch 'feautre/postcode_splitter_address_uprn_automator_2000' into deploy-condition-processor
This commit is contained in:
commit
7f32f95f71
18 changed files with 231 additions and 47 deletions
|
|
@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \
|
|||
|
||||
# # 4) Python deps - if you want to run asset list
|
||||
ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1
|
||||
ADD asset_list/requirements.txt requirements.txt
|
||||
RUN pip install -r requirements.txt
|
||||
ADD .devcontainer/asset_list/requirements.txt requirements2.txt
|
||||
ADD asset_list/requirements.txt requirements1.txt
|
||||
RUN cat requirements1.txt requirements2.txt >> requirements.txt
|
||||
|
||||
RUN pip install -r requirements.txt
|
||||
# 5) Workdir
|
||||
|
|
|
|||
|
|
@ -15,10 +15,9 @@ uvicorn[standard]
|
|||
pytest==9.0.2
|
||||
pytest-cov==7.0.0
|
||||
ipykernel>=6.25,<7
|
||||
pydantic-settings<2
|
||||
pyyaml>=6.0.1
|
||||
pydantic>=1.10.7,<2
|
||||
sqlmodel
|
||||
# Formatting
|
||||
black==26.1.0
|
||||
dotenv
|
||||
pydantic-settings
|
||||
39
.github/workflows/deploy_terraform.yml
vendored
39
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -73,8 +73,8 @@ jobs:
|
|||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/address2UPRN/Dockerfile
|
||||
build_context: backend/address2UPRN
|
||||
dockerfile_path: backend/address2UPRN/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
|
|
@ -96,3 +96,38 @@ jobs:
|
|||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2️⃣ Build Postcode Splitter image and Push
|
||||
# ============================================================
|
||||
postcodeSplitter_image:
|
||||
needs: [determine_stage, shared_terraform]
|
||||
uses: ./.github/workflows/_build_image.yml
|
||||
with:
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/postcode_splitter/handler/Dockerfile
|
||||
build_context: .
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy Postcode Splitter Lambda
|
||||
# ============================================================
|
||||
postcodeSplitter_lambda:
|
||||
needs: [postcodeSplitter_image, determine_stage]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: postcodeSplitter
|
||||
lambda_path: infrastructure/terraform/lambda/postcodeSplitter
|
||||
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||
ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }}
|
||||
image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }}
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -19,4 +19,22 @@ PLAN_TRIGGER_BUCKET="test"
|
|||
DATA_BUCKET="test"
|
||||
EPC_AUTH_TOKEN="test"
|
||||
ENGINE_SQS_URL="test"
|
||||
ENERGY_ASSESSMENTS_BUCKET="test"
|
||||
API_KEY="test"
|
||||
SECRET_KEY="test"
|
||||
ENVIRONMENT="test"
|
||||
DATA_BUCKET="test"
|
||||
PLAN_TRIGGER_BUCKET="test"
|
||||
ENGINE_SQS_URL="test"
|
||||
GOOGLE_SOLAR_API_KEY="test"
|
||||
DB_HOST="test"
|
||||
DB_PASSWORD="test"
|
||||
DB_USERNAME="test"
|
||||
DB_PORT="test"
|
||||
DB_NAME="test"
|
||||
SAP_PREDICTIONS_BUCKET="test"
|
||||
CARBON_PREDICTIONS_BUCKET="test"
|
||||
HEAT_PREDICTIONS_BUCKET="test"
|
||||
HEATING_KWH_PREDICTIONS_BUCKET="test"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET="test"
|
||||
ENERGY_ASSESSMENTS_BUCKET="test"
|
||||
|
|
@ -1,7 +0,0 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
|
||||
# Copy function code
|
||||
COPY main.py .
|
||||
|
||||
# Set the handler
|
||||
CMD ["main.handler"]
|
||||
26
backend/address2UPRN/handler/Dockerfile
Normal file
26
backend/address2UPRN/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
|
||||
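# This is not going to be permanent - it stays only until we solve for env variables in live prod.
# NOTE(review): values set with ENV are stored in the image itself, so this token should move to runtime configuration - confirm and rotate it.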
|
||||
ENV EPC_AUTH_TOKEN=a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzAg
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# -----------------------------
|
||||
# Copy requirements FIRST (for Docker layer caching)
|
||||
# -----------------------------
|
||||
COPY backend/address2UPRN/handler/requirements.txt .
|
||||
|
||||
# Install dependencies into Lambda runtime
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# -----------------------------
|
||||
# Copy application code
|
||||
# -----------------------------
|
||||
COPY utils/ utils/
|
||||
COPY backend/address2UPRN/main.py .
|
||||
|
||||
# -----------------------------
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["main.handler"]
|
||||
3
backend/address2UPRN/handler/requirements.txt
Normal file
3
backend/address2UPRN/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
epc-api-python==1.0.2
|
||||
tqdm
|
||||
pandas
|
||||
|
|
@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3):
|
|||
method="get",
|
||||
params={"postcode": postcode},
|
||||
)
|
||||
if not search_resp or "rows" not in search_resp:
|
||||
return pd.DataFrame()
|
||||
|
||||
results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"])
|
||||
|
||||
|
|
@ -298,7 +300,7 @@ def get_uprn_candidates(
|
|||
)
|
||||
|
||||
|
||||
def get_uprn(user_inputed_address: str, postcode: str):
|
||||
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
|
||||
"""
|
||||
Return uprn (str)
|
||||
Return False if failed to find a sensible matching epc
|
||||
|
|
@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str):
|
|||
if found_uprn == "":
|
||||
return None
|
||||
|
||||
if return_address:
|
||||
return found_uprn, address
|
||||
return found_uprn
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -1,17 +1,24 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
||||
# Enable tqdm for pandas
|
||||
tqdm.pandas()
|
||||
|
||||
df = pd.read_excel("address2.xlsx")
|
||||
|
||||
|
||||
# use Address 1
|
||||
junte_df = pd.read_excel("hackney_uprn_failures.xlsx")
|
||||
def extract_uprn(row):
|
||||
print(row["User Input"], row["Postcode"])
|
||||
result = get_uprn(row["User Input"], row["Postcode"], return_address=True)
|
||||
|
||||
if result is None:
|
||||
return pd.Series([None, None])
|
||||
|
||||
uprn, found_address = result
|
||||
return pd.Series([uprn, found_address])
|
||||
|
||||
|
||||
# use domna_address_1
|
||||
khalim_df = pd.read_excel("khalim_standard.xlsx")
|
||||
|
||||
|
||||
combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1')
|
||||
|
||||
# Find the row in khalim_df that does not app
|
||||
|
||||
result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])]
|
||||
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
|
||||
|
||||
df.to_excel("outputs2.xlsx", index=False)
|
||||
|
|
|
|||
Binary file not shown.
9
backend/postcode_splitter/handler/Dockerfile
Normal file
9
backend/postcode_splitter/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# -----------------------------
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["main.handler"]
|
||||
0
backend/postcode_splitter/handler/requirements.txt
Normal file
0
backend/postcode_splitter/handler/requirements.txt
Normal file
|
|
@ -1,10 +1,12 @@
|
|||
import pandas as pd
|
||||
import requests
|
||||
from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode
|
||||
from backend.address2UPRN.main import (
|
||||
resolve_uprns_for_postcode_group,
|
||||
get_epc_data_with_postcode,
|
||||
)
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
|
||||
def sanitise_postcode(postcode: str) -> str | None:
|
||||
"""
|
||||
Normalise postcode for grouping.
|
||||
|
|
@ -51,11 +53,7 @@ def main():
|
|||
# --- validate AFTER grouping (save API calls) ---
|
||||
|
||||
# Get unique, non-null postcodes
|
||||
unique_postcodes = (
|
||||
df["postcode_clean"]
|
||||
.dropna()
|
||||
.unique()
|
||||
)
|
||||
unique_postcodes = df["postcode_clean"].dropna().unique()
|
||||
|
||||
# Validate each postcode once. TODO: add a progress bar
|
||||
postcode_validity = {
|
||||
|
|
@ -66,7 +64,6 @@ def main():
|
|||
# Map validity back onto dataframe
|
||||
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
|
||||
|
||||
|
||||
results = []
|
||||
|
||||
for postcode, group_df in tqdm(
|
||||
|
|
@ -98,17 +95,33 @@ def main():
|
|||
results.append(tmp)
|
||||
|
||||
final_df = pd.concat(results, ignore_index=True)
|
||||
a = final_df[[
|
||||
"best_match_lexiscore","Address 1",
|
||||
"best_match_address", "Postcode",
|
||||
"UPRN", "best_match_uprn"
|
||||
]] # add levi score to viewing
|
||||
b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing
|
||||
b = b[[
|
||||
"best_match_lexiscore","Address 1",
|
||||
"best_match_address", "Postcode",
|
||||
"UPRN", "best_match_uprn"
|
||||
]]
|
||||
a = final_df[
|
||||
[
|
||||
"best_match_lexiscore",
|
||||
"Address 1",
|
||||
"best_match_address",
|
||||
"Postcode",
|
||||
"UPRN",
|
||||
"best_match_uprn",
|
||||
]
|
||||
] # add levi score to viewing
|
||||
b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
|
||||
b = b[
|
||||
[
|
||||
"best_match_lexiscore",
|
||||
"Address 1",
|
||||
"best_match_address",
|
||||
"Postcode",
|
||||
"UPRN",
|
||||
"best_match_uprn",
|
||||
]
|
||||
]
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
print("hello Postcode splitter world")
|
||||
return {"statusCode": 200, "body": "hello world"}
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
|
|||
14
infrastructure/terraform/lambda/postcodeSplitter/main.tf
Normal file
14
infrastructure/terraform/lambda/postcodeSplitter/main.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
module "lambda" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
name = "postcode-splitter"
|
||||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
}
|
||||
}
|
||||
16
infrastructure/terraform/lambda/postcodeSplitter/provider.tf
Normal file
16
infrastructure/terraform/lambda/postcodeSplitter/provider.tf
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
terraform {
|
||||
required_providers {
|
||||
aws = {
|
||||
source = "hashicorp/aws"
|
||||
version = "~> 4.16"
|
||||
}
|
||||
}
|
||||
|
||||
backend "s3" {
|
||||
bucket = "postcode-splitter-terraform-state"
|
||||
key = "terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
|
||||
required_version = ">= 1.2.0"
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
variable "lambda_name" {
|
||||
type = string
|
||||
description = "Logical name of the lambda (e.g. address2uprn)"
|
||||
}
|
||||
|
||||
variable "stage" {
|
||||
description = "Deployment stage (e.g. dev, prod)"
|
||||
type = string
|
||||
}
|
||||
variable "ecr_repo_url" {
|
||||
type = string
|
||||
description = "ECR repository URL (no tag, no digest)"
|
||||
}
|
||||
|
||||
variable "image_digest" {
|
||||
type = string
|
||||
description = "Image digest (sha256:...)"
|
||||
}
|
||||
|
||||
locals {
|
||||
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||
}
|
||||
|
||||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
|
|
@ -319,4 +319,20 @@ module "condition_etl_registry" {
|
|||
name = "condition-etl"
|
||||
stage = var.stage
|
||||
|
||||
|
||||
|
||||
################################################
|
||||
# Postcode Splitter – Lambda ECR
|
||||
################################################
|
||||
module "postcode_splitter_state_bucket" {
|
||||
source = "../modules/tf_state_bucket"
|
||||
bucket_name = "postcode-splitter-terraform-state"
|
||||
|
||||
}
|
||||
|
||||
module "postcode_splitter_registry" {
|
||||
source = "../modules/container_registry"
|
||||
name = "postcode_splitter"
|
||||
stage = var.stage
|
||||
|
||||
}
|
||||
|
|
@ -2,6 +2,10 @@
|
|||
This script prepares the data for the financial model
|
||||
"""
|
||||
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv(".env.local")
|
||||
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
|
@ -24,12 +28,12 @@ from sqlalchemy import func
|
|||
|
||||
# PORTFOLIO_ID = 206
|
||||
# SCENARIOS = [389]
|
||||
PORTFOLIO_ID = 502 # Peabody
|
||||
PORTFOLIO_ID = 524
|
||||
SCENARIOS = [
|
||||
986,
|
||||
1009,
|
||||
]
|
||||
scenario_names = {
|
||||
986: "EPC C",
|
||||
1009: "EPC C; Most Economic",
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue