Merge pull request #696 from Hestia-Homes/feature/lambda_deployment

Feature/lambda deployment
This commit is contained in:
Jun-te Kim 2026-02-04 17:42:06 +00:00 committed by GitHub
commit e8abe6b25b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
43 changed files with 1313 additions and 449 deletions

View file

@ -4,11 +4,11 @@ services:
model-sal:
user: "${UID}:${GID}"
build:
context: ..
dockerfile: .devcontainer/Dockerfile
context: ../..
dockerfile: .devcontainer/asset_list/Dockerfile
command: sleep infinity
volumes:
- ..:/workspaces/model
- ../../:/workspaces/model
networks:
- model-net

View file

@ -21,3 +21,4 @@ pydantic>=1.10.7,<2
sqlmodel
# Formatting
black==26.1.0
# Provides the `dotenv` module (`from dotenv import load_dotenv`).
python-dotenv

78
.github/workflows/_build_image.yml vendored Normal file
View file

@ -0,0 +1,78 @@
# Reusable workflow: builds a Docker image, pushes it to ECR tagged with the
# commit SHA, and exposes the pushed image digest to the calling workflow.
name: Build Docker image

on:
  workflow_call:
    inputs:
      ecr_repo:
        required: true
        type: string
      dockerfile_path:
        required: true
        type: string
      build_context:
        required: false
        default: "."
        type: string
    outputs:
      image_digest:
        description: "Pushed image digest"
        value: ${{ jobs.build.outputs.image_digest }}
      # NOTE(review): this URL embeds AWS_REGION, which is passed in as a
      # secret; GitHub may withhold job outputs that contain secret values,
      # so callers should not rely on it being non-empty - confirm.
      ecr_repo_url:
        description: "ECR repository URL"
        value: ${{ jobs.build.outputs.ecr_repo_url }}
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_REGION:
        required: true

jobs:
  build:
    runs-on: ubuntu-latest
    outputs:
      image_digest: ${{ steps.digest.outputs.image_digest }}
      ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }}
    steps:
      - uses: actions/checkout@v4

      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - uses: aws-actions/amazon-ecr-login@v2

      # Workflow inputs are handed to the scripts through `env` rather than
      # being spliced into the script text, so their contents can never be
      # interpreted as shell syntax.
      - name: Resolve ECR repo URL
        id: repo
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          set -euo pipefail
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}"
          echo "Resolved ECR repo URL (local var):"
          echo "$ECR_REPO_URL"
          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"

      - name: Build & push image
        env:
          ECR_REPO_URL: ${{ steps.repo.outputs.ecr_repo_url }}
          DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
          BUILD_CONTEXT: ${{ inputs.build_context }}
        run: |
          set -euo pipefail
          IMAGE_URI="${ECR_REPO_URL}:${GITHUB_SHA}"
          docker build -f "$DOCKERFILE_PATH" -t "$IMAGE_URI" "$BUILD_CONTEXT"
          docker push "$IMAGE_URI"

      - name: Resolve image digest
        id: digest
        env:
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          set -euo pipefail
          DIGEST=$(aws ecr describe-images \
            --repository-name "$ECR_REPO" \
            --image-ids "imageTag=${GITHUB_SHA}" \
            --query 'imageDetails[0].imageDigest' \
            --output text)
          # `--output text` renders a missing value as the literal "None";
          # fail here instead of passing a bogus digest downstream.
          if [[ -z "$DIGEST" || "$DIGEST" == "None" ]]; then
            echo "Could not resolve image digest for ${ECR_REPO}:${GITHUB_SHA}" >&2
            exit 1
          fi
          echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT"

91
.github/workflows/_deploy_lambda.yml vendored Normal file
View file

@ -0,0 +1,91 @@
# Reusable workflow: runs Terraform in `lambda_path` (workspace = `stage`),
# passing the Lambda name, the resolved ECR repo URL and the image digest
# as Terraform variables, then applies the saved plan.
name: Deploy Lambda (Terraform)

on:
  workflow_call:
    inputs:
      lambda_name:
        required: true
        type: string
      lambda_path:
        required: true
        type: string
      stage:
        required: true
        type: string
      ecr_repo:
        required: true
        type: string
      image_digest:
        required: true
        type: string
    secrets:
      AWS_ACCESS_KEY_ID:
        required: true
      AWS_SECRET_ACCESS_KEY:
        required: true
      AWS_REGION:
        required: true

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4

      # Inputs reach the scripts via `env` so their contents are never
      # interpreted as shell syntax.
      - name: Debug inputs
        env:
          LAMBDA_NAME: ${{ inputs.lambda_name }}
          LAMBDA_PATH: ${{ inputs.lambda_path }}
          STAGE: ${{ inputs.stage }}
          ECR_REPO: ${{ inputs.ecr_repo }}
          IMAGE_DIGEST: ${{ inputs.image_digest }}
        run: |
          echo "lambda_name=${LAMBDA_NAME}"
          echo "lambda_path=${LAMBDA_PATH}"
          echo "stage=${STAGE}"
          # The declared input is `ecr_repo`; the full URL is resolved in a
          # later step of this job.
          echo "ecr_repo=${ECR_REPO}"
          echo "image_digest=${IMAGE_DIGEST}"

      - uses: aws-actions/configure-aws-credentials@v4
        with:
          aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }}
          aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          aws-region: ${{ secrets.AWS_REGION }}

      - uses: hashicorp/setup-terraform@v3

      - uses: aws-actions/amazon-ecr-login@v2

      - name: Resolve ECR repo URL
        id: repo
        env:
          AWS_REGION: ${{ secrets.AWS_REGION }}
          ECR_REPO: ${{ inputs.ecr_repo }}
        run: |
          set -euo pipefail
          AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
          ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${ECR_REPO}"
          echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT"

      - name: Terraform Init
        working-directory: ${{ inputs.lambda_path }}
        run: terraform init -reconfigure

      # Select the per-stage workspace, creating it on first use.
      - name: Terraform Workspace
        working-directory: ${{ inputs.lambda_path }}
        env:
          STAGE: ${{ inputs.stage }}
        run: |
          terraform workspace select "$STAGE" \
            || terraform workspace new "$STAGE"

      - name: Terraform Plan
        working-directory: ${{ inputs.lambda_path }}
        env:
          STAGE: ${{ inputs.stage }}
          LAMBDA_NAME: ${{ inputs.lambda_name }}
          ECR_REPO_URL: ${{ steps.repo.outputs.ecr_repo_url }}
          IMAGE_DIGEST: ${{ inputs.image_digest }}
        run: |
          terraform plan \
            -var="stage=${STAGE}" \
            -var="lambda_name=${LAMBDA_NAME}" \
            -var="ecr_repo_url=${ECR_REPO_URL}" \
            -var="image_digest=${IMAGE_DIGEST}" \
            -out=lambdaplan

      # Applies exactly the plan saved by the previous step.
      - name: Terraform Apply
        working-directory: ${{ inputs.lambda_path }}
        run: terraform apply -auto-approve lambdaplan

View file

@ -1,80 +1,98 @@
name: Deploy terraform stack
name: Deploy infrastructure
on:
push:
branches:
- dev
- prod
- "**"
jobs:
deploy:
determine_stage:
runs-on: ubuntu-latest
outputs:
stage: ${{ steps.set-stage.outputs.stage }}
steps:
- name: Checkout
uses: actions/checkout@v2
- name: Setup AWS credentials file
- name: Determine stage from branch
id: set-stage
shell: bash
run: |
mkdir -p ~/.aws
echo "[DevAdmin]" > ~/.aws/credentials
echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
echo "[ProdAdmin]" >> ~/.aws/credentials
echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials
echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials
BRANCH="${GITHUB_REF_NAME}"
- name: Setup AWS config file
run: |
echo "[profile DevAdmin]" > ~/.aws/config
echo "region = eu-west-2" >> ~/.aws/config
echo "[profile ProdAdmin]" >> ~/.aws/config
echo "region = eu-west-2" >> ~/.aws/config
if [[ "$BRANCH" == "prod" ]]; then
echo "stage=prod" >> "$GITHUB_OUTPUT"
- name: Setup Terraform
uses: hashicorp/setup-terraform@v1
with:
terraform_version: 1.5.2
elif [[ "$BRANCH" == "dev" ]]; then
echo "stage=dev" >> "$GITHUB_OUTPUT"
- name: Configure AWS credentials (DevAdmin)
uses: aws-actions/configure-aws-credentials@v1
else
echo "stage=dev" >> "$GITHUB_OUTPUT"
fi
# ============================================================
# 1⃣ Shared Terraform (infra)
# ============================================================
shared_terraform:
needs: determine_stage
runs-on: ubuntu-latest
env:
STAGE: ${{ needs.determine_stage.outputs.stage }}
steps:
- uses: actions/checkout@v4
- uses: aws-actions/configure-aws-credentials@v4
with:
aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
env:
AWS_PROFILE: "DevAdmin"
aws-region: ${{ secrets.DEV_AWS_REGION }}
- uses: hashicorp/setup-terraform@v3
- name: Terraform Init
run: cd infrastructure/terraform && terraform init
working-directory: infrastructure/terraform/shared
run: terraform init -reconfigure
- name: Terraform Workspace
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform
terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
working-directory: infrastructure/terraform/shared
run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE}
- name: Terraform Plan
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars
working-directory: infrastructure/terraform/shared
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
- name: Deploy to Dev
if: github.ref == 'refs/heads/dev'
run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
env:
name: dev
- name: Terraform Apply
if: env.STAGE == 'prod'
working-directory: infrastructure/terraform/shared
run: terraform apply -auto-approve tfplan
- name: Configure AWS credentials (ProdAdmin)
uses: aws-actions/configure-aws-credentials@v1
with:
aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
aws-region: eu-west-2
env:
AWS_PROFILE: "ProdAdmin"
# ============================================================
# 2⃣ Build Address 2 UPRN image and Push
# ============================================================
address2uprn_image:
needs: [determine_stage, shared_terraform]
uses: ./.github/workflows/_build_image.yml
with:
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
dockerfile_path: backend/address2UPRN/Dockerfile
build_context: backend/address2UPRN
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
- name: Deploy to Prod
if: github.ref == 'refs/heads/prod'
run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
env:
name: prod
# ============================================================
# 3⃣ Deploy Address 2 UPRN Lambda
# ============================================================
address2uprn_lambda:
needs: [address2uprn_image, determine_stage]
uses: ./.github/workflows/_deploy_lambda.yml
with:
lambda_name: address2uprn
lambda_path: infrastructure/terraform/lambda/address2UPRN
stage: ${{ needs.determine_stage.outputs.stage }}
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
image_digest: ${{ needs.address2uprn_image.outputs.image_digest }}
secrets:
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}

View file

@ -12,23 +12,34 @@ from asset_list.utils import get_data
from dotenv import load_dotenv
from backend.SearchEpc import SearchEpc
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=")
EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
)
def extract_address1(asset_list, full_address_col, postcode_col, method="first_two_words"):
def extract_address1(
asset_list, full_address_col, postcode_col, method="first_two_words"
):
if method == "first_two_words":
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
asset_list["address1_extracted"] = (
asset_list[full_address_col].str.split(" ").str[:2].str.join(" ")
)
return asset_list
if method == "first_word":
asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0]
asset_list["address1_extracted"] = (
asset_list[full_address_col].str.split(" ").str[0]
)
return asset_list
if method == "house_number_extraction":
asset_list["address1_extracted"] = asset_list.apply(
lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]),
axis=1
lambda x: SearchEpc.get_house_number(
address=x[full_address_col], postcode=x[postcode_col]
),
axis=1,
)
return asset_list
@ -57,15 +68,11 @@ def app():
EPC recommendations
Property UPRN
"""
<<<<<<< HEAD
data_folder = ("/workspaces/model/asset_list")
data_filename = "assets.xlsx"
=======
data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney"
data_filename = "Domna SHF Wave 3 (3).xlsx"
sheet_name = "Domna Wave 3"
postcode_column = 'Postcode'
postcode_column = "Postcode"
address1_column = "Address 1"
address1_method = None
fulladdress_column = None
@ -96,15 +103,16 @@ def app():
landlord_block_reference = None
# Peabody data for cleaning
data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation")
data_folder = (
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
"Project/data_validation"
)
data_filename = "to_standardise_uprns.xlsx"
>>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c
sheet_name = "Sheet1"
postcode_column = 'Postcode'
postcode_column = "Postcode"
address1_column = None
address1_method = 'house_number_extraction'
fulladdress_column = 'Address'
address1_method = "house_number_extraction"
fulladdress_column = "Address"
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
@ -155,49 +163,62 @@ def app():
landlord_existing_pv=landlord_existing_pv,
landlord_sap=landlord_sap,
landlord_block_reference=landlord_block_reference,
phase=phase
phase=phase,
)
asset_list.init_standardise()
# We produce the new maps, which can be saved for future usage
new_property_type_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type] if
asset_list.landlord_property_type else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_property_type]
if asset_list.landlord_property_type
else {}
).items()
if k not in PROPERTY_MAPPING
}
new_built_form_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form] if
asset_list.landlord_built_form else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_built_form]
if asset_list.landlord_built_form
else {}
).items()
if k not in BUILT_FORM_MAPPINGS
}
new_wall_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction] if
asset_list.landlord_wall_construction else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_wall_construction]
if asset_list.landlord_wall_construction
else {}
).items()
if k not in WALL_CONSTRUCTION_MAPPINGS
}
new_heating_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system] if
asset_list.landlord_heating_system else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_heating_system]
if asset_list.landlord_heating_system
else {}
).items()
if k not in HEATING_MAPPINGS
}
new_existing_pv_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv] if asset_list.landlord_existing_pv else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_existing_pv]
if asset_list.landlord_existing_pv
else {}
).items()
if k not in EXISTING_PV_MAPPINGS
}
new_roof_construction_map = {
k: v for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction] if
asset_list.landlord_roof_construction else {}
k: v
for k, v in (
asset_list.variable_mappings[asset_list.landlord_roof_construction]
if asset_list.landlord_roof_construction
else {}
).items()
if k not in ROOF_CONSTRUCTION_MAPPINGS
}
@ -211,7 +232,7 @@ def app():
outcomes_address=outcomes_address,
outcomes_postcode=outcomes_postcode,
outcomes_houseno=outcomes_houseno,
outcomes_id=outcomes_id
outcomes_id=outcomes_id,
)
asset_list.flag_survey_master(
@ -245,14 +266,16 @@ def app():
skip = max(chunk_indexes)
if any(x in folder_contents for x in downloaded_files):
skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents])
skip = max(
[i for i in chunk_indexes if filename.format(i=i) in folder_contents]
)
for i in range(0, len(asset_list.standardised_asset_list), chunk_size):
print(f"Processing chunk {i} to {i + chunk_size}")
if skip is not None and not force_retrieve_data:
if i <= skip:
continue
chunk = asset_list.standardised_asset_list[i:i + chunk_size]
chunk = asset_list.standardised_asset_list[i : i + chunk_size]
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
df=chunk,
row_id_name=asset_list.DOMNA_PROPERTY_ID,
@ -264,7 +287,7 @@ def app():
built_form_column=AssetList.STANDARD_BUILT_FORM,
manual_uprn_map=manual_uprn_map,
epc_api_only=epc_api_only,
epc_auth_token=EPC_AUTH_TOKEN
epc_auth_token=EPC_AUTH_TOKEN,
)
# We now retrieve any failed properties
@ -287,7 +310,9 @@ def app():
# Append the failed data to the main data
# Store the chunk locally as a csv
pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False)
pd.DataFrame(epc_data_chunk).to_csv(
os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False
)
# Store the errors and no-data locally
with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f:
json.dump(errors_chunk, f)
@ -318,7 +343,9 @@ def app():
unique_recommendations = set()
for _, row in recommendations_df.iterrows():
unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]])
unique_recommendations.update(
[rec["improvement-summary-text"] for rec in row["recommendations"]]
)
columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations)
transformed_data = []
@ -338,20 +365,24 @@ def app():
transformed_df = pd.DataFrame(transformed_data)
for col in [
"Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
"Floor insulation",
"Floor insulation (suspended floor)",
]:
if col not in transformed_df.columns:
transformed_df[col] = False
transformed_df = transformed_df[
[
asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)",
"Floor insulation", "Floor insulation (suspended floor)"
asset_list.DOMNA_PROPERTY_ID,
"Floor insulation (solid floor)",
"Floor insulation",
"Floor insulation (suspended floor)",
]
]
transformed_df["epc_has_floor_recommendation"] = (
transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] |
transformed_df["Floor insulation (suspended floor)"]
transformed_df["Floor insulation (solid floor)"]
| transformed_df["Floor insulation"]
| transformed_df["Floor insulation (suspended floor)"]
)
# Get the find my epc data
@ -364,21 +395,20 @@ def app():
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID],
**x["find_my_epc_data"]
**x["find_my_epc_data"],
}
)
else:
find_my_epc_data.append(
{
asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]
}
{asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]}
)
find_my_epc_data = pd.DataFrame(find_my_epc_data)
find_my_epc_data = find_my_epc_data.merge(
transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]],
how="left", on=asset_list.DOMNA_PROPERTY_ID
how="left",
on=asset_list.DOMNA_PROPERTY_ID,
)
# We check if we get the solar pv column:
@ -388,24 +418,26 @@ def app():
# Retrieve just the data we need
epc_df = epc_df[
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
].rename(
columns=asset_list.EPC_API_DATA_NAMES
)
].rename(columns=asset_list.EPC_API_DATA_NAMES)
# Look for columns not in the find my EPC data, which will have happened if we didn't
# retrieve it in the first place
missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns]
missed_find_epc_cols = [
c
for c in list(asset_list.FIND_EPC_DATA_NAMES.keys())
if c not in find_my_epc_data.columns
]
if missed_find_epc_cols:
for c in missed_find_epc_cols:
find_my_epc_data[c] = None
epc_df = epc_df.merge(
find_my_epc_data[
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys())
]
.rename(columns=asset_list.FIND_EPC_DATA_NAMES),
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
how="left",
on=asset_list.DOMNA_PROPERTY_ID
on=asset_list.DOMNA_PROPERTY_ID,
)
asset_list.merge_data(epc_df)
@ -422,7 +454,10 @@ def app():
asset_list.get_work_figures()
# Store as an excel
filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx"
filename = (
os.path.join(data_folder, ".".join(data_filename.split(".")[:-1]))
+ " - Standardised.xlsx"
)
# Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data
# Determine inspections priority
@ -446,26 +481,42 @@ def app():
# )
with pd.ExcelWriter(filename) as writer:
asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False)
asset_list.standardised_asset_list.to_excel(
writer, sheet_name="Standardised Asset List", index=False
)
if asset_list.block_analysis_df is not None:
asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False)
asset_list.block_analysis_df.to_excel(
writer, sheet_name="Block Analysis", index=False
)
# If we have outcomes, we add a tab with the outcomes
if not asset_list.outcomes_for_output.empty:
asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False)
asset_list.outcomes_for_output.to_excel(
writer, sheet_name="Outcomes", index=False
)
if not asset_list.unmatched_submissions.empty:
asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False)
asset_list.unmatched_submissions.to_excel(
writer, sheet_name="Unmatched Submissions", index=False
)
if not asset_list.outcomes_no_match.empty:
asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False)
asset_list.outcomes_no_match.to_excel(
writer, sheet_name="Unmatched Outcomes", index=False
)
if not asset_list.ecosurv_no_match.empty:
asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False)
asset_list.ecosurv_no_match.to_excel(
writer, sheet_name="Unmatched Ecosurv", index=False
)
if not asset_list.geographical_areas.empty:
asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False)
asset_list.geographical_areas.to_excel(
writer, sheet_name="Geographical Areas", index=False
)
# Store dupes
if asset_list.duplicated_addresses is not None:
if not asset_list.duplicated_addresses.empty:
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False)
asset_list.duplicated_addresses.to_excel(
writer, sheet_name="Duplicate Properties", index=False
)

View file

@ -0,0 +1,7 @@
FROM public.ecr.aws/lambda/python:3.10

# Add the function module to the image's working directory.
COPY ["main.py", "./"]

# Handler passed to the base image's entrypoint
# (presumably the `handler` function in main.py - confirm).
CMD ["main.handler"]

View file

@ -14,6 +14,9 @@ EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
)
if EPC_AUTH_TOKEN is None:
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
import re
from difflib import SequenceMatcher
from typing import Set
@ -38,6 +41,34 @@ def levenshtein(a: str, b: str) -> float:
def tokenise(s: str) -> Set[str]:
return set(s.split())
def extract_building_number(s: str) -> str | None:
"""
Extract the main building number (NOT flat/unit).
Assumes formats like:
- '42 moreton road'
- 'flat 3 42 moreton road'
"""
tokens = s.split()
# remove flat/unit context
cleaned = []
skip_next = False
for t in tokens:
if t in ("flat", "apt", "apartment", "unit"):
skip_next = True
continue
if skip_next:
skip_next = False
continue
cleaned.append(t)
# first remaining number is building number
for t in cleaned:
if re.fullmatch(r"\d+[a-z]?", t):
return t
return None
a_norm = normalise_address(a)
b_norm = normalise_address(b)
@ -52,6 +83,13 @@ def levenshtein(a: str, b: str) -> float:
if nums_a and nums_b and nums_a.isdisjoint(nums_b):
return 0.0
# 🔒 HARD GUARD: building number must match
bld_a = extract_building_number(a_norm)
bld_b = extract_building_number(b_norm)
if bld_a and bld_b and bld_a != bld_b:
return 0.0
# --- order-sensitive flat/building guard ---
seq_a = extract_number_sequence(a_norm)
seq_b = extract_number_sequence(b_norm)
@ -418,6 +456,10 @@ def run_all_test():
get_uprn("46 Oswald Street", "E5 0BT"), False
) # this one return "flat 1, in 1 semley gate"
get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
get_uprn_candidates(
get_epc_data_with_postcode("Cr2 7dl"),
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
)
if __name__ == "__main__":
@ -511,6 +553,11 @@ if __name__ == "__main__":
)
def handler(event, context):
    """Placeholder handler: print a greeting and return it with status 200.

    Neither ``event`` nor ``context`` is read.
    """
    greeting = "hello world"
    print(greeting)
    return dict(statusCode=200, body=greeting)
# TODO: add a function dispatcher,
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)

View file

@ -115,11 +115,16 @@ FLAT 43 Goodstone Court,HA1 4FL,10070269095
8 Genteel House Samara Drive,UB1 1FJ,12189842
9 Genteel House Samara Drive,UB1 1FJ,12189843
10 Genteel House Samara Drive,UB1 1FJ,12189844
1 ASH TREE HOUSE,SE5 0TE,10009803979
3 ASH TREE HOUSE,SE5 0TE,10009803981
5 ASH TREE HOUSE,SE5 0TE,10009803983
8 ASH TREE HOUSE,SE5 0TE,10009803986
12 ASH TREE HOUSE,SE5 0TE,10009803990
1 ASH TREE HOUSE,SE5 0TE,None
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
3 ASH TREE HOUSE,SE5 0TE,None
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
5 ASH TREE HOUSE,SE5 0TE,None
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
8 ASH TREE HOUSE,SE5 0TE,None
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
12 ASH TREE HOUSE,SE5 0TE,None
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
FLAT 3 599 HARROW ROAD,W10 4RA,None
@ -164,4 +169,198 @@ FLAT 8 599 HARROW ROAD,W10 4RA,None
24b Honley Road,SE6 2HZ,None
FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
2 COLLEGE HOUSE,CM7 1JS,100091449870
3 COLLEGE HOUSE,CM7 1JS,100091449871
3 COLLEGE HOUSE,CM7 1JS,100091449871
1 Anita Street,M4 5DU,None
2 Anita Street,M4 5DU,77123061
5 Anita Street,M4 5DU,77123081
6 Anita Street,M4 5DU,77123082
8 Anita Street,M4 5DU,None
9 Anita Street,M4 5DU,None
10 Anita Street,M4 5DU,77123051
12 Anita Street,M4 5DU,77123053
19 Anita Street,M4 5DU,None
22 Anita Street,M4 5DU,None
26 Anita Street,M4 5DU,77123068
28 Anita Street,M4 5DU,None
30 Anita Street,M4 5DU,None
32 Anita Street,M4 5DU,None
33 Anita Street,M4 5DU,77123076
34 Anita Street,M4 5DU,None
35 Anita Street,M4 5DU,77123078
36 Anita Street,M4 5DU,77123079
23 George Leigh Street,M4 5DR,77123171
25 George Leigh Street,M4 5DR,None
35 George Leigh Street,M4 5DR,77123177
39 George Leigh Street,M4 5DR,77123179
41 George Leigh Street,M4 5DR,None
43 George Leigh Street,M4 5DR,None
49 George Leigh Street,M4 5DR,None
51 George Leigh Street,M4 5DR,77123185
55 George Leigh Street,M4 5DR,None
57 George Leigh Street,M4 5DR,None
"1a, Victoria Square",M4 5DX,77211153
2a Victoria Square ,M4 5DX,None
"4a, Victoria Square",M4 5DX,77211155
5a Victoria Square,M4 5DX,77211156
6a Victoria Square,M4 5DX,77211157
7a Victoria Square,M4 5DX,77211158
8a Victoria Square,M4 5DX,77211159
9a Victoria Square,M4 5DX,77211160
10a Victoria Square,M4 5DX,77211161
11a Victoria Square,M4 5DX,77211162
12a Victoria Square,M4 5DX,77211163
13a Victoria Square,M4 5DX,77211164
14a Victoria Square,M4 5DX,77211165
15a Victoria Square,M4 5DX,77211166
16a Victoria Square,M4 5DX,77211167
17a Victoria Square,M4 5DX,77211168
18a Victoria Square,M4 5DX,77211169
19a Victoria Square,M4 5DX,77211170
20a Victoria Square,M4 5DX,77211171
21a Victoria Square,M4 5DY,77211172
22a Victoria Square,M4 5DY,None
23a Victoria Square,M4 5DY,77211174
24a Victoria Square,M4 5DY,77211175
25a Victoria Square,M4 5DY,77211176
26a Victoria Square,M4 5DY,77211177
27a Victoria Square,M4 5DY,77211178
28a Victoria Square,M4 5DY,None
29a Victoria Square,M4 5DY,77211180
30a Victoria Square,M4 5DY,77211181
31a Victoria Square,M4 5DY,77211182
32a Victoria Square,M4 5DY,77211183
33a Victoria Square,M4 5DY,77211184
34a Victoria Square,M4 5DY,77211185
35a Victoria Square,M4 5DY,None
36a Victoria Square,M4 5DY,77211187
37a Victoria Square,M4 5DY,77211188
38a Victoria Square,M4 5DY,77211189
39a Victoria Square,M4 5DY,77211190
40a Victoria Square,M4 5DY,None
41a Victoria Square,M4 5DY,77211192
42a Victoria Square,M4 5DY,77211193
43a Victoria Square,M4 5DY,77211194
44a Victoria Square,M4 5DY,77211195
45a Victoria Square,M4 5DY,77211196
46a Victoria Square,M4 5DY,77211197
47a Victoria Square,M4 5DY,77211198
48a Victoria Square,M4 5DY,77211199
49a Victoria Square,M4 5DY,77211200
50a Victoria Square,M4 5DY,77211201
51a Victoria Square,M4 5DY,77211202
52a Victoria Square,M4 5DY,77211203
53a Victoria Square,M4 5DY,77211204
54a Victoria Square,M4 5DY,77211205
55a Victoria Square,M4 5DY,77211206
56a Victoria Square,M4 5DZ,77211207
57a Victoria Square,M4 5DZ,None
58a Victoria Square,M4 5DZ,77211209
59a Victoria Square,M4 5DZ,77211210
60a Victoria Square,M4 5DZ,77211211
61a Victoria Square,M4 5DZ,77211212
62a Victoria Square,M4 5DZ,77211213
63a Victoria Square,M4 5DZ,None
64a Victoria Square,M4 5DZ,77211215
65a Victoria Square,M4 5DZ,77211216
66a Victoria Square,M4 5DZ,None
67a Victoria Square,M4 5DZ,None
68a Victoria Square,M4 5DZ,77211219
69a Victoria Square,M4 5DZ,77211220
70a Victoria Square,M4 5DZ,77211221
71a Victoria Square,M4 5DZ,77211222
72a Victoria Square,M4 5DZ,77211223
73a Victoria Square,M4 5DZ,77211224
74a Victoria Square,M4 5DZ,None
75a Victoria Square,M4 5DZ,77211226
76a Victoria Square,M4 5DZ,77211227
77a Victoria Square,M4 5DZ,None
78a Victoria Square,M4 5DZ,77211229
79a Victoria Square,M4 5DZ,77211230
80a Victoria Square,M4 5DZ,77211231
81a Victoria Square,M4 5DZ,77211232
82 Victoria Square,M4 5DZ,None
83a Victoria Square,M4 5DZ,77211234
84a Victoria Square,M4 5DZ,None
85a Victoria Square,M4 5DZ,77211236
86a Victoria Square,M4 5DZ,77211237
87a Victoria Square,M4 5DZ,77211238
88a Victoria Square,M4 5DZ,None
89a Victoria Square,M4 5DZ,77211240
90a Victoria Square,M4 5DZ,77211241
91a Victoria Square,M4 5DZ,77211242
92a Victoria Square,M4 5DZ,77211243
93a Victoria Square,M4 5EA,77211244
94a Victoria Square,M4 5EA,None
95a Victoria Square,M4 5EA,77211246
96a Victoria Square,M4 5EA,77211247
97a Victoria Square,M4 5EA,77211248
98a Victoria Square,M4 5EA,77211249
99a Victoria Square,M4 5EA,77211250
100a Victoria Square,M4 5EA,77211251
101a Victoria Square,M4 5EA,None
102a Victoria Square,M4 5EA,None
103a Victoria Square,M4 5EA,77211254
104a Victoria Square,M4 5EA,77211255
105a Victoria Square,M4 5EA,None
106a Victoria Square,M4 5EA,77211257
107a Victoria Square,M4 5EA,77211258
108a Victoria Square,M4 5EA,77211259
109a Victoria Square,M4 5EA,77211260
110a Victoria Square,M4 5EA,77211261
111a Victoria Square,M4 5EA,77211262
112a Victoria Square,M4 5EA,None
113a Victoria Square,M4 5EA,77211264
114a Victoria Square,M4 5EA,77211265
115a Victoria Square,M4 5EA,77211266
116a Victoria Square,M4 5EA,77211267
117a Victoria Square,M4 5EA,None
118a Victoria Square,M4 5EA,None
119a Victoria Square,M4 5EA,77211270
120a Victoria Square,M4 5EA,77211271
121a Victoria Square,M4 5EA,77211272
122a Victoria Square,M4 5EA,77211273
123a Victoria Square,M4 5EA,77211274
124a Victoria Square,M4 5EA,None
125a Victoria Square,M4 5EA,77211276
126a Victoria Square,M4 5EA,77211277
127a Victoria Square,M4 5EA,77211278
128a Victoria Square,M4 5EA,77211279
129a Victoria Square,M4 5EA,77211280
130a Victoria Square,M4 5FA,77211281
131a Victoria Square,M4 5FA,77211282
132a Victoria Square,M4 5FA,77211283
133a Victoria Square,M4 5FA,None
134a Victoria Square,M4 5FA,77211285
135a Victoria Square,M4 5FA,77211286
136a Victoria Square,M4 5FA,77211287
137a Victoria Square,M4 5FA,77211288
138a Victoria Square,M4 5FA,77211289
139a Victoria Square,M4 5FA,77211290
140a Victoria Square,M4 5FA,77211291
141a Victoria Square,M4 5FA,77211292
142a Victoria Square,M4 5FA,77211293
143a Victoria Square,M4 5FA,77211294
144a Victoria Square,M4 5FA,77211295
145a Victoria Square,M4 5FA,None
146a Victoria Square,M4 5FA,77211297
147a Victoria Square,M4 5FA,77211298
148a Victoria Square,M4 5FA,77211299
149a Victoria Square,M4 5FA,77211300
150a Victoria Square,M4 5FA,77211301
151a Victoria Square,M4 5FA,None
152a Victoria Square,M4 5FA,77211303
153a Victoria Square,M4 5FA,None
154a Victoria Square,M4 5FA,77211305
155a Victoria Square,M4 5FA,None
156a Victoria Square,M4 5FA,77211307
157a Victoria Square,M4 5FA,77211308
158a Victoria Square,M4 5FA,77211309
159a Victoria Square,M4 5FA,None
160a Victoria Square,M4 5FA,77211311
161a Victoria Square,M4 5FA,None
162a Victoria Square,M4 5FA,None
163a Victoria Square,M4 5FA,77211314
164a Victoria Square,M4 5FA,77211315
165a Victoria Square,M4 5FA,77211316
166a Victoria Square,M4 5FA,None
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
1 User Input Postcode Manual UPRN Code
115 8 Genteel House Samara Drive UB1 1FJ 12189842
116 9 Genteel House Samara Drive UB1 1FJ 12189843
117 10 Genteel House Samara Drive UB1 1FJ 12189844
118 1 ASH TREE HOUSE SE5 0TE 10009803979 None
119 3 ASH TREE HOUSE Flat 1 Ash Tree House, 2, Thompson Avenue SE5 0TE 10009803981 10009803979
120 5 ASH TREE HOUSE 3 ASH TREE HOUSE SE5 0TE 10009803983 None
121 8 ASH TREE HOUSE Flat 3 ASH TREE HOUSE SE5 0TE 10009803986 10009803981
122 12 ASH TREE HOUSE 5 ASH TREE HOUSE SE5 0TE 10009803990 None
123 Flat 5 ASH TREE HOUSE SE5 0TE 10009803983
124 Flat 8 ASH TREE HOUSE SE5 0TE 10009803986
125 8 ASH TREE HOUSE SE5 0TE None
126 Flat 12 ASH TREE HOUSE SE5 0TE 10009803990
127 12 ASH TREE HOUSE SE5 0TE None
128 FLAT 1 599 HARROW ROAD W10 4RA 217113930
129 FLAT 2 599 HARROW ROAD W10 4RA 217113931
130 FLAT 3 599 HARROW ROAD W10 4RA None
169 24b Honley Road SE6 2HZ None
170 FLAT B 158 LEAHURST ROAD SE13 5NL 100021976974
171 2 COLLEGE HOUSE CM7 1JS 100091449870
172 3 COLLEGE HOUSE CM7 1JS 100091449871
173 1 Anita Street M4 5DU None
174 2 Anita Street M4 5DU 77123061
175 5 Anita Street M4 5DU 77123081
176 6 Anita Street M4 5DU 77123082
177 8 Anita Street M4 5DU None
178 9 Anita Street M4 5DU None
179 10 Anita Street M4 5DU 77123051
180 12 Anita Street M4 5DU 77123053
181 19 Anita Street M4 5DU None
182 22 Anita Street M4 5DU None
183 26 Anita Street M4 5DU 77123068
184 28 Anita Street M4 5DU None
185 30 Anita Street M4 5DU None
186 32 Anita Street M4 5DU None
187 33 Anita Street M4 5DU 77123076
188 34 Anita Street M4 5DU None
189 35 Anita Street M4 5DU 77123078
190 36 Anita Street M4 5DU 77123079
191 23 George Leigh Street M4 5DR 77123171
192 25 George Leigh Street M4 5DR None
193 35 George Leigh Street M4 5DR 77123177
194 39 George Leigh Street M4 5DR 77123179
195 41 George Leigh Street M4 5DR None
196 43 George Leigh Street M4 5DR None
197 49 George Leigh Street M4 5DR None
198 51 George Leigh Street M4 5DR 77123185
199 55 George Leigh Street M4 5DR None
200 57 George Leigh Street M4 5DR None
201 1a, Victoria Square M4 5DX 77211153
202 2a Victoria Square M4 5DX None
203 4a, Victoria Square M4 5DX 77211155
204 5a Victoria Square M4 5DX 77211156
205 6a Victoria Square M4 5DX 77211157
206 7a Victoria Square M4 5DX 77211158
207 8a Victoria Square M4 5DX 77211159
208 9a Victoria Square M4 5DX 77211160
209 10a Victoria Square M4 5DX 77211161
210 11a Victoria Square M4 5DX 77211162
211 12a Victoria Square M4 5DX 77211163
212 13a Victoria Square M4 5DX 77211164
213 14a Victoria Square M4 5DX 77211165
214 15a Victoria Square M4 5DX 77211166
215 16a Victoria Square M4 5DX 77211167
216 17a Victoria Square M4 5DX 77211168
217 18a Victoria Square M4 5DX 77211169
218 19a Victoria Square M4 5DX 77211170
219 20a Victoria Square M4 5DX 77211171
220 21a Victoria Square M4 5DY 77211172
221 22a Victoria Square M4 5DY None
222 23a Victoria Square M4 5DY 77211174
223 24a Victoria Square M4 5DY 77211175
224 25a Victoria Square M4 5DY 77211176
225 26a Victoria Square M4 5DY 77211177
226 27a Victoria Square M4 5DY 77211178
227 28a Victoria Square M4 5DY None
228 29a Victoria Square M4 5DY 77211180
229 30a Victoria Square M4 5DY 77211181
230 31a Victoria Square M4 5DY 77211182
231 32a Victoria Square M4 5DY 77211183
232 33a Victoria Square M4 5DY 77211184
233 34a Victoria Square M4 5DY 77211185
234 35a Victoria Square M4 5DY None
235 36a Victoria Square M4 5DY 77211187
236 37a Victoria Square M4 5DY 77211188
237 38a Victoria Square M4 5DY 77211189
238 39a Victoria Square M4 5DY 77211190
239 40a Victoria Square M4 5DY None
240 41a Victoria Square M4 5DY 77211192
241 42a Victoria Square M4 5DY 77211193
242 43a Victoria Square M4 5DY 77211194
243 44a Victoria Square M4 5DY 77211195
244 45a Victoria Square M4 5DY 77211196
245 46a Victoria Square M4 5DY 77211197
246 47a Victoria Square M4 5DY 77211198
247 48a Victoria Square M4 5DY 77211199
248 49a Victoria Square M4 5DY 77211200
249 50a Victoria Square M4 5DY 77211201
250 51a Victoria Square M4 5DY 77211202
251 52a Victoria Square M4 5DY 77211203
252 53a Victoria Square M4 5DY 77211204
253 54a Victoria Square M4 5DY 77211205
254 55a Victoria Square M4 5DY 77211206
255 56a Victoria Square M4 5DZ 77211207
256 57a Victoria Square M4 5DZ None
257 58a Victoria Square M4 5DZ 77211209
258 59a Victoria Square M4 5DZ 77211210
259 60a Victoria Square M4 5DZ 77211211
260 61a Victoria Square M4 5DZ 77211212
261 62a Victoria Square M4 5DZ 77211213
262 63a Victoria Square M4 5DZ None
263 64a Victoria Square M4 5DZ 77211215
264 65a Victoria Square M4 5DZ 77211216
265 66a Victoria Square M4 5DZ None
266 67a Victoria Square M4 5DZ None
267 68a Victoria Square M4 5DZ 77211219
268 69a Victoria Square M4 5DZ 77211220
269 70a Victoria Square M4 5DZ 77211221
270 71a Victoria Square M4 5DZ 77211222
271 72a Victoria Square M4 5DZ 77211223
272 73a Victoria Square M4 5DZ 77211224
273 74a Victoria Square M4 5DZ None
274 75a Victoria Square M4 5DZ 77211226
275 76a Victoria Square M4 5DZ 77211227
276 77a Victoria Square M4 5DZ None
277 78a Victoria Square M4 5DZ 77211229
278 79a Victoria Square M4 5DZ 77211230
279 80a Victoria Square M4 5DZ 77211231
280 81a Victoria Square M4 5DZ 77211232
281 82 Victoria Square M4 5DZ None
282 83a Victoria Square M4 5DZ 77211234
283 84a Victoria Square M4 5DZ None
284 85a Victoria Square M4 5DZ 77211236
285 86a Victoria Square M4 5DZ 77211237
286 87a Victoria Square M4 5DZ 77211238
287 88a Victoria Square M4 5DZ None
288 89a Victoria Square M4 5DZ 77211240
289 90a Victoria Square M4 5DZ 77211241
290 91a Victoria Square M4 5DZ 77211242
291 92a Victoria Square M4 5DZ 77211243
292 93a Victoria Square M4 5EA 77211244
293 94a Victoria Square M4 5EA None
294 95a Victoria Square M4 5EA 77211246
295 96a Victoria Square M4 5EA 77211247
296 97a Victoria Square M4 5EA 77211248
297 98a Victoria Square M4 5EA 77211249
298 99a Victoria Square M4 5EA 77211250
299 100a Victoria Square M4 5EA 77211251
300 101a Victoria Square M4 5EA None
301 102a Victoria Square M4 5EA None
302 103a Victoria Square M4 5EA 77211254
303 104a Victoria Square M4 5EA 77211255
304 105a Victoria Square M4 5EA None
305 106a Victoria Square M4 5EA 77211257
306 107a Victoria Square M4 5EA 77211258
307 108a Victoria Square M4 5EA 77211259
308 109a Victoria Square M4 5EA 77211260
309 110a Victoria Square M4 5EA 77211261
310 111a Victoria Square M4 5EA 77211262
311 112a Victoria Square M4 5EA None
312 113a Victoria Square M4 5EA 77211264
313 114a Victoria Square M4 5EA 77211265
314 115a Victoria Square M4 5EA 77211266
315 116a Victoria Square M4 5EA 77211267
316 117a Victoria Square M4 5EA None
317 118a Victoria Square M4 5EA None
318 119a Victoria Square M4 5EA 77211270
319 120a Victoria Square M4 5EA 77211271
320 121a Victoria Square M4 5EA 77211272
321 122a Victoria Square M4 5EA 77211273
322 123a Victoria Square M4 5EA 77211274
323 124a Victoria Square M4 5EA None
324 125a Victoria Square M4 5EA 77211276
325 126a Victoria Square M4 5EA 77211277
326 127a Victoria Square M4 5EA 77211278
327 128a Victoria Square M4 5EA 77211279
328 129a Victoria Square M4 5EA 77211280
329 130a Victoria Square M4 5FA 77211281
330 131a Victoria Square M4 5FA 77211282
331 132a Victoria Square M4 5FA 77211283
332 133a Victoria Square M4 5FA None
333 134a Victoria Square M4 5FA 77211285
334 135a Victoria Square M4 5FA 77211286
335 136a Victoria Square M4 5FA 77211287
336 137a Victoria Square M4 5FA 77211288
337 138a Victoria Square M4 5FA 77211289
338 139a Victoria Square M4 5FA 77211290
339 140a Victoria Square M4 5FA 77211291
340 141a Victoria Square M4 5FA 77211292
341 142a Victoria Square M4 5FA 77211293
342 143a Victoria Square M4 5FA 77211294
343 144a Victoria Square M4 5FA 77211295
344 145a Victoria Square M4 5FA None
345 146a Victoria Square M4 5FA 77211297
346 147a Victoria Square M4 5FA 77211298
347 148a Victoria Square M4 5FA 77211299
348 149a Victoria Square M4 5FA 77211300
349 150a Victoria Square M4 5FA 77211301
350 151a Victoria Square M4 5FA None
351 152a Victoria Square M4 5FA 77211303
352 153a Victoria Square M4 5FA None
353 154a Victoria Square M4 5FA 77211305
354 155a Victoria Square M4 5FA None
355 156a Victoria Square M4 5FA 77211307
356 157a Victoria Square M4 5FA 77211308
357 158a Victoria Square M4 5FA 77211309
358 159a Victoria Square M4 5FA None
359 160a Victoria Square M4 5FA 77211311
360 161a Victoria Square M4 5FA None
361 162a Victoria Square M4 5FA None
362 163a Victoria Square M4 5FA 77211314
363 164a Victoria Square M4 5FA 77211315
364 165a Victoria Square M4 5FA 77211316
365 166a Victoria Square M4 5FA None
366 FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY CR2 7DL None

View file

@ -0,0 +1,51 @@
## Checklist for adding a new Lambda
### 1. Create the Lambda scaffold
- Copy the template:
cp -r lambda/_template lambda/<lambda_name>
---
### 2. Add infrastructure prerequisites (shared stack)
- Add a new ECR repository in:
infrastructure/terraform/shared/main.tf
- Apply the shared stack
- This requires commenting out the `if env.stage == "prod"` condition in `.github/workflows/deploy_terraform.yml`
- Verify the ECR repository exists in AWS
---
### 3. Add Docker build configuration
- Create a `Dockerfile` for the Lambda
- Verify the Dockerfile path and build context
- Add a new image build job in `deploy_terraform.yml` using `_build_image.yml`
---
### 4. Wire the Lambda deploy job (CI)
- Add a deploy job using `_deploy_lambda.yml`
- Ensure the deploy job depends on the image build job
---
### 5. Deploy
- Push changes to GitHub
- CI will:
1. Build and push the Docker image
2. Deploy the Lambda
3. Verify everything deployed. Good things to check:
- ECR with image
- SQS
- Trigger SQS
- CloudWatch logs
---
### 6. Delete
1. Delete the copied README from `lambda/<lambda_name>` if you used `cp -r`
---
## Please feel free to update this document to make it easier for the next person

View file

@ -0,0 +1,14 @@
# Template: instantiates the lambda_with_sqs module for one deployment stage.
module "lambda" {
  source = "../modules/lambda_with_sqs"
  # Placeholder: replace with the quoted Lambda name before use.
  name = REPLACE ME #"address2uprn" for example
  stage = var.stage
  # local.image_uri is not defined in this block; it must be declared elsewhere in this configuration.
  image_uri = local.image_uri
  # Passed to the module's `environment` argument.
  environment = {
    STAGE = var.stage
    LOG_LEVEL = "info"
  }
}

View file

@ -0,0 +1,16 @@
# Template Terraform settings: AWS provider constraint, S3 state backend and
# minimum Terraform version.
terraform {
  required_providers {
    aws = {
      source = "hashicorp/aws"
      version = "~> 4.16"
    }
  }
  backend "s3" {
    # Placeholder: replace with the quoted name of the state bucket.
    bucket = REPLACE_ME
    # NOTE(review): the state key is fixed; confirm how per-stage state is kept separate.
    key = "terraform.tfstate"
    region = "eu-west-2"
  }
  required_version = ">= 1.2.0"
}

View file

@ -0,0 +1,27 @@
# Inputs for the Lambda stack and the image reference derived from them.

variable "lambda_name" {
  description = "Logical name of the lambda (e.g. address2uprn)"
  type        = string
}

variable "stage" {
  description = "Deployment stage (e.g. dev, prod)"
  type        = string
}

variable "ecr_repo_url" {
  description = "ECR repository URL (no tag, no digest)"
  type        = string
}

variable "image_digest" {
  description = "Image digest (sha256:...)"
  type        = string
}

locals {
  # Digest-pinned image reference: <repository URL>@<digest>.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

# Exposes the computed image reference.
output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -0,0 +1,14 @@
# address2uprn: one instance of the lambda_with_sqs module per stage.
module "address2uprn" {
  source = "../modules/lambda_with_sqs"

  name      = "address2uprn"
  stage     = var.stage
  image_uri = local.image_uri

  environment = {
    LOG_LEVEL = "info"
    STAGE     = var.stage
  }
}

View file

@ -0,0 +1,17 @@
# Terraform settings for the address2uprn stack.
terraform {
  required_version = ">= 1.2.0"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 4.16"
    }
  }

  # State is stored in the address2uprn-terraform-state S3 bucket.
  backend "s3" {
    bucket = "address2uprn-terraform-state"
    key    = "terraform.tfstate"
    region = "eu-west-2"
  }
}

View file

@ -0,0 +1,27 @@
# Inputs for the address2uprn stack and the image reference derived from them.

variable "lambda_name" {
  description = "Logical name of the lambda (e.g. address2uprn)"
  type        = string
}

variable "stage" {
  description = "Deployment stage (e.g. dev, prod)"
  type        = string
}

variable "ecr_repo_url" {
  description = "ECR repository URL (no tag, no digest)"
  type        = string
}

variable "image_digest" {
  description = "Image digest (sha256:...)"
  type        = string
}

locals {
  # Digest-pinned image reference: <repository URL>@<digest>.
  image_uri = "${var.ecr_repo_url}@${var.image_digest}"
}

# Exposes the computed image reference.
output "resolved_image_uri" {
  value = local.image_uri
}

View file

@ -0,0 +1,44 @@
# Composite module: execution role + SQS queue + container Lambda + SQS trigger.
# Resource names are derived from var.name and var.stage.

# --- IAM role -----------------------------------------------------------
module "role" {
  source = "../../../modules/lambda_execution_role"

  name = "${var.name}-lambda-${var.stage}"
}

# --- SQS queue + DLQ ----------------------------------------------------
module "queue" {
  source = "../../../modules/sqs_queue"

  name = "${var.name}-queue-${var.stage}"
}

# --- Lambda -------------------------------------------------------------
module "lambda" {
  source = "../../../modules/lambda_service"

  name      = "${var.name}-${var.stage}"
  image_uri = var.image_uri
  role_arn  = module.role.role_arn

  memory_size = var.memory_size
  timeout     = var.timeout
  environment = var.environment
}

# --- SQS Lambda trigger -------------------------------------------------
module "sqs_trigger" {
  source = "../../../modules/lambda_sqs_trigger"

  queue_arn        = module.queue.queue_arn
  lambda_arn       = module.lambda.lambda_arn
  lambda_role_name = module.role.role_name
  batch_size       = var.batch_size
}

View file

@ -0,0 +1,11 @@
# Values re-exported from the inner lambda and queue modules.

# ARN reported by module.lambda.
output "lambda_arn" {
  value = module.lambda.lambda_arn
}

# ARN reported by module.queue.
output "queue_arn" {
  value = module.queue.queue_arn
}

# URL reported by module.queue.
output "queue_url" {
  value = module.queue.queue_url
}

View file

@ -0,0 +1,36 @@
# Required inputs (no defaults).
variable "name" {
  type = string
}

variable "stage" {
  type = string
}

variable "image_uri" {
  type = string
}

# Optional inputs (defaults shown).
variable "region" {
  default = "eu-west-2"
  type    = string
}

variable "timeout" {
  default = 60
  type    = number
}

variable "memory_size" {
  default = 1024
  type    = number
}

variable "environment" {
  default = {}
  type    = map(string)
}

variable "batch_size" {
  default = 10
  type    = number
}

View file

@ -0,0 +1,30 @@
# ECR repository named "<name>-<stage>"; images are scanned on push.
resource "aws_ecr_repository" "this" {
  name                 = "${var.name}-${var.stage}"
  image_tag_mutability = "MUTABLE"

  image_scanning_configuration {
    scan_on_push = true
  }
}

# Lifecycle rule: expire images once the repository holds more than
# var.retain_count of them, regardless of tag status.
resource "aws_ecr_lifecycle_policy" "this" {
  repository = aws_ecr_repository.this.name

  policy = jsonencode({
    rules = [
      {
        rulePriority = 1
        description  = "Expire old images"
        selection = {
          tagStatus   = "any"
          countType   = "imageCountMoreThan"
          countNumber = var.retain_count
        }
        action = {
          type = "expire"
        }
      },
    ]
  })
}

View file

@ -0,0 +1,11 @@
# Attributes of the ECR repository created by this module.

output "repository_name" {
  value = aws_ecr_repository.this.name
}

output "repository_url" {
  value = aws_ecr_repository.this.repository_url
}

output "repository_arn" {
  value = aws_ecr_repository.this.arn
}

View file

@ -0,0 +1,15 @@
# Inputs for the repository module.

variable "name" {
  type        = string
  description = "Base name of the repository (without stage)"
}

variable "stage" {
  type        = string
  description = "Deployment stage (e.g. dev, prod)"
}

variable "retain_count" {
  type        = number
  default     = 10
  description = "Number of images to retain"
}

View file

@ -1,3 +1,6 @@
# This ECR module is used for images deployed via Serverless.
# TODO: unify the ecr and container_registry modules into one.
resource "aws_ecr_repository" "my_repository" {
name = "${var.ecr_name}"
image_tag_mutability = "MUTABLE"

View file

@ -1,4 +1,10 @@
# Outputs for the ECR repository created by this module.

output "ecr_repository_name" {
  # Fixed typo: the description previously read "EPR".
  description = "Name of the ECR repo in AWS"
  value       = aws_ecr_repository.my_repository.name
}

output "ecr_repository_url" {
  description = "Full ECR repository URL"
  value       = aws_ecr_repository.my_repository.repository_url
}

View file

@ -0,0 +1,37 @@
# Trust policy: lets the Lambda service (lambda.amazonaws.com) assume the role.
data "aws_iam_policy_document" "assume" {
  statement {
    effect  = "Allow"
    actions = ["sts:AssumeRole"]

    principals {
      type        = "Service"
      identifiers = ["lambda.amazonaws.com"]
    }
  }
}

# The execution role itself, named by the caller.
resource "aws_iam_role" "this" {
  name               = var.name
  assume_role_policy = data.aws_iam_policy_document.assume.json
}

# Attaches the AWS-managed AWSLambdaBasicExecutionRole policy.
resource "aws_iam_role_policy_attachment" "basic_logs" {
  policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
  role       = aws_iam_role.this.name
}

# Inline policy allowing the listed ECR read actions on all resources.
# NOTE(review): confirm the execution role needs these, and whether the
# image actions can be scoped to a single repository.
resource "aws_iam_role_policy" "ecr_pull" {
  role = aws_iam_role.this.name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Resource = "*"
        Action = [
          "ecr:GetAuthorizationToken",
          "ecr:BatchGetImage",
          "ecr:GetDownloadUrlForLayer",
        ]
      },
    ]
  })
}

View file

@ -0,0 +1,7 @@
# Identifiers of the IAM role created by this module.

output "role_arn" {
  value = aws_iam_role.this.arn
}

output "role_name" {
  value = aws_iam_role.this.name
}

View file

@ -0,0 +1,4 @@
# Single input: the name given to the IAM role.
variable "name" {
  type        = string
  description = "IAM role name for the Lambda execution role"
}

View file

@ -0,0 +1,15 @@
# Container-image Lambda function; every setting comes from module inputs.
resource "aws_lambda_function" "this" {
  function_name = var.name
  role          = var.role_arn

  # Deployed from a container image rather than a zip archive.
  package_type = "Image"
  image_uri    = var.image_uri

  memory_size = var.memory_size
  timeout     = var.timeout

  # Publish a new function version on create and update.
  publish = true

  environment {
    variables = var.environment
  }
}

View file

@ -0,0 +1,3 @@
# ARN of the Lambda function created by this module.
output "lambda_arn" {
  value = aws_lambda_function.this.arn
}

View file

@ -0,0 +1,18 @@
# Required inputs.
variable "name" {
  type = string
}

variable "role_arn" {
  type = string
}

variable "image_uri" {
  type = string
}

# Optional inputs (defaults shown).
variable "timeout" {
  default = 30
  type    = number
}

variable "memory_size" {
  default = 512
  type    = number
}

variable "environment" {
  default = {}
  type    = map(string)
}

View file

@ -0,0 +1,23 @@
# Connects the SQS queue to the Lambda so that queued messages invoke it in
# batches of up to var.batch_size.
resource "aws_lambda_event_source_mapping" "this" {
  event_source_arn = var.queue_arn
  function_name    = var.lambda_arn
  batch_size       = var.batch_size
  enabled          = true

  # The mapping has no implicit reference to the role policy, so without this
  # Terraform may create it before the role is allowed to read the queue.
  # Lambda then rejects the mapping for missing SQS permissions.
  depends_on = [aws_iam_role_policy.allow_sqs]
}
# Inline policy on the Lambda's role: allows the listed SQS actions on the
# source queue only.
resource "aws_iam_role_policy" "allow_sqs" {
  role = var.lambda_role_name

  policy = jsonencode({
    Version = "2012-10-17"
    Statement = [
      {
        Effect   = "Allow"
        Resource = var.queue_arn
        Action = [
          "sqs:ReceiveMessage",
          "sqs:DeleteMessage",
          "sqs:GetQueueAttributes",
        ]
      },
    ]
  })
}
}

View file

@ -0,0 +1,8 @@
# Required inputs.
variable "lambda_arn" {
  type = string
}

variable "lambda_role_name" {
  type = string
}

variable "queue_arn" {
  type = string
}

# Optional: batch size passed to the event source mapping.
variable "batch_size" {
  default = 10
  type    = number
}

View file

@ -0,0 +1,14 @@
# Dead-letter queue, named "<name>-dlq".
resource "aws_sqs_queue" "dlq" {
  name = "${var.name}-dlq"
}

# Main queue. After var.max_receive_count receives, a message is redriven to
# the dead-letter queue.
resource "aws_sqs_queue" "this" {
  name = var.name

  # NOTE(review): fixed at 120s; confirm it is at least the timeout of the
  # Lambda that consumes this queue.
  visibility_timeout_seconds = 120

  redrive_policy = jsonencode({
    deadLetterTargetArn = aws_sqs_queue.dlq.arn
    maxReceiveCount     = var.max_receive_count
  })
}

View file

@ -0,0 +1,7 @@
# Identifiers of the main queue (not the dead-letter queue).

output "queue_arn" {
  value = aws_sqs_queue.this.arn
}

output "queue_url" {
  value = aws_sqs_queue.this.url
}

View file

@ -0,0 +1,6 @@
# Name of the main queue; the dead-letter queue name is derived from it.
variable "name" {
  type = string
}

# Used as maxReceiveCount in the main queue's redrive policy.
variable "max_receive_count" {
  default = 5
  type    = number
}

View file

@ -0,0 +1,30 @@
# S3 bucket with versioning, AES256 default encryption and all public access
# blocked.
resource "aws_s3_bucket" "this" {
  bucket = var.bucket_name
}

# Enable object versioning on the bucket.
resource "aws_s3_bucket_versioning" "this" {
  bucket = aws_s3_bucket.this.id

  versioning_configuration {
    status = "Enabled"
  }
}

# Default server-side encryption with AES256.
resource "aws_s3_bucket_server_side_encryption_configuration" "this" {
  bucket = aws_s3_bucket.this.id

  rule {
    apply_server_side_encryption_by_default {
      sse_algorithm = "AES256"
    }
  }
}

# Turn on all four public-access block settings.
resource "aws_s3_bucket_public_access_block" "this" {
  bucket = aws_s3_bucket.this.id

  block_public_acls       = true
  ignore_public_acls      = true
  block_public_policy     = true
  restrict_public_buckets = true
}

View file

@ -0,0 +1,7 @@
# Identifiers of the bucket created by this module.

output "bucket_name" {
  value = aws_s3_bucket.this.bucket
}

output "bucket_arn" {
  value = aws_s3_bucket.this.arn
}

View file

@ -0,0 +1,3 @@
# Name of the S3 bucket to create.
variable "bucket_name" {
  type = string
}

View file

@ -1,5 +1,4 @@
stage = "dev"
profile = "DevAdmin"
region = "eu-west-2"
# Domain

View file

@ -8,7 +8,6 @@ terraform {
backend "s3" {
bucket = "assessment-model-terraform-state"
region = "eu-west-2"
profile = "DevAdmin"
key = "terraform.tfstate"
}
@ -16,7 +15,6 @@ terraform {
}
provider "aws" {
profile = var.profile
region = var.region
}
@ -91,101 +89,101 @@ resource "aws_db_instance" "default" {
# Set up the bucket that receives the CSV uploads of EPCs to be retrofitted
module "s3_presignable_bucket" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-plan-inputs-${var.stage}"
environment = var.stage
allowed_origins = var.allowed_origins
}
module "s3_due_considerations_bucket" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-due-considerations-${var.stage}"
environment = var.stage
allowed_origins = var.allowed_origins
}
module "s3_eco_spreadseet_bucket" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-eco-spreadsheet-${var.stage}"
environment = var.stage
allowed_origins = var.allowed_origins
}
module "s3" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-datalake-${var.stage}"
allowed_origins = var.allowed_origins
}
module "model_directory" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-model-directory-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_sap_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-sap-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_sap_data" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-data-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_carbon_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-carbon-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_heat_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-heat-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_lighting_cost_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-lighting-cost-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_heating_cost_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-heating-cost-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_hot_water_cost_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-hot-water-cost-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_heating_kwh_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-heating-kwh-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_hotwater_kwh_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-hotwater-kwh-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
module "retrofit_sap_baseline_predictions" {
source = "./modules/s3"
source = "../modules/s3"
bucketname = "retrofit-sap-baseline-predictions-${var.stage}"
allowed_origins = var.allowed_origins
}
// We make this bucket presignable, because we want to generate download links for the frontend
module "retrofit_energy_assessments" {
source = "./modules/s3_presignable_bucket"
source = "../modules/s3_presignable_bucket"
bucketname = "retrofit-energy-assessments-${var.stage}"
allowed_origins = var.allowed_origins
environment = var.stage
@ -193,7 +191,7 @@ module "retrofit_energy_assessments" {
# Set up the route53 record for the API
module "route53" {
source = "./modules/route53"
source = "../modules/route53"
domain_name = var.domain_name
api_url_prefix = var.api_url_prefix
providers = {
@ -201,75 +199,76 @@ module "route53" {
}
}
# Create an ECR repository for storage of the lambda's docker images
module "ecr" {
ecr_name = "fastapi-repository-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_sap_prediction_ecr" {
ecr_name = "lambda-sap-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "due_considerations_ecr" {
ecr_name = "due-considerations-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "eco_spreadsheet_ecr" {
ecr_name = "eco-spreadsheet-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_carbon_prediction_ecr" {
ecr_name = "lambda-carbon-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_heat_prediction_ecr" {
ecr_name = "lambda-heat-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
# ECR repos for lighting cost, heating cost and hot water cost models
module "lambda_lighting_cost_prediction_ecr" {
ecr_name = "lighting-cost-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_heating_cost_prediction_ecr" {
ecr_name = "heating-cost-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_hot_water_cost_prediction_ecr" {
ecr_name = "hot-water-cost-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
# For heating and hot water kwh models
module "lambda_heating_kwh_prediction_ecr" {
ecr_name = "heating-kwh-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
module "lambda_hotwater_kwh_prediction_ecr" {
ecr_name = "hotwater-kwh-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
# Baselining models
module "sap_baseline_ecr" {
ecr_name = "sap-baseline-prediction-${var.stage}"
source = "./modules/ecr"
source = "../modules/ecr"
}
##############################################
# CDN - Cloudfront
##############################################
module "cloudfront_distribution" {
source = "./modules/cloudfront"
source = "../modules/cloudfront"
bucket_name = module.s3.bucket_name
bucket_id = module.s3.bucket_id
bucket_arn = module.s3.bucket_arn
@ -281,11 +280,35 @@ module "cloudfront_distribution" {
# SES - Email sending
################################################
module "ses" {
source = "./modules/ses"
source = "../modules/ses"
domain_name = "domna.homes"
stage = var.stage
}
output "ses_dns_records" {
value = module.ses.dns_records
}
################################################
# Address2UPRN Lambda ECR
################################################
# Bucket created through the shared tf_state_bucket module.
# NOTE(review): presumably stores the Terraform state of a separate address2uprn stack - confirm
# against that stack's backend configuration.
# NOTE(review): unlike the ECR names in this file, the bucket name carries no ${var.stage} suffix,
# so applying this configuration for a second stage would request the same bucket name - confirm this is intended.
module "address2uprn_state_bucket" {
source = "../modules/tf_state_bucket"
bucket_name = "address2uprn-terraform-state"
}
# Re-exports the bucket name reported by the module so it can be read from this stack's outputs.
output "address2uprn_state_bucket_name" {
value = module.address2uprn_state_bucket.bucket_name
}
# Registry for address2uprn, built from the shared container_registry module and
# parameterised by the deployment stage.
# NOTE(review): presumably an ECR repository for the Lambda image (per the section banner) - confirm in the module.
module "address2uprn_registry" {
source = "../modules/container_registry"
name = "address2uprn"
stage = var.stage
}
# Re-exports the repository URL reported by the registry module.
output "address2uprn_repository_url" {
value = module.address2uprn_registry.repository_url
}

View file

@ -3,11 +3,6 @@ variable stage {
type = string
}
variable "profile" {
description = "AWS profile to use"
type = string
}
variable "region" {
description = "AWS region"
type = string

View file

@ -7,20 +7,29 @@ import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine, db_read_session
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
from backend.app.db.models.recommendations import (
Recommendation,
Plan,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import (
PropertyModel,
PropertyDetailsEpcModel,
PropertyDetailsSpatial,
)
from backend.app.db.functions.materials_functions import get_materials
from collections import defaultdict
from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 485 # Peabody
PORTFOLIO_ID = 502 # Peabody
SCENARIOS = [
970,
986,
]
scenario_names = {
970: "EPC C - No solid floor, EQI, IWI",
986: "EPC C",
}
@ -31,22 +40,26 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Properties
# --------------------
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties_data = [
{
**{col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns},
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns},
**{
col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for p in properties_query
]
@ -58,13 +71,10 @@ def get_data(portfolio_id, scenario_ids):
session.query(
Plan.scenario_id,
Plan.property_id,
func.max(Plan.created_at).label("latest_created_at")
func.max(Plan.created_at).label("latest_created_at"),
)
.filter(Plan.scenario_id.in_(scenario_ids))
.group_by(
Plan.scenario_id,
Plan.property_id
)
.group_by(Plan.scenario_id, Plan.property_id)
.subquery()
)
@ -76,9 +86,9 @@ def get_data(portfolio_id, scenario_ids):
session.query(Plan)
.join(
latest_plans_subq,
(Plan.scenario_id == latest_plans_subq.c.scenario_id) &
(Plan.property_id == latest_plans_subq.c.property_id) &
(Plan.created_at == latest_plans_subq.c.latest_created_at)
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
& (Plan.property_id == latest_plans_subq.c.property_id)
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
)
.all()
)
@ -103,28 +113,29 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendations (NO materials yet)
# --------------------
recommendations_query = session.query(
Recommendation,
Plan.scenario_id,
PlanRecommendations.plan_id
).join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan,
Plan.id == PlanRecommendations.plan_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
recommendations_query = (
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False),
)
.all()
)
recommendations_data = [
{
**{col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns},
**{
col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns
},
"scenario_id": r.scenario_id,
"materials": [] # placeholder
"materials": [], # placeholder
}
for r in recommendations_query
]
@ -134,23 +145,25 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendation materials (SEPARATE QUERY)
# --------------------
materials_query = session.query(
RecommendationMaterials
).filter(
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
).all()
materials_query = (
session.query(RecommendationMaterials)
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
.all()
)
# Group materials by recommendation_id
materials_by_recommendation = defaultdict(list)
for m in materials_query:
materials_by_recommendation[m.recommendation_id].append({
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
})
materials_by_recommendation[m.recommendation_id].append(
{
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
}
)
# Attach materials safely (no filtering side effects)
for r in recommendations_data:
@ -161,7 +174,9 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
@ -172,10 +187,8 @@ with db_read_session() as session:
materials = pd.DataFrame(materials)
material_lookup = (
materials
.set_index("id")[["type", "includes_battery"]]
.to_dict("index")
material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict(
"index"
)
@ -189,14 +202,14 @@ def has_solar_with_battery(materials_list):
return False
recommendations_df["has_solar_with_battery"] = (
recommendations_df["materials"].apply(has_solar_with_battery)
recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply(
has_solar_with_battery
)
recommendations_df["measure_type"] = np.where(
recommendations_df["has_solar_with_battery"] == True,
recommendations_df["measure_type"] + "_with_battery",
recommendations_df["measure_type"]
recommendations_df["measure_type"],
)
# Adjust material type to indicate if there is a battery included
@ -211,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
for scenario_id in SCENARIOS:
# Get recs for this scenario
recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
["property_id", "measure_type", "estimated_cost", "default"]
recommended_measures_df = recommendations_df[
recommendations_df["scenario_id"] == scenario_id
][["property_id", "measure_type", "estimated_cost", "default"]]
recommended_measures_df = recommended_measures_df[
recommended_measures_df["default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
["property_id", "default", "sap_points"]]
post_install_sap = recommendations_df[
recommendations_df["scenario_id"] == scenario_id
][["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
post_install_sap = (
post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
)
# Find dupes by property id and measure type
dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
dupes = recommended_measures_df.duplicated(
subset=["property_id", "measure_type"], keep=False
)
dupe_df = recommended_measures_df[dupes]
if dupe_df.shape:
# Drop dupes - happened due to a funny bug
recommended_measures_df = recommended_measures_df.drop_duplicates(
subset=["property_id", "measure_type"], keep='first'
subset=["property_id", "measure_type"], keep="first"
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
# Total cost is the row sum, excluding the property_id column
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
columns=["property_id"]
).sum(axis=1)
recommendations_measures_pivot["total_retrofit_cost"] = (
recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1)
)
df = properties_df[
[
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
"heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"id"
df = (
properties_df[
[
"landlord_property_id",
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
"id",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(post_install_sap, how="left", on="property_id")
)
# df = df.drop(columns=["property_id"])
@ -262,21 +292,25 @@ for scenario_id in SCENARIOS:
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
lambda x: sap_to_epc(x)
)
df["uprn"] = df["uprn"].astype(str)
relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
df2 = df.merge(
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
suffixes=("", "_plan")
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
how="left",
on="property_id",
suffixes=("", "_plan"),
)
print(df2["predicted_post_works_epc"].value_counts())
print(df2["post_epc_rating"].value_counts())
z = df2[
(df2["predicted_post_works_epc"] != "D") &
(df2["post_epc_rating"].astype(str) == "Epc.D")
]
(df2["predicted_post_works_epc"] != "D")
& (df2["post_epc_rating"].astype(str) == "Epc.D")
]
df2["predicted_post_works_epc"].value_counts()
df2["post_epc_rating"].astype(str).value_counts()
@ -291,189 +325,6 @@ for scenario_id in SCENARIOS:
df[df["predicted_post_works_sap"] == ""]
# Create excel to store to
<<<<<<< HEAD
filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx")
filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
=======
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
# asset_list = pd.DataFrame(asset_list)
# asset_list = asset_list.rename(
# columns={
# "postcode": "domna_postcode"
# }
# )
# if "domna_full_address":
# # For Peabody
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
#
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
# asset_list = asset_list.merge(
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
# how="left",
# on="uprn"
# )
# Get conservation area data from property details spatial. based on the UPRNs
def get_conservation_area_data(uprns):
    """
    Fetch the PropertyDetailsSpatial rows matching the given UPRNs.

    :param uprns: collection of UPRN values used for the ``IN`` filter on
        PropertyDetailsSpatial.uprn
    :return: DataFrame with one row per matching record and one column per column of the
        PropertyDetailsSpatial table. The columns are present even when nothing matches,
        so callers can always index by column name.
    """
    column_names = [col.name for col in PropertyDetailsSpatial.__table__.columns]

    session = sessionmaker(bind=db_engine)()
    try:
        session.begin()
        spatial_rows = (
            session.query(PropertyDetailsSpatial)
            .filter(PropertyDetailsSpatial.uprn.in_(uprns))  # Filter by UPRNs
            .all()
        )
        # Copy the values out while the session is still open
        spatial_data = [
            {name: getattr(spatial, name) for name in column_names}
            for spatial in spatial_rows
        ]
    finally:
        # Release the connection even if the query or the attribute access raises
        session.close()

    # Explicit columns keep the frame's schema stable for an empty result
    return pd.DataFrame(spatial_data, columns=column_names)
# Collect the distinct UPRNs as ints, leaving out missing values and the literal "<NA>" string.
# NOTE(review): "<NA>" is presumably what an earlier Int64 -> str cast leaves for missing UPRNs - confirm.
# NOTE(review): asset_list is not assigned anywhere in the visible part of this script (its
# construction above is commented out) - confirm where it comes from.
uprns = asset_list[
~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
]["uprn"].astype(int).unique().tolist()
conservation_area_data = get_conservation_area_data(uprns)
# Cast back to str so the join key has the same type on both sides of the merge below
conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
# Left join: every asset_list row is kept, the three status columns are added where a UPRN matches
asset_list = asset_list.merge(
conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
how="left",
on="uprn"
)
# For exporting
# Write df to a workbook without the index column.
# NOTE(review): the target is an absolute path under one user's home directory, so this
# statement only works on that machine.
df.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
"with ID.xlsx",
index=False
)
# asset_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
# index=False
# )
# Load the condition cost table from the "Prices - Khalim" sheet; header=35 takes the
# column labels from the row at 0-based position 35 and ignores everything above it.
# NOTE(review): absolute, user-specific path as well.
condition_costs = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
sheet_name="Prices - Khalim",
header=35
)
# Remove unnamed columns and reset index
# (columns whose label starts with "Unnamed" are the ones pandas generated for header cells with no text)
condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
condition_costs = condition_costs.reset_index(drop=True)
# We now estimate condition cost
def simulate_condition(asset_list, condition_costs):
    """
    This function is for testing, and will simulate the total condition cost at every
    condition score from 10 down to 1 for each property, to see what the costing array
    looks like.

    :param asset_list: DataFrame with one row per property; the "uprn", "bathrooms" and
        "total_floor_area" columns are read
    :param condition_costs: DataFrame with a "Condition" column, a "Bathroom" column and any
        further cost columns; the first row matching each condition score is used
    :return: asset_list left-merged on "uprn" with the added columns
        "Condition 10" ... "Condition 1"
    """
    # NOTE(review): this helper reads row["bathrooms"], while the cost loop further down the
    # script reads row["n_bathrooms"] - confirm which column name the asset list really has.
    condition_scores = list(range(10, 0, -1))
    condition_columns = ["Condition " + str(score) for score in condition_scores]

    # The cost row for a condition score does not depend on the property, so it is looked up
    # once per score instead of once per property. Skipped for an empty asset list so that
    # case never touches condition_costs.
    unit_costs = {}
    if len(asset_list) > 0:
        for score in condition_scores:
            unit_costs[score] = (
                condition_costs[condition_costs["Condition"] == score]
                .drop(columns=["Condition"])
                .iloc[0]
            )

    simulated_rows = []
    for _, row in asset_list.iterrows():
        n_bathrooms = row["bathrooms"]
        simulated = {"uprn": row["uprn"]}
        for score, column in zip(condition_scores, condition_columns):
            # Each cost is scaled by floor area; the multiplication returns a new Series,
            # so the cached lookup is never modified
            scaled_cost = unit_costs[score] * row["total_floor_area"]
            scaled_cost["Bathroom"] = scaled_cost["Bathroom"] * n_bathrooms
            simulated[column] = scaled_cost.sum()
        simulated_rows.append(simulated)

    # Explicit columns keep "uprn" available as the merge key when asset_list is empty
    condition_df = pd.DataFrame(simulated_rows, columns=["uprn", *condition_columns])

    return asset_list.merge(condition_df, how="left", on="uprn")
# asset_list = simulate_condition(asset_list, condition_costs)
# We calculate the condition cost based on the condition
for _, row in asset_list.iterrows():
    condition = row["condition_score"]
    # Skip properties without a condition score. pd.isna covers None as well as the
    # NaN / pd.NA that pandas uses for blank cells, which int(float(...)) cannot convert.
    if pd.isna(condition) or condition == "":
        continue
    condition = int(float(condition))

    # First cost row recorded for this condition score, without the key column
    condition_cost = (
        condition_costs[condition_costs["Condition"] == condition]
        .drop(columns=["Condition"])
        .iloc[0]
    )
    # Each cost is scaled by floor area
    condition_cost = condition_cost * float(row["total_floor_area"])
    n_bathrooms = row["n_bathrooms"]
    condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
    total_condition_cost = condition_cost.sum()
    # Written through a UPRN mask, so every row sharing this UPRN receives the value
    asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost
# Store output
# NOTE(review): absolute, user-specific path - this only works on that machine.
asset_list.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
index=False
)
# Side-by-side view of the condition score, the decoration sums and the computed cost.
# NOTE(review): "decoration_sum_min " has a trailing space, unlike "decoration_sum_max" -
# confirm that the column is really named that way, otherwise this selection raises KeyError.
condition_cost_comparison = asset_list[
["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
]
# Testing
# Result is discarded when run as a script; only useful in an interactive session
plans_df.head()
# Previously exported scenario workbook, read back for comparison.
# NOTE(review): absolute, user-specific path.
example = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
"SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
)
# Attach the landlord's own property id to every plan row
plans_df2 = plans_df.merge(
properties_df[["property_id", "landlord_property_id"]],
left_on="property_id",
right_on="property_id",
how="left"
)
# NOTE(review): 909 is not one of the ids in SCENARIOS (970, 986), which is what plans_df was
# loaded for - confirm this filter can match anything.
plans_df2 = plans_df2[plans_df2["scenario_id"] == 909]
# Rows whose property_id repeats an earlier row (the first occurrence is not flagged)
dupes = plans_df2[plans_df2["property_id"].duplicated()]
# merge on plans
# NOTE(review): no `on` is given, so pandas joins on every column name the two frames share -
# confirm that is the intended key.
example = example.merge(
plans_df, how="left",
)
>>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c