added terraform files and test plan

This commit is contained in:
Jun-te Kim 2026-02-02 16:38:17 +00:00
parent 2b01ac9f6c
commit 6c05b0d6a4
19 changed files with 417 additions and 319 deletions

View file

@ -0,0 +1,86 @@
# Composite action: provisions the ECR repository with Terraform, builds the
# Lambda container image, pushes it to ECR, then deploys the Lambda with
# Terraform using the freshly pushed image tag.
name: "Build and Push Lambda Image to ECR"
description: "Reusable action for building and pushing lambda Docker image to ECR"
inputs:
  ecr_name:
    description: "Lambda name / ECR repo name"
    required: true
  dockerfile_path:
    description: "Path to Dockerfile"
    required: true
  ecr_tf_dir:
    description: "Path to ECR terraform directory"
    required: true
  lambda_tf_dir:
    description: "Path to Lambda terraform directory"
    required: true
  aws-access-key-id:
    description: "AWS access key"
    required: true
  aws-secret-access-key:
    description: "AWS secret key"
    required: true
  aws-region:
    description: "AWS region"
    required: true
  git-sha:
    description: "Git commit SHA"
    required: true
  git-ref:
    description: "Git ref name"
    required: true
runs:
  using: "composite"
  steps:
    - uses: actions/checkout@v4
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}
    - name: Log in to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v2
    # The repository must exist before the image can be pushed.
    - name: Deploy ECR
      uses: ./.github/workflows/actions/terraform-deploy
      with:
        working_directory: ${{ inputs.ecr_tf_dir }}
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}
    - name: Set Docker image tag
      id: set_tag
      shell: bash
      # Inputs are passed through env instead of being expanded inside the
      # script, so a crafted ref name cannot inject shell commands.
      env:
        GIT_SHA: ${{ inputs.git-sha }}
        GIT_REF: ${{ inputs.git-ref }}
      run: |
        SHORT_SHA=$(printf '%s' "${GIT_SHA}" | cut -c1-7)
        # Docker tags only allow [A-Za-z0-9_.-]; map everything else
        # (including '/') to '-'.
        BRANCH=$(printf '%s' "${GIT_REF}" | tr -c 'A-Za-z0-9_.-' '-')
        TAG="${BRANCH}-${SHORT_SHA}"
        echo "IMAGE_TAG=${TAG}" >> "$GITHUB_ENV"
        echo "tag=${TAG}" >> "$GITHUB_OUTPUT"
    - name: Build and push Docker image
      shell: bash
      env:
        ECR_REGISTRY: ${{ steps.login-ecr.outputs.registry }}
        ECR_NAME: ${{ inputs.ecr_name }}
        IMAGE_TAG: ${{ steps.set_tag.outputs.tag }}
        DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
      run: |
        IMAGE_URI="${ECR_REGISTRY}/${ECR_NAME}:${IMAGE_TAG}"
        echo "Building Docker image for ${ECR_NAME}..."
        docker build -t "${IMAGE_URI}" -f "${DOCKERFILE_PATH}" .
        echo "Pushing to ECR..."
        docker push "${IMAGE_URI}"
    # Deploys the Lambda pointing at the tag that was just pushed.
    - name: Deploy Lambda
      uses: ./.github/workflows/actions/terraform-deploy
      with:
        working_directory: ${{ inputs.lambda_tf_dir }}
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}
        lambda-image-tag: ${{ steps.set_tag.outputs.tag }}

View file

@ -0,0 +1,55 @@
# Composite action: initialises Terraform in the given directory, writes a
# plan to `tfplan`, and applies exactly that saved plan.
name: "Terraform Plan Shared Config"
description: "Plans and applies the Terraform config in the given working directory"
inputs:
  working_directory:
    description: "Directory containing Terraform config"
    required: true
  aws-access-key-id:
    description: "AWS access key"
    required: true
  aws-secret-access-key:
    description: "AWS secret key"
    required: true
  aws-region:
    description: "AWS region"
    required: true
  lambda-image-tag:
    description: "Tag of the Lambda image (e.g., GitHub SHA)"
    required: false
runs:
  using: "composite"
  steps:
    - uses: actions/checkout@v4
    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}
    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3
    - name: Terraform Init
      working-directory: ${{ inputs.working_directory }}
      shell: bash
      # -input=false makes Terraform fail fast instead of prompting in CI.
      run: terraform init -input=false -reconfigure
    - name: Terraform Plan
      working-directory: ${{ inputs.working_directory }}
      shell: bash
      # The tag is passed through env rather than expanded inside the script,
      # so its value cannot inject shell commands.
      env:
        LAMBDA_IMAGE_TAG: ${{ inputs.lambda-image-tag }}
      run: |
        # Only pass lambda_image_tag when the caller supplied one; configs
        # that do not declare the variable are planned without it.
        if [ -n "${LAMBDA_IMAGE_TAG}" ]; then
          terraform plan -input=false -out=tfplan -var="lambda_image_tag=${LAMBDA_IMAGE_TAG}"
        else
          terraform plan -input=false -out=tfplan
        fi
    - name: Terraform Apply
      working-directory: ${{ inputs.working_directory }}
      shell: bash
      # Applies the saved plan file, so what runs is exactly what was planned.
      run: terraform apply -input=false -auto-approve tfplan

View file

@ -43,40 +43,16 @@ jobs:
env:
AWS_PROFILE: "DevAdmin"
# Deploy shared Terraform resources
- name: Terraform Init
run: cd infrastructure/terraform && terraform init
run: cd infrastructure/terraform/shared && terraform init
- name: Terraform Workspace
run: |
# BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform
# terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
# Until Khalim makes a different environment for us
terraform workspace select dev
cd infrastructure/terraform/shared
terraform workspace select dev || terraform workspace new dev
- name: Terraform Plan
- name: Terraform Plan (shared)
run: |
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
cd infrastructure/terraform && terraform plan -var-file=dev.tfvars
- name: Deploy to Dev
if: github.ref == 'refs/heads/dev'
run: echo "hello world"
# run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
env:
name: dev
# - name: Configure AWS credentials (ProdAdmin)
# uses: aws-actions/configure-aws-credentials@v1
# with:
# aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
# aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
# aws-region: eu-west-2
# env:
# AWS_PROFILE: "ProdAdmin"
# - name: Deploy to Prod
# if: github.ref == 'refs/heads/prod'
# run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
# env:
# name: prod
cd infrastructure/terraform/shared
terraform plan -var-file=dev.tfvars

View file

@ -72,21 +72,21 @@ def app():
data_folder = "/workspaces/model/asset_list"
data_filename = "assets.xlsx"
sheet_name = "Sheet1"
postcode_column = "Post Code"
postcode_column = "POSTCODE"
address1_column = None
address1_method = "house_number_extraction"
fulladdress_column = "User Input"
fulladdress_column = "ADDRESS"
address_cols_to_concat = None
missing_postcodes_method = None
landlord_year_built = None
landlord_os_uprn = "UPRN"
landlord_property_type = None
landlord_built_form = None
landlord_built_form = "BUILD FORM"
landlord_wall_construction = None
landlord_roof_construction = None
landlord_heating_system = None
landlord_existing_pv = None
landlord_property_id = "LLUPRN"
landlord_property_id = "UPRN"
landlord_sap = None
outcomes_filename = None
outcomes_sheetname = None

View file

@ -0,0 +1,7 @@
# AWS Lambda Python 3.10 base image.
FROM public.ecr.aws/lambda/python:3.10
# Copy function code
COPY app.py ${LAMBDA_TASK_ROOT}
# Set the handler as <module>.<function>. Only app.py is copied into the
# image, so the module must be `app`; `main.handler` would fail at cold start
# because no `main` module exists in the task root.
# NOTE(review): assumes app.py defines `handler(event, context)` — confirm.
CMD ["app.handler"]

View file

@ -14,6 +14,9 @@ EPC_AUTH_TOKEN = os.getenv(
"EPC_AUTH_TOKEN",
)
if EPC_AUTH_TOKEN is None:
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
import re
from difflib import SequenceMatcher
from typing import Set
@ -38,6 +41,34 @@ def levenshtein(a: str, b: str) -> float:
def tokenise(s: str) -> Set[str]:
return set(s.split())
def extract_building_number(s: str) -> str | None:
"""
Extract the main building number (NOT flat/unit).
Assumes formats like:
- '42 moreton road'
- 'flat 3 42 moreton road'
"""
tokens = s.split()
# remove flat/unit context
cleaned = []
skip_next = False
for t in tokens:
if t in ("flat", "apt", "apartment", "unit"):
skip_next = True
continue
if skip_next:
skip_next = False
continue
cleaned.append(t)
# first remaining number is building number
for t in cleaned:
if re.fullmatch(r"\d+[a-z]?", t):
return t
return None
a_norm = normalise_address(a)
b_norm = normalise_address(b)
@ -52,6 +83,13 @@ def levenshtein(a: str, b: str) -> float:
if nums_a and nums_b and nums_a.isdisjoint(nums_b):
return 0.0
# 🔒 HARD GUARD: building number must match
bld_a = extract_building_number(a_norm)
bld_b = extract_building_number(b_norm)
if bld_a and bld_b and bld_a != bld_b:
return 0.0
# --- order-sensitive flat/building guard ---
seq_a = extract_number_sequence(a_norm)
seq_b = extract_number_sequence(b_norm)
@ -418,6 +456,10 @@ def run_all_test():
get_uprn("46 Oswald Street", "E5 0BT"), False
) # this one return "flat 1, in 1 semley gate"
get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
get_uprn_candidates(
get_epc_data_with_postcode("Cr2 7dl"),
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
)
if __name__ == "__main__":
@ -511,6 +553,11 @@ if __name__ == "__main__":
)
def handler(event, context):
    """
    Placeholder Lambda entry point.

    Prints a fixed greeting and returns it in a 200 response; `event` and
    `context` are currently unused.
    """
    greeting = "hello world"
    print(greeting)
    response = {"statusCode": 200, "body": greeting}
    return response
# TODO: add a function dispatcher,
# e.g. get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"), "Flat 1, 5 Semley Gate") and the same call for "Flat 5, 1 Semley Gate"

View file

@ -115,11 +115,16 @@ FLAT 43 Goodstone Court,HA1 4FL,10070269095
8 Genteel House Samara Drive,UB1 1FJ,12189842
9 Genteel House Samara Drive,UB1 1FJ,12189843
10 Genteel House Samara Drive,UB1 1FJ,12189844
1 ASH TREE HOUSE,SE5 0TE,10009803979
3 ASH TREE HOUSE,SE5 0TE,10009803981
5 ASH TREE HOUSE,SE5 0TE,10009803983
8 ASH TREE HOUSE,SE5 0TE,10009803986
12 ASH TREE HOUSE,SE5 0TE,10009803990
1 ASH TREE HOUSE,SE5 0TE,None
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
3 ASH TREE HOUSE,SE5 0TE,None
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
5 ASH TREE HOUSE,SE5 0TE,None
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
8 ASH TREE HOUSE,SE5 0TE,None
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
12 ASH TREE HOUSE,SE5 0TE,None
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
FLAT 3 599 HARROW ROAD,W10 4RA,None
@ -332,7 +337,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
138a Victoria Square,M4 5FA,77211289
139a Victoria Square,M4 5FA,77211290
140a Victoria Square,M4 5FA,77211291
141a Victoria Square,M4 5FA,None
141a Victoria Square,M4 5FA,77211292
142a Victoria Square,M4 5FA,77211293
143a Victoria Square,M4 5FA,77211294
144a Victoria Square,M4 5FA,77211295
@ -357,4 +362,5 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
163a Victoria Square,M4 5FA,77211314
164a Victoria Square,M4 5FA,77211315
165a Victoria Square,M4 5FA,77211316
166a Victoria Square,M4 5FA,None
166a Victoria Square,M4 5FA,None
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
1 User Input Postcode Manual UPRN Code
115 8 Genteel House Samara Drive UB1 1FJ 12189842
116 9 Genteel House Samara Drive UB1 1FJ 12189843
117 10 Genteel House Samara Drive UB1 1FJ 12189844
118 1 ASH TREE HOUSE SE5 0TE 10009803979 None
119 3 ASH TREE HOUSE Flat 1 Ash Tree House, 2, Thompson Avenue SE5 0TE 10009803981 10009803979
120 5 ASH TREE HOUSE 3 ASH TREE HOUSE SE5 0TE 10009803983 None
121 8 ASH TREE HOUSE Flat 3 ASH TREE HOUSE SE5 0TE 10009803986 10009803981
122 12 ASH TREE HOUSE 5 ASH TREE HOUSE SE5 0TE 10009803990 None
123 Flat 5 ASH TREE HOUSE SE5 0TE 10009803983
124 Flat 8 ASH TREE HOUSE SE5 0TE 10009803986
125 8 ASH TREE HOUSE SE5 0TE None
126 Flat 12 ASH TREE HOUSE SE5 0TE 10009803990
127 12 ASH TREE HOUSE SE5 0TE None
128 FLAT 1 599 HARROW ROAD W10 4RA 217113930
129 FLAT 2 599 HARROW ROAD W10 4RA 217113931
130 FLAT 3 599 HARROW ROAD W10 4RA None
337 138a Victoria Square M4 5FA 77211289
338 139a Victoria Square M4 5FA 77211290
339 140a Victoria Square M4 5FA 77211291
340 141a Victoria Square M4 5FA None 77211292
341 142a Victoria Square M4 5FA 77211293
342 143a Victoria Square M4 5FA 77211294
343 144a Victoria Square M4 5FA 77211295
362 163a Victoria Square M4 5FA 77211314
363 164a Victoria Square M4 5FA 77211315
364 165a Victoria Square M4 5FA 77211316
365 166a Victoria Square M4 5FA None
366 FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY CR2 7DL None

View file

View file

@ -0,0 +1,23 @@
# SQS queue that feeds the Lambda through the event source mapping below.
resource "aws_sqs_queue" "this" {
  name = "${var.name}-queue"

  # Lambda refuses to create the event source mapping when the queue's
  # visibility timeout is shorter than the function timeout, and the SQS
  # default is 30 seconds. AWS recommends at least six times the function
  # timeout so in-flight messages are not redelivered mid-processing.
  visibility_timeout_seconds = var.timeout * 6

  tags = var.tags
}

# Container-image Lambda; the image is selected by var.image_uri.
resource "aws_lambda_function" "this" {
  function_name = var.name
  role          = var.lambda_role_arn
  package_type  = "Image"
  image_uri     = var.image_uri
  timeout       = var.timeout
  tags          = var.tags
}

# Delivers queue messages to the function in batches of var.sqs_batch_size.
resource "aws_lambda_event_source_mapping" "this" {
  event_source_arn = aws_sqs_queue.this.arn
  function_name    = aws_lambda_function.this.arn
  batch_size       = var.sqs_batch_size
}

View file

@ -0,0 +1,15 @@
# Name of the Lambda function created by this module.
output "lambda_name" {
value = aws_lambda_function.this.function_name
}
# ARN of the Lambda function created by this module.
output "lambda_arn" {
value = aws_lambda_function.this.arn
}
# URL of the SQS queue created by this module.
output "sqs_queue_url" {
value = aws_sqs_queue.this.url
}
# ARN of the SQS queue created by this module.
output "sqs_queue_arn" {
value = aws_sqs_queue.this.arn
}

View file

@ -0,0 +1,32 @@
# Input variables for the Lambda + SQS module.

# Required: no default is set.
variable "name" {
description = "Base name for lambda and related resources"
type = string
}
# Required: no default is set.
variable "image_uri" {
description = "ECR image URI with tag"
type = string
}
# Required: no default is set.
variable "lambda_role_arn" {
description = "IAM role ARN for Lambda execution"
type = string
}
# Optional: defaults to 10 seconds.
variable "timeout" {
description = "Lambda timeout in seconds"
type = number
default = 10
}
# Optional: defaults to one message per batch.
variable "sqs_batch_size" {
description = "Number of SQS messages per batch"
type = number
default = 1
}
# Optional: defaults to no tags.
variable "tags" {
description = "Tags to apply to resources"
type = map(string)
default = {}
}

View file

@ -7,20 +7,29 @@ import numpy as np
from backend.app.utils import sap_to_epc
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine, db_read_session
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
from backend.app.db.models.recommendations import (
Recommendation,
Plan,
PlanRecommendations,
RecommendationMaterials,
)
from backend.app.db.models.portfolio import (
PropertyModel,
PropertyDetailsEpcModel,
PropertyDetailsSpatial,
)
from backend.app.db.functions.materials_functions import get_materials
from collections import defaultdict
from sqlalchemy import func
# PORTFOLIO_ID = 206
# SCENARIOS = [389]
PORTFOLIO_ID = 485 # Peabody
PORTFOLIO_ID = 502 # Peabody
SCENARIOS = [
970,
986,
]
scenario_names = {
970: "EPC C - No solid floor, EQI, IWI",
986: "EPC C",
}
@ -31,22 +40,26 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Properties
# --------------------
properties_query = session.query(
PropertyModel,
PropertyDetailsEpcModel
).join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id
).filter(
PropertyModel.portfolio_id == portfolio_id
).all()
properties_query = (
session.query(PropertyModel, PropertyDetailsEpcModel)
.join(
PropertyDetailsEpcModel,
PropertyModel.id == PropertyDetailsEpcModel.property_id,
)
.filter(PropertyModel.portfolio_id == portfolio_id)
.all()
)
properties_data = [
{
**{col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns},
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns},
**{
col.name: getattr(p.PropertyModel, col.name)
for col in PropertyModel.__table__.columns
},
**{
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
for col in PropertyDetailsEpcModel.__table__.columns
},
}
for p in properties_query
]
@ -58,13 +71,10 @@ def get_data(portfolio_id, scenario_ids):
session.query(
Plan.scenario_id,
Plan.property_id,
func.max(Plan.created_at).label("latest_created_at")
func.max(Plan.created_at).label("latest_created_at"),
)
.filter(Plan.scenario_id.in_(scenario_ids))
.group_by(
Plan.scenario_id,
Plan.property_id
)
.group_by(Plan.scenario_id, Plan.property_id)
.subquery()
)
@ -76,9 +86,9 @@ def get_data(portfolio_id, scenario_ids):
session.query(Plan)
.join(
latest_plans_subq,
(Plan.scenario_id == latest_plans_subq.c.scenario_id) &
(Plan.property_id == latest_plans_subq.c.property_id) &
(Plan.created_at == latest_plans_subq.c.latest_created_at)
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
& (Plan.property_id == latest_plans_subq.c.property_id)
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
)
.all()
)
@ -103,28 +113,29 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendations (NO materials yet)
# --------------------
recommendations_query = session.query(
Recommendation,
Plan.scenario_id,
PlanRecommendations.plan_id
).join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id
).join(
Plan,
Plan.id == PlanRecommendations.plan_id
).filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False)
).all()
recommendations_query = (
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
.join(
PlanRecommendations,
Recommendation.id == PlanRecommendations.recommendation_id,
)
.join(Plan, Plan.id == PlanRecommendations.plan_id)
.filter(
PlanRecommendations.plan_id.in_(plan_ids),
Recommendation.default.is_(True),
Recommendation.already_installed.is_(False),
)
.all()
)
recommendations_data = [
{
**{col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns},
**{
col.name: getattr(r.Recommendation, col.name)
for col in Recommendation.__table__.columns
},
"scenario_id": r.scenario_id,
"materials": [] # placeholder
"materials": [], # placeholder
}
for r in recommendations_query
]
@ -134,23 +145,25 @@ def get_data(portfolio_id, scenario_ids):
# --------------------
# Recommendation materials (SEPARATE QUERY)
# --------------------
materials_query = session.query(
RecommendationMaterials
).filter(
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
).all()
materials_query = (
session.query(RecommendationMaterials)
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
.all()
)
# Group materials by recommendation_id
materials_by_recommendation = defaultdict(list)
for m in materials_query:
materials_by_recommendation[m.recommendation_id].append({
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
})
materials_by_recommendation[m.recommendation_id].append(
{
"material_id": m.material_id,
"depth": m.depth,
"quantity": m.quantity,
"quantity_unit": m.quantity_unit,
"estimated_cost": m.estimated_cost,
}
)
# Attach materials safely (no filtering side effects)
for r in recommendations_data:
@ -161,7 +174,9 @@ def get_data(portfolio_id, scenario_ids):
return properties_data, plans_data, recommendations_data
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
properties_data, plans_data, recommendations_data = get_data(
portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
)
properties_df = pd.DataFrame(properties_data)
plans_df = pd.DataFrame(plans_data)
@ -172,10 +187,8 @@ with db_read_session() as session:
materials = pd.DataFrame(materials)
material_lookup = (
materials
.set_index("id")[["type", "includes_battery"]]
.to_dict("index")
material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict(
"index"
)
@ -189,14 +202,14 @@ def has_solar_with_battery(materials_list):
return False
recommendations_df["has_solar_with_battery"] = (
recommendations_df["materials"].apply(has_solar_with_battery)
recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply(
has_solar_with_battery
)
recommendations_df["measure_type"] = np.where(
recommendations_df["has_solar_with_battery"] == True,
recommendations_df["measure_type"] + "_with_battery",
recommendations_df["measure_type"]
recommendations_df["measure_type"],
)
# Adjust material type to indicate if there is a battery included
@ -211,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
for scenario_id in SCENARIOS:
# Get recs for this scenario
recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
["property_id", "measure_type", "estimated_cost", "default"]
recommended_measures_df = recommendations_df[
recommendations_df["scenario_id"] == scenario_id
][["property_id", "measure_type", "estimated_cost", "default"]]
recommended_measures_df = recommended_measures_df[
recommended_measures_df["default"]
]
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
["property_id", "default", "sap_points"]]
post_install_sap = recommendations_df[
recommendations_df["scenario_id"] == scenario_id
][["property_id", "default", "sap_points"]]
post_install_sap = post_install_sap[post_install_sap["default"]]
# Sum up the sap points by property id
post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
post_install_sap = (
post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
)
# Find dupes by property id and measure type
dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
dupes = recommended_measures_df.duplicated(
subset=["property_id", "measure_type"], keep=False
)
dupe_df = recommended_measures_df[dupes]
if dupe_df.shape:
# Drop dupes - happened due to a funny bug
recommended_measures_df = recommended_measures_df.drop_duplicates(
subset=["property_id", "measure_type"], keep='first'
subset=["property_id", "measure_type"], keep="first"
)
recommendations_measures_pivot = recommended_measures_df.pivot(
index='property_id',
columns='measure_type',
values='estimated_cost'
index="property_id", columns="measure_type", values="estimated_cost"
)
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
# Total cost is the row sum, excluding the property_id column
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
columns=["property_id"]
).sum(axis=1)
recommendations_measures_pivot["total_retrofit_cost"] = (
recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1)
)
df = properties_df[
[
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
"heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
"id"
df = (
properties_df[
[
"landlord_property_id",
"property_id",
"uprn",
"address",
"postcode",
"property_type",
"walls",
"roof",
"heating",
"windows",
"current_epc_rating",
"current_sap_points",
"total_floor_area",
"number_of_rooms",
"id",
]
]
].merge(
recommendations_measures_pivot, how="left", on="property_id"
).merge(
post_install_sap, how="left", on="property_id"
.merge(recommendations_measures_pivot, how="left", on="property_id")
.merge(post_install_sap, how="left", on="property_id")
)
# df = df.drop(columns=["property_id"])
@ -262,21 +292,25 @@ for scenario_id in SCENARIOS:
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
lambda x: sap_to_epc(x)
)
df["uprn"] = df["uprn"].astype(str)
relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
df2 = df.merge(
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
suffixes=("", "_plan")
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
how="left",
on="property_id",
suffixes=("", "_plan"),
)
print(df2["predicted_post_works_epc"].value_counts())
print(df2["post_epc_rating"].value_counts())
z = df2[
(df2["predicted_post_works_epc"] != "D") &
(df2["post_epc_rating"].astype(str) == "Epc.D")
]
(df2["predicted_post_works_epc"] != "D")
& (df2["post_epc_rating"].astype(str) == "Epc.D")
]
df2["predicted_post_works_epc"].value_counts()
df2["post_epc_rating"].astype(str).value_counts()
@ -291,189 +325,6 @@ for scenario_id in SCENARIOS:
df[df["predicted_post_works_sap"] == ""]
# Create excel to store to
<<<<<<< HEAD
filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx")
filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
=======
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
with pd.ExcelWriter(filename) as writer:
df.to_excel(writer, sheet_name="properties", index=False)
# asset_list = pd.DataFrame(asset_list)
# asset_list = asset_list.rename(
# columns={
# "postcode": "domna_postcode"
# }
# )
# if "domna_full_address":
# # For Peabody
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
#
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
# asset_list = asset_list.merge(
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
# how="left",
# on="uprn"
# )
# Get conservation area data from property details spatial. based on the UPRNs
def get_conservation_area_data(uprns):
session = sessionmaker(bind=db_engine)()
session.begin()
# Query to get conservation area data
spatial_query = session.query(
PropertyDetailsSpatial
).filter(
PropertyDetailsSpatial.uprn.in_(uprns) # Filter by UPRNs
).all()
# Transform spatial data to include all fields dynamically
spatial_data = [
{col.name: getattr(spatial, col.name) for col in PropertyDetailsSpatial.__table__.columns}
for spatial in spatial_query
]
session.close()
return pd.DataFrame(spatial_data)
uprns = asset_list[
~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
]["uprn"].astype(int).unique().tolist()
conservation_area_data = get_conservation_area_data(uprns)
conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
asset_list = asset_list.merge(
conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
how="left",
on="uprn"
)
# For exporting
df.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
"with ID.xlsx",
index=False
)
# asset_list.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
# index=False
# )
condition_costs = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
sheet_name="Prices - Khalim",
header=35
)
# Remove unnamed columns and reset index
condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
condition_costs = condition_costs.reset_index(drop=True)
# We now estimate condition cost
def simulate_condition(asset_list, condition_costs):
"""
This function is for testing, and will simulate condition cost from 1-10 for each property to see what the
costing array looks like.
:param df:
:return:
"""
condition_df = []
for _, row in asset_list.iterrows():
n_bathrooms = row["bathrooms"]
conditions = {}
for condition in reversed(range(1, 11)):
condition_cost = condition_costs[
condition_costs["Condition"] == condition
].drop(columns=["Condition"]).iloc[0]
# Each cost is scaled by floor area
condition_cost = condition_cost * row["total_floor_area"]
condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
total_condition_cost = condition_cost.sum()
conditions["Condition " + str(condition)] = (total_condition_cost)
condition_df.append(
{
"uprn": row["uprn"],
**conditions
}
)
condition_df = pd.DataFrame(condition_df)
asset_list = asset_list.merge(
condition_df,
how="left",
on="uprn"
)
return asset_list
# asset_list = simulate_condition(asset_list, condition_costs)
# We calculate the condition cost based on the condition
for _, row in asset_list.iterrows():
condition = row["condition_score"]
if condition in [None, ""]:
continue
condition = int(float(condition))
condition_cost = condition_costs[
condition_costs["Condition"] == condition
].drop(columns=["Condition"]).iloc[0]
# Each cost is scaled by floor area
condition_cost = condition_cost * float(row["total_floor_area"])
n_bathrooms = row["n_bathrooms"]
condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
total_condition_cost = condition_cost.sum()
asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost
# Store output
asset_list.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
index=False
)
condition_cost_comparison = asset_list[
["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
]
# Testing
plans_df.head()
example = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
"SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
)
plans_df2 = plans_df.merge(
properties_df[["property_id", "landlord_property_id"]],
left_on="property_id",
right_on="property_id",
how="left"
)
plans_df2 = plans_df2[plans_df2["scenario_id"] == 909]
dupes = plans_df2[plans_df2["property_id"].duplicated()]
# merge on plans
example = example.merge(
plans_df, how="left",
)
>>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c