mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added terraform files and test plan
This commit is contained in:
parent
2b01ac9f6c
commit
6c05b0d6a4
19 changed files with 417 additions and 319 deletions
86
.github/workflows/actions/actions/lambda-deploy/action.yml
vendored
Normal file
86
.github/workflows/actions/actions/lambda-deploy/action.yml
vendored
Normal file
|
|
@ -0,0 +1,86 @@
|
|||
# Composite action: provisions the ECR repository with Terraform, builds and
# pushes the Lambda container image, then deploys the Lambda with Terraform,
# passing the freshly built image tag.
name: "Build and Push Lambda Image to ECR"
description: "Reusable action for building and pushing lambda Docker image to ECR"

inputs:
  ecr_name:
    description: "Lambda name / ECR repo name"
    required: true
  dockerfile_path:
    description: "Path to Dockerfile"
    required: true
  docker_context:
    description: "Docker build context directory (defaults to the repository root)"
    required: false
    default: "."
  ecr_tf_dir:
    description: "Path to ECR terraform directory"
    required: true
  lambda_tf_dir:
    description: "Path to Lambda terraform directory"
    required: true
  aws-access-key-id:
    description: "AWS access key"
    required: true
  aws-secret-access-key:
    description: "AWS secret key"
    required: true
  aws-region:
    description: "AWS region"
    required: true
  git-sha:
    description: "Git commit SHA"
    required: true
  git-ref:
    description: "Git ref name"
    required: true

runs:
  using: "composite"
  steps:
    - uses: actions/checkout@v4

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}

    - name: Log in to Amazon ECR
      id: login-ecr
      uses: aws-actions/amazon-ecr-login@v2

    # NOTE(review): this action file lives under
    # .github/workflows/actions/actions/, so the sibling action is referenced
    # with the same doubled "actions" segment. Adjust both `uses:` paths if the
    # directories are flattened.
    - name: Deploy ECR
      uses: ./.github/workflows/actions/actions/terraform-deploy
      with:
        working_directory: ${{ inputs.ecr_tf_dir }}
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}

    - name: Set Docker image tag
      id: set_tag
      shell: bash
      # Inputs are passed through the environment rather than expanded into
      # the script text, so a crafted ref name cannot inject shell commands.
      env:
        GIT_SHA: ${{ inputs.git-sha }}
        GIT_REF: ${{ inputs.git-ref }}
      run: |
        SHORT_SHA="${GIT_SHA:0:7}"
        # Docker tags only allow [A-Za-z0-9_.-]; map everything else
        # (including the "/" in refs such as feature/foo) to "-".
        BRANCH="$(printf '%s' "$GIT_REF" | tr -c 'A-Za-z0-9_.-' '-')"
        TAG="${BRANCH}-${SHORT_SHA}"
        echo "IMAGE_TAG=${TAG}" >> "$GITHUB_ENV"
        echo "tag=${TAG}" >> "$GITHUB_OUTPUT"

    - name: Build and push Docker image
      shell: bash
      env:
        REGISTRY: ${{ steps.login-ecr.outputs.registry }}
        ECR_NAME: ${{ inputs.ecr_name }}
        IMAGE_TAG: ${{ steps.set_tag.outputs.tag }}
        DOCKERFILE_PATH: ${{ inputs.dockerfile_path }}
        DOCKER_CONTEXT: ${{ inputs.docker_context }}
      run: |
        IMAGE_URI="${REGISTRY}/${ECR_NAME}:${IMAGE_TAG}"
        echo "Building Docker image for ${ECR_NAME}..."
        docker build -t "$IMAGE_URI" -f "$DOCKERFILE_PATH" "$DOCKER_CONTEXT"

        echo "Pushing to ECR..."
        docker push "$IMAGE_URI"

    - name: Deploy Lambda
      uses: ./.github/workflows/actions/actions/terraform-deploy
      with:
        working_directory: ${{ inputs.lambda_tf_dir }}
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}
        lambda-image-tag: ${{ steps.set_tag.outputs.tag }}
|
||||
|
||||
|
||||
|
||||
55
.github/workflows/actions/actions/terraform-deploy/action.yml
vendored
Normal file
55
.github/workflows/actions/actions/terraform-deploy/action.yml
vendored
Normal file
|
|
@ -0,0 +1,55 @@
|
|||
# Composite action: runs `terraform init`, `plan` and `apply` in the given
# directory. When `lambda-image-tag` is supplied it is forwarded to Terraform
# as the `lambda_image_tag` variable.
name: "Terraform Plan and Apply"
description: "Initialises, plans and applies the Terraform config in the given directory"

inputs:
  working_directory:
    description: "Directory containing Terraform config"
    required: true
  aws-access-key-id:
    description: "AWS access key"
    required: true
  aws-secret-access-key:
    description: "AWS secret key"
    required: true
  aws-region:
    description: "AWS region"
    required: true
  lambda-image-tag:
    description: "Tag of the Lambda image (e.g., GitHub SHA)"
    required: false

runs:
  using: "composite"
  steps:
    - uses: actions/checkout@v4

    - name: Configure AWS credentials
      uses: aws-actions/configure-aws-credentials@v4
      with:
        aws-access-key-id: ${{ inputs.aws-access-key-id }}
        aws-secret-access-key: ${{ inputs.aws-secret-access-key }}
        aws-region: ${{ inputs.aws-region }}

    - name: Setup Terraform
      uses: hashicorp/setup-terraform@v3

    - name: Terraform Init
      working-directory: ${{ inputs.working_directory }}
      shell: bash
      # -input=false makes Terraform fail fast instead of waiting on a prompt
      # that nobody can answer in CI.
      run: terraform init -reconfigure -input=false

    - name: Terraform Plan
      working-directory: ${{ inputs.working_directory }}
      shell: bash
      # The tag is passed through the environment rather than expanded into
      # the script text, so its value cannot inject shell commands.
      env:
        LAMBDA_IMAGE_TAG: ${{ inputs.lambda-image-tag }}
      run: |
        PLAN_ARGS=(-input=false -out=tfplan)
        if [ -n "$LAMBDA_IMAGE_TAG" ]; then
          PLAN_ARGS+=(-var="lambda_image_tag=${LAMBDA_IMAGE_TAG}")
        fi
        terraform plan "${PLAN_ARGS[@]}"

    - name: Terraform Apply
      working-directory: ${{ inputs.working_directory }}
      shell: bash
      # Applies exactly the plan saved by the previous step.
      run: terraform apply -input=false -auto-approve tfplan
|
||||
|
||||
38
.github/workflows/deploy_terraform.yml
vendored
38
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -43,40 +43,16 @@ jobs:
|
|||
env:
|
||||
AWS_PROFILE: "DevAdmin"
|
||||
|
||||
# Deploy shared Terraform things
|
||||
- name: Terraform Init
|
||||
run: cd infrastructure/terraform && terraform init
|
||||
run: cd infrastructure/terraform/shared && terraform init
|
||||
|
||||
- name: Terraform Workspace
|
||||
run: |
|
||||
# BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
|
||||
cd infrastructure/terraform
|
||||
# terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME}
|
||||
# Until Khalim makes a different environment for us
|
||||
terraform workspace select dev
|
||||
cd infrastructure/terraform/shared
|
||||
terraform workspace select dev || terraform workspace new dev
|
||||
|
||||
- name: Terraform Plan
|
||||
- name: Terraform Plan (shared)
|
||||
run: |
|
||||
BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///")
|
||||
cd infrastructure/terraform && terraform plan -var-file=dev.tfvars
|
||||
|
||||
- name: Deploy to Dev
|
||||
if: github.ref == 'refs/heads/dev'
|
||||
run: echo "hello world"
|
||||
# run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve
|
||||
env:
|
||||
name: dev
|
||||
|
||||
# - name: Configure AWS credentials (ProdAdmin)
|
||||
# uses: aws-actions/configure-aws-credentials@v1
|
||||
# with:
|
||||
# aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}
|
||||
# aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}
|
||||
# aws-region: eu-west-2
|
||||
# env:
|
||||
# AWS_PROFILE: "ProdAdmin"
|
||||
|
||||
# - name: Deploy to Prod
|
||||
# if: github.ref == 'refs/heads/prod'
|
||||
# run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve
|
||||
# env:
|
||||
# name: prod
|
||||
cd infrastructure/terraform/shared
|
||||
terraform plan -var-file=dev.tfvars
|
||||
|
|
@ -72,21 +72,21 @@ def app():
|
|||
data_folder = "/workspaces/model/asset_list"
|
||||
data_filename = "assets.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "Post Code"
|
||||
postcode_column = "POSTCODE"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "User Input"
|
||||
fulladdress_column = "ADDRESS"
|
||||
address_cols_to_concat = None
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = "UPRN"
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_built_form = "BUILD FORM"
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "LLUPRN"
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
|
|||
7
backend/address2UPRN/Dockerfile
Normal file
7
backend/address2UPRN/Dockerfile
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10

# Copy function code into the Lambda task root
COPY app.py ${LAMBDA_TASK_ROOT}

# Set the handler as "<module>.<function>". The module must be a file that is
# actually present in the image; only app.py is copied above, so the handler
# is app.handler.
# NOTE(review): if the handler really lives in main.py, copy that file instead
# and point CMD back at main.handler.
CMD ["app.handler"]
|
||||
|
|
@ -14,6 +14,9 @@ EPC_AUTH_TOKEN = os.getenv(
|
|||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
if EPC_AUTH_TOKEN is None:
|
||||
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
||||
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Set
|
||||
|
|
@ -38,6 +41,34 @@ def levenshtein(a: str, b: str) -> float:
|
|||
def tokenise(s: str) -> Set[str]:
    """Split *s* on whitespace and return its distinct tokens as a set."""
    words = s.split()
    return set(words)
|
||||
|
||||
def extract_building_number(s: str) -> str | None:
|
||||
"""
|
||||
Extract the main building number (NOT flat/unit).
|
||||
Assumes formats like:
|
||||
- '42 moreton road'
|
||||
- 'flat 3 42 moreton road'
|
||||
"""
|
||||
tokens = s.split()
|
||||
|
||||
# remove flat/unit context
|
||||
cleaned = []
|
||||
skip_next = False
|
||||
for t in tokens:
|
||||
if t in ("flat", "apt", "apartment", "unit"):
|
||||
skip_next = True
|
||||
continue
|
||||
if skip_next:
|
||||
skip_next = False
|
||||
continue
|
||||
cleaned.append(t)
|
||||
|
||||
# first remaining number is building number
|
||||
for t in cleaned:
|
||||
if re.fullmatch(r"\d+[a-z]?", t):
|
||||
return t
|
||||
|
||||
return None
|
||||
|
||||
a_norm = normalise_address(a)
|
||||
b_norm = normalise_address(b)
|
||||
|
||||
|
|
@ -52,6 +83,13 @@ def levenshtein(a: str, b: str) -> float:
|
|||
if nums_a and nums_b and nums_a.isdisjoint(nums_b):
|
||||
return 0.0
|
||||
|
||||
# 🔒 HARD GUARD: building number must match
|
||||
bld_a = extract_building_number(a_norm)
|
||||
bld_b = extract_building_number(b_norm)
|
||||
|
||||
if bld_a and bld_b and bld_a != bld_b:
|
||||
return 0.0
|
||||
|
||||
# --- order-sensitive flat/building guard ---
|
||||
seq_a = extract_number_sequence(a_norm)
|
||||
seq_b = extract_number_sequence(b_norm)
|
||||
|
|
@ -418,6 +456,10 @@ def run_all_test():
|
|||
get_uprn("46 Oswald Street", "E5 0BT"), False
|
||||
) # this one return "flat 1, in 1 semley gate"
|
||||
get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street")
|
||||
get_uprn_candidates(
|
||||
get_epc_data_with_postcode("Cr2 7dl"),
|
||||
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",
|
||||
)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
|
@ -511,6 +553,11 @@ if __name__ == "__main__":
|
|||
)
|
||||
|
||||
|
||||
def handler(event, context):
    """Placeholder handler: prints a greeting and returns a fixed 200 response.

    Neither ``event`` nor ``context`` is inspected.
    """
    message = "hello world"
    print(message)
    response = {"statusCode": 200}
    response["body"] = message
    return response
|
||||
|
||||
|
||||
# TO do function dispatcher,
|
||||
|
||||
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
|
||||
|
|
|
|||
|
|
@ -115,11 +115,16 @@ FLAT 43 Goodstone Court,HA1 4FL,10070269095
|
|||
8 Genteel House Samara Drive,UB1 1FJ,12189842
|
||||
9 Genteel House Samara Drive,UB1 1FJ,12189843
|
||||
10 Genteel House Samara Drive,UB1 1FJ,12189844
|
||||
1 ASH TREE HOUSE,SE5 0TE,10009803979
|
||||
3 ASH TREE HOUSE,SE5 0TE,10009803981
|
||||
5 ASH TREE HOUSE,SE5 0TE,10009803983
|
||||
8 ASH TREE HOUSE,SE5 0TE,10009803986
|
||||
12 ASH TREE HOUSE,SE5 0TE,10009803990
|
||||
1 ASH TREE HOUSE,SE5 0TE,None
|
||||
"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979
|
||||
3 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981
|
||||
5 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983
|
||||
Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986
|
||||
8 ASH TREE HOUSE,SE5 0TE,None
|
||||
Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990
|
||||
12 ASH TREE HOUSE,SE5 0TE,None
|
||||
FLAT 1 599 HARROW ROAD,W10 4RA,217113930
|
||||
FLAT 2 599 HARROW ROAD,W10 4RA,217113931
|
||||
FLAT 3 599 HARROW ROAD,W10 4RA,None
|
||||
|
|
@ -332,7 +337,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
|||
138a Victoria Square,M4 5FA,77211289
|
||||
139a Victoria Square,M4 5FA,77211290
|
||||
140a Victoria Square,M4 5FA,77211291
|
||||
141a Victoria Square,M4 5FA,None
|
||||
141a Victoria Square,M4 5FA,77211292
|
||||
142a Victoria Square,M4 5FA,77211293
|
||||
143a Victoria Square,M4 5FA,77211294
|
||||
144a Victoria Square,M4 5FA,77211295
|
||||
|
|
@ -357,4 +362,5 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974
|
|||
163a Victoria Square,M4 5FA,77211314
|
||||
164a Victoria Square,M4 5FA,77211315
|
||||
165a Victoria Square,M4 5FA,77211316
|
||||
166a Victoria Square,M4 5FA,None
|
||||
166a Victoria Square,M4 5FA,None
|
||||
"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None
|
||||
|
0
infrastructure/terraform/lamdas/backend.tf
Normal file
0
infrastructure/terraform/lamdas/backend.tf
Normal file
0
infrastructure/terraform/lamdas/dev.tfvars
Normal file
0
infrastructure/terraform/lamdas/dev.tfvars
Normal file
0
infrastructure/terraform/lamdas/main.tf
Normal file
0
infrastructure/terraform/lamdas/main.tf
Normal file
0
infrastructure/terraform/lamdas/variables.tf
Normal file
0
infrastructure/terraform/lamdas/variables.tf
Normal file
23
infrastructure/terraform/modules/lambda_with_sqs/main.tf
Normal file
23
infrastructure/terraform/modules/lambda_with_sqs/main.tf
Normal file
|
|
@ -0,0 +1,23 @@
|
|||
# SQS queue whose messages trigger the Lambda function below.
resource "aws_sqs_queue" "this" {
  name = "${var.name}-queue"

  # Lambda rejects an SQS event source mapping when the queue's visibility
  # timeout is shorter than the function timeout, and AWS recommends at least
  # six times the function timeout so in-flight batches are not redelivered
  # while they are still being processed. Without this the queue keeps the
  # 30 second default regardless of var.timeout.
  visibility_timeout_seconds = var.timeout * 6

  tags = var.tags
}

# Container-image Lambda function; the image is supplied by the caller.
resource "aws_lambda_function" "this" {
  function_name = var.name
  role          = var.lambda_role_arn

  package_type = "Image"
  image_uri    = var.image_uri

  timeout = var.timeout

  tags = var.tags
}

# Wires the queue to the function: Lambda polls the queue and invokes the
# function with up to var.sqs_batch_size messages per invocation.
# NOTE(review): the role in var.lambda_role_arn presumably needs SQS
# receive/delete permissions on this queue - confirm where the role is defined.
resource "aws_lambda_event_source_mapping" "this" {
  event_source_arn = aws_sqs_queue.this.arn
  function_name    = aws_lambda_function.this.arn

  batch_size = var.sqs_batch_size
}
|
||||
15
infrastructure/terraform/modules/lambda_with_sqs/outputs.tf
Normal file
15
infrastructure/terraform/modules/lambda_with_sqs/outputs.tf
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
output "lambda_name" {
  description = "Name of the Lambda function"
  value       = aws_lambda_function.this.function_name
}

output "lambda_arn" {
  description = "ARN of the Lambda function"
  value       = aws_lambda_function.this.arn
}

output "sqs_queue_url" {
  description = "URL of the SQS queue that triggers the Lambda function"
  value       = aws_sqs_queue.this.url
}

output "sqs_queue_arn" {
  description = "ARN of the SQS queue that triggers the Lambda function"
  value       = aws_sqs_queue.this.arn
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
variable "name" {
  description = "Base name for lambda and related resources"
  type        = string
}

variable "image_uri" {
  description = "ECR image URI with tag"
  type        = string
}

variable "lambda_role_arn" {
  description = "IAM role ARN for Lambda execution"
  type        = string
}

variable "timeout" {
  description = "Lambda timeout in seconds"
  type        = number
  default     = 10

  # Fail at plan time rather than at apply: Lambda only accepts 1-900 seconds.
  validation {
    condition     = var.timeout >= 1 && var.timeout <= 900
    error_message = "The timeout must be between 1 and 900 seconds."
  }
}

variable "sqs_batch_size" {
  description = "Number of SQS messages per batch"
  type        = number
  default     = 1

  # 10000 is the upper bound Lambda accepts for an SQS event source.
  validation {
    condition     = var.sqs_batch_size >= 1 && var.sqs_batch_size <= 10000
    error_message = "The sqs_batch_size must be between 1 and 10000."
  }
}

variable "tags" {
  description = "Tags to apply to resources"
  type        = map(string)
  default     = {}
}
|
||||
|
|
@ -7,20 +7,29 @@ import numpy as np
|
|||
from backend.app.utils import sap_to_epc
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db.connection import db_engine, db_read_session
|
||||
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
Plan,
|
||||
PlanRecommendations,
|
||||
RecommendationMaterials,
|
||||
)
|
||||
from backend.app.db.models.portfolio import (
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyDetailsSpatial,
|
||||
)
|
||||
from backend.app.db.functions.materials_functions import get_materials
|
||||
from collections import defaultdict
|
||||
from sqlalchemy import func
|
||||
|
||||
# PORTFOLIO_ID = 206
|
||||
# SCENARIOS = [389]
|
||||
PORTFOLIO_ID = 485 # Peabody
|
||||
PORTFOLIO_ID = 502 # Peabody
|
||||
SCENARIOS = [
|
||||
970,
|
||||
986,
|
||||
]
|
||||
scenario_names = {
|
||||
970: "EPC C - No solid floor, EQI, IWI",
|
||||
986: "EPC C",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -31,22 +40,26 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Properties
|
||||
# --------------------
|
||||
properties_query = session.query(
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel
|
||||
).join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id
|
||||
).filter(
|
||||
PropertyModel.portfolio_id == portfolio_id
|
||||
).all()
|
||||
properties_query = (
|
||||
session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
properties_data = [
|
||||
{
|
||||
**{col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns},
|
||||
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns},
|
||||
**{
|
||||
col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for p in properties_query
|
||||
]
|
||||
|
|
@ -58,13 +71,10 @@ def get_data(portfolio_id, scenario_ids):
|
|||
session.query(
|
||||
Plan.scenario_id,
|
||||
Plan.property_id,
|
||||
func.max(Plan.created_at).label("latest_created_at")
|
||||
func.max(Plan.created_at).label("latest_created_at"),
|
||||
)
|
||||
.filter(Plan.scenario_id.in_(scenario_ids))
|
||||
.group_by(
|
||||
Plan.scenario_id,
|
||||
Plan.property_id
|
||||
)
|
||||
.group_by(Plan.scenario_id, Plan.property_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
|
|
@ -76,9 +86,9 @@ def get_data(portfolio_id, scenario_ids):
|
|||
session.query(Plan)
|
||||
.join(
|
||||
latest_plans_subq,
|
||||
(Plan.scenario_id == latest_plans_subq.c.scenario_id) &
|
||||
(Plan.property_id == latest_plans_subq.c.property_id) &
|
||||
(Plan.created_at == latest_plans_subq.c.latest_created_at)
|
||||
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
|
||||
& (Plan.property_id == latest_plans_subq.c.property_id)
|
||||
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
|
@ -103,28 +113,29 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendations (NO materials yet)
|
||||
# --------------------
|
||||
recommendations_query = session.query(
|
||||
Recommendation,
|
||||
Plan.scenario_id,
|
||||
PlanRecommendations.plan_id
|
||||
).join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id
|
||||
).join(
|
||||
Plan,
|
||||
Plan.id == PlanRecommendations.plan_id
|
||||
).filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
Recommendation.already_installed.is_(False)
|
||||
).all()
|
||||
recommendations_query = (
|
||||
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(Plan, Plan.id == PlanRecommendations.plan_id)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
Recommendation.already_installed.is_(False),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
recommendations_data = [
|
||||
{
|
||||
**{col.name: getattr(r.Recommendation, col.name)
|
||||
for col in Recommendation.__table__.columns},
|
||||
**{
|
||||
col.name: getattr(r.Recommendation, col.name)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"scenario_id": r.scenario_id,
|
||||
"materials": [] # placeholder
|
||||
"materials": [], # placeholder
|
||||
}
|
||||
for r in recommendations_query
|
||||
]
|
||||
|
|
@ -134,23 +145,25 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendation materials (SEPARATE QUERY)
|
||||
# --------------------
|
||||
materials_query = session.query(
|
||||
RecommendationMaterials
|
||||
).filter(
|
||||
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
|
||||
).all()
|
||||
materials_query = (
|
||||
session.query(RecommendationMaterials)
|
||||
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
|
||||
.all()
|
||||
)
|
||||
|
||||
# Group materials by recommendation_id
|
||||
materials_by_recommendation = defaultdict(list)
|
||||
|
||||
for m in materials_query:
|
||||
materials_by_recommendation[m.recommendation_id].append({
|
||||
"material_id": m.material_id,
|
||||
"depth": m.depth,
|
||||
"quantity": m.quantity,
|
||||
"quantity_unit": m.quantity_unit,
|
||||
"estimated_cost": m.estimated_cost,
|
||||
})
|
||||
materials_by_recommendation[m.recommendation_id].append(
|
||||
{
|
||||
"material_id": m.material_id,
|
||||
"depth": m.depth,
|
||||
"quantity": m.quantity,
|
||||
"quantity_unit": m.quantity_unit,
|
||||
"estimated_cost": m.estimated_cost,
|
||||
}
|
||||
)
|
||||
|
||||
# Attach materials safely (no filtering side effects)
|
||||
for r in recommendations_data:
|
||||
|
|
@ -161,7 +174,9 @@ def get_data(portfolio_id, scenario_ids):
|
|||
return properties_data, plans_data, recommendations_data
|
||||
|
||||
|
||||
properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS)
|
||||
properties_data, plans_data, recommendations_data = get_data(
|
||||
portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS
|
||||
)
|
||||
|
||||
properties_df = pd.DataFrame(properties_data)
|
||||
plans_df = pd.DataFrame(plans_data)
|
||||
|
|
@ -172,10 +187,8 @@ with db_read_session() as session:
|
|||
|
||||
materials = pd.DataFrame(materials)
|
||||
|
||||
material_lookup = (
|
||||
materials
|
||||
.set_index("id")[["type", "includes_battery"]]
|
||||
.to_dict("index")
|
||||
material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict(
|
||||
"index"
|
||||
)
|
||||
|
||||
|
||||
|
|
@ -189,14 +202,14 @@ def has_solar_with_battery(materials_list):
|
|||
return False
|
||||
|
||||
|
||||
recommendations_df["has_solar_with_battery"] = (
|
||||
recommendations_df["materials"].apply(has_solar_with_battery)
|
||||
recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply(
|
||||
has_solar_with_battery
|
||||
)
|
||||
|
||||
recommendations_df["measure_type"] = np.where(
|
||||
recommendations_df["has_solar_with_battery"] == True,
|
||||
recommendations_df["measure_type"] + "_with_battery",
|
||||
recommendations_df["measure_type"]
|
||||
recommendations_df["measure_type"],
|
||||
)
|
||||
|
||||
# Adjust material type to indicate if there is a battery included
|
||||
|
|
@ -211,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3
|
|||
|
||||
for scenario_id in SCENARIOS:
|
||||
# Get recs for this scenario
|
||||
recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
|
||||
["property_id", "measure_type", "estimated_cost", "default"]
|
||||
recommended_measures_df = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
][["property_id", "measure_type", "estimated_cost", "default"]]
|
||||
recommended_measures_df = recommended_measures_df[
|
||||
recommended_measures_df["default"]
|
||||
]
|
||||
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
|
||||
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
|
||||
|
||||
post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][
|
||||
["property_id", "default", "sap_points"]]
|
||||
post_install_sap = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
][["property_id", "default", "sap_points"]]
|
||||
post_install_sap = post_install_sap[post_install_sap["default"]]
|
||||
# Sum up the sap points by property id
|
||||
post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
|
||||
post_install_sap = (
|
||||
post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index()
|
||||
)
|
||||
|
||||
# Find dupes by property id and measure type
|
||||
dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False)
|
||||
dupes = recommended_measures_df.duplicated(
|
||||
subset=["property_id", "measure_type"], keep=False
|
||||
)
|
||||
dupe_df = recommended_measures_df[dupes]
|
||||
|
||||
if dupe_df.shape:
|
||||
# Drop dupes - happened due to a funny bug
|
||||
recommended_measures_df = recommended_measures_df.drop_duplicates(
|
||||
subset=["property_id", "measure_type"], keep='first'
|
||||
subset=["property_id", "measure_type"], keep="first"
|
||||
)
|
||||
|
||||
recommendations_measures_pivot = recommended_measures_df.pivot(
|
||||
index='property_id',
|
||||
columns='measure_type',
|
||||
values='estimated_cost'
|
||||
index="property_id", columns="measure_type", values="estimated_cost"
|
||||
)
|
||||
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
|
||||
|
||||
# Total cost is the row sum, excluding the property_id column
|
||||
recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop(
|
||||
columns=["property_id"]
|
||||
).sum(axis=1)
|
||||
recommendations_measures_pivot["total_retrofit_cost"] = (
|
||||
recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1)
|
||||
)
|
||||
|
||||
df = properties_df[
|
||||
[
|
||||
"landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof",
|
||||
"heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
|
||||
"id"
|
||||
df = (
|
||||
properties_df[
|
||||
[
|
||||
"landlord_property_id",
|
||||
"property_id",
|
||||
"uprn",
|
||||
"address",
|
||||
"postcode",
|
||||
"property_type",
|
||||
"walls",
|
||||
"roof",
|
||||
"heating",
|
||||
"windows",
|
||||
"current_epc_rating",
|
||||
"current_sap_points",
|
||||
"total_floor_area",
|
||||
"number_of_rooms",
|
||||
"id",
|
||||
]
|
||||
]
|
||||
].merge(
|
||||
recommendations_measures_pivot, how="left", on="property_id"
|
||||
).merge(
|
||||
post_install_sap, how="left", on="property_id"
|
||||
.merge(recommendations_measures_pivot, how="left", on="property_id")
|
||||
.merge(post_install_sap, how="left", on="property_id")
|
||||
)
|
||||
|
||||
# df = df.drop(columns=["property_id"])
|
||||
|
|
@ -262,21 +292,25 @@ for scenario_id in SCENARIOS:
|
|||
|
||||
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
|
||||
df["predicted_post_works_sap"] = df["predicted_post_works_sap"]
|
||||
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x))
|
||||
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
|
||||
lambda x: sap_to_epc(x)
|
||||
)
|
||||
df["uprn"] = df["uprn"].astype(str)
|
||||
|
||||
relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
|
||||
df2 = df.merge(
|
||||
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id",
|
||||
suffixes=("", "_plan")
|
||||
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
|
||||
how="left",
|
||||
on="property_id",
|
||||
suffixes=("", "_plan"),
|
||||
)
|
||||
print(df2["predicted_post_works_epc"].value_counts())
|
||||
print(df2["post_epc_rating"].value_counts())
|
||||
|
||||
z = df2[
|
||||
(df2["predicted_post_works_epc"] != "D") &
|
||||
(df2["post_epc_rating"].astype(str) == "Epc.D")
|
||||
]
|
||||
(df2["predicted_post_works_epc"] != "D")
|
||||
& (df2["post_epc_rating"].astype(str) == "Epc.D")
|
||||
]
|
||||
|
||||
df2["predicted_post_works_epc"].value_counts()
|
||||
df2["post_epc_rating"].astype(str).value_counts()
|
||||
|
|
@ -291,189 +325,6 @@ for scenario_id in SCENARIOS:
|
|||
df[df["predicted_post_works_sap"] == ""]
|
||||
|
||||
# Create excel to store to
|
||||
<<<<<<< HEAD
|
||||
filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx")
|
||||
filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
df.to_excel(writer, sheet_name="properties", index=False)
|
||||
=======
|
||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting "
|
||||
f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx")
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
df.to_excel(writer, sheet_name="properties", index=False)
|
||||
|
||||
|
||||
# asset_list = pd.DataFrame(asset_list)
|
||||
# asset_list = asset_list.rename(
|
||||
# columns={
|
||||
# "postcode": "domna_postcode"
|
||||
# }
|
||||
# )
|
||||
# if "domna_full_address":
|
||||
# # For Peabody
|
||||
# asset_list["domna_full_address"] = asset_list["domna_address_1"]
|
||||
#
|
||||
# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy()
|
||||
# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"})
|
||||
# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str)
|
||||
# asset_list = asset_list.merge(
|
||||
# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]),
|
||||
# how="left",
|
||||
# on="uprn"
|
||||
# )
|
||||
|
||||
|
||||
# Get conservation area data from property details spatial. based on the UPRNs
|
||||
def get_conservation_area_data(uprns):
|
||||
session = sessionmaker(bind=db_engine)()
|
||||
session.begin()
|
||||
|
||||
# Query to get conservation area data
|
||||
spatial_query = session.query(
|
||||
PropertyDetailsSpatial
|
||||
).filter(
|
||||
PropertyDetailsSpatial.uprn.in_(uprns) # Filter by UPRNs
|
||||
).all()
|
||||
|
||||
# Transform spatial data to include all fields dynamically
|
||||
spatial_data = [
|
||||
{col.name: getattr(spatial, col.name) for col in PropertyDetailsSpatial.__table__.columns}
|
||||
for spatial in spatial_query
|
||||
]
|
||||
|
||||
session.close()
|
||||
return pd.DataFrame(spatial_data)
|
||||
|
||||
|
||||
uprns = asset_list[
|
||||
~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "<NA>")
|
||||
]["uprn"].astype(int).unique().tolist()
|
||||
conservation_area_data = get_conservation_area_data(uprns)
|
||||
conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str)
|
||||
asset_list = asset_list.merge(
|
||||
conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]],
|
||||
how="left",
|
||||
on="uprn"
|
||||
)
|
||||
|
||||
# For exporting
|
||||
df.to_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - "
|
||||
"with ID.xlsx",
|
||||
index=False
|
||||
)
|
||||
# asset_list.to_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx",
|
||||
# index=False
|
||||
# )
|
||||
|
||||
condition_costs = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx",
|
||||
sheet_name="Prices - Khalim",
|
||||
header=35
|
||||
)
|
||||
# Remove unnamed columns and reset index
|
||||
condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')]
|
||||
condition_costs = condition_costs.reset_index(drop=True)
|
||||
|
||||
|
||||
# We now estimate condition cost
|
||||
def simulate_condition(asset_list, condition_costs):
|
||||
"""
|
||||
This function is for testing, and will simulate condition cost from 1-10 for each property to see what the
|
||||
costing array looks like.
|
||||
:param df:
|
||||
:return:
|
||||
"""
|
||||
|
||||
condition_df = []
|
||||
for _, row in asset_list.iterrows():
|
||||
|
||||
n_bathrooms = row["bathrooms"]
|
||||
|
||||
conditions = {}
|
||||
for condition in reversed(range(1, 11)):
|
||||
condition_cost = condition_costs[
|
||||
condition_costs["Condition"] == condition
|
||||
].drop(columns=["Condition"]).iloc[0]
|
||||
|
||||
# Each cost is scaled by floor area
|
||||
condition_cost = condition_cost * row["total_floor_area"]
|
||||
condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms
|
||||
|
||||
total_condition_cost = condition_cost.sum()
|
||||
conditions["Condition " + str(condition)] = (total_condition_cost)
|
||||
|
||||
condition_df.append(
|
||||
{
|
||||
"uprn": row["uprn"],
|
||||
**conditions
|
||||
}
|
||||
)
|
||||
|
||||
condition_df = pd.DataFrame(condition_df)
|
||||
|
||||
asset_list = asset_list.merge(
|
||||
condition_df,
|
||||
how="left",
|
||||
on="uprn"
|
||||
)
|
||||
|
||||
return asset_list
|
||||
|
||||
|
||||
# asset_list = simulate_condition(asset_list, condition_costs)
|
||||
|
||||
# We calculate the condition cost based on the condition
|
||||
for _, row in asset_list.iterrows():
|
||||
|
||||
condition = row["condition_score"]
|
||||
if condition in [None, ""]:
|
||||
continue
|
||||
condition = int(float(condition))
|
||||
|
||||
condition_cost = condition_costs[
|
||||
condition_costs["Condition"] == condition
|
||||
].drop(columns=["Condition"]).iloc[0]
|
||||
|
||||
# Each cost is scaled by floor area
|
||||
condition_cost = condition_cost * float(row["total_floor_area"])
|
||||
n_bathrooms = row["n_bathrooms"]
|
||||
condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms)
|
||||
|
||||
total_condition_cost = condition_cost.sum()
|
||||
asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost
|
||||
|
||||
# Store output
|
||||
asset_list.to_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx",
|
||||
index=False
|
||||
)
|
||||
|
||||
condition_cost_comparison = asset_list[
|
||||
["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"]
|
||||
]
|
||||
|
||||
# Testing
|
||||
plans_df.head()
|
||||
|
||||
example = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
|
||||
"SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx"
|
||||
)
|
||||
|
||||
plans_df2 = plans_df.merge(
|
||||
properties_df[["property_id", "landlord_property_id"]],
|
||||
left_on="property_id",
|
||||
right_on="property_id",
|
||||
how="left"
|
||||
)
|
||||
|
||||
plans_df2 = plans_df2[plans_df2["scenario_id"] == 909]
|
||||
|
||||
dupes = plans_df2[plans_df2["property_id"].duplicated()]
|
||||
|
||||
# merge on plans
|
||||
example = example.merge(
|
||||
plans_df, how="left",
|
||||
)
|
||||
>>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue