mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #712 from Hestia-Homes/main
Lambda deployment of postcode splitter
This commit is contained in:
commit
7007c93ca1
70 changed files with 4250 additions and 1838 deletions
|
|
@ -22,7 +22,9 @@
|
|||
"jgclark.vscode-todo-highlight",
|
||||
"corentinartaud.pdfpreview",
|
||||
"ms-python.vscode-python-envs",
|
||||
"ms-python.black-formatter"
|
||||
"ms-python.black-formatter",
|
||||
"GrapeCity.gc-excelviewer",
|
||||
"jakobhoeg.vscode-pokemon"
|
||||
],
|
||||
"settings": {
|
||||
"files.defaultWorkspace": "/workspaces/model",
|
||||
|
|
|
|||
|
|
@ -43,4 +43,17 @@ WORKDIR /workspaces/model
|
|||
|
||||
# 6) Make Python find your package
|
||||
# Add project root to PYTHONPATH for all processes
|
||||
ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
|
||||
ENV PYTHONPATH=/workspaces/model:${PYTHONPATH}
|
||||
|
||||
|
||||
# Install terraform from the official HashiCorp apt repository.
# - The original sequence already ran apt-get/tee without sudo, so the remaining
#   sudo prefixes were redundant and are dropped for consistency.
# - Each index refresh shares a layer with the install that depends on it, so a
#   cached, stale package index is never reused.
# - apt-get (stable CLI) with -y keeps every install non-interactive.
RUN apt-get update && apt-get install -y gnupg software-properties-common
RUN wget -O- https://apt.releases.hashicorp.com/gpg | \
    gpg --dearmor | \
    tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null
RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \
https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \
    tee /etc/apt/sources.list.d/hashicorp.list
RUN apt-get update && apt-get install -y terraform
RUN terraform -install-autocomplete
|
||||
|
|
@ -6,7 +6,7 @@
|
|||
"workspaceFolder": "/workspaces/model",
|
||||
"postStartCommand": "bash .devcontainer/backend/post-install.sh",
|
||||
"mounts": [
|
||||
"source=${localEnv:HOME},target=/workspaces/home,type=bind"
|
||||
"source=${localEnv:HOME},target=/home/vscode,type=bind"
|
||||
],
|
||||
"customizations": {
|
||||
"vscode": {
|
||||
|
|
@ -22,7 +22,11 @@
|
|||
"corentinartaud.pdfpreview",
|
||||
"ms-python.vscode-python-envs",
|
||||
"ms-python.black-formatter",
|
||||
"waderyan.gitblame"
|
||||
"waderyan.gitblame",
|
||||
"GrapeCity.gc-excelviewer",
|
||||
"jakobhoeg.vscode-pokemon",
|
||||
"github.vscode-github-actions",
|
||||
"me-dutour-mathieu.vscode-github-actions"
|
||||
],
|
||||
"settings": {
|
||||
"files.defaultWorkspace": "/workspaces/model",
|
||||
|
|
@ -38,3 +42,4 @@
|
|||
"PYTHONFLAGS": "-Xfrozen_modules=off"
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -9,7 +9,7 @@ mangum==0.19.0
|
|||
# AWS
|
||||
boto3==1.35.44
|
||||
# Data
|
||||
openpyxl==3.1.2
|
||||
openpyxl==3.1.5
|
||||
# Basic
|
||||
pytz
|
||||
uvicorn[standard]
|
||||
|
|
|
|||
14
.github/workflows/_build_image.yml
vendored
14
.github/workflows/_build_image.yml
vendored
|
|
@ -38,6 +38,8 @@ on:
|
|||
required: false
|
||||
DEV_DB_NAME:
|
||||
required: false
|
||||
EPC_AUTH_TOKEN:
|
||||
required: false
|
||||
|
||||
jobs:
|
||||
build:
|
||||
|
|
@ -47,6 +49,7 @@ jobs:
|
|||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }}
|
||||
|
||||
outputs:
|
||||
image_digest: ${{ steps.digest.outputs.image_digest }}
|
||||
|
|
@ -87,14 +90,17 @@ jobs:
|
|||
temp=$(eval echo "$line")
|
||||
BUILD_ARGS="$BUILD_ARGS --build-arg $temp"
|
||||
done <<< "${{ inputs.build_args }}"
|
||||
|
||||
docker build \
|
||||
|
||||
docker buildx build \
|
||||
--no-cache \
|
||||
--platform linux/amd64 \
|
||||
--provenance=false \
|
||||
--sbom=false \
|
||||
--push \
|
||||
-f ${{ inputs.dockerfile_path }} \
|
||||
$BUILD_ARGS \
|
||||
-t $IMAGE_URI \
|
||||
${{ inputs.build_context }}
|
||||
|
||||
docker push $IMAGE_URI
|
||||
|
||||
- name: Resolve image digest
|
||||
id: digest
|
||||
|
|
|
|||
8
.github/workflows/_deploy_lambda.yml
vendored
8
.github/workflows/_deploy_lambda.yml
vendored
|
|
@ -106,4 +106,10 @@ jobs:
|
|||
- name: Terraform Destroy
|
||||
if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true'
|
||||
working-directory: ${{ inputs.lambda_path }}
|
||||
run: terraform destroy -auto-approve
|
||||
run: |
|
||||
terraform destroy -auto-approve \
|
||||
-var="stage=${{ inputs.stage }}" \
|
||||
-var="lambda_name=${{ inputs.lambda_name }}" \
|
||||
-var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \
|
||||
-var="image_digest=${{ inputs.image_digest }}"
|
||||
|
||||
|
|
|
|||
1
.github/workflows/deploy_fastapi_backend.yml
vendored
1
.github/workflows/deploy_fastapi_backend.yml
vendored
|
|
@ -141,3 +141,4 @@ jobs:
|
|||
|
||||
# Deploy to AWS Lambda via Serverless
|
||||
sls deploy --stage ${{ github.ref_name }} --verbose
|
||||
|
||||
|
|
|
|||
19
.github/workflows/deploy_terraform.yml
vendored
19
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -9,6 +9,7 @@ on:
|
|||
- '.github/workflows/deploy_terraform.yml'
|
||||
- '.github/workflows/_build_image.yml'
|
||||
- '.github/workflows/_deploy_lambda.yml'
|
||||
workflow_dispatch:
|
||||
|
||||
jobs:
|
||||
determine_stage:
|
||||
|
|
@ -76,10 +77,10 @@ jobs:
|
|||
run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan
|
||||
|
||||
- name: Terraform Apply
|
||||
if: env.STAGE == 'prod'
|
||||
if: env.TERRAFORM_APPLY == 'true'
|
||||
working-directory: infrastructure/terraform/shared
|
||||
run: terraform apply -auto-approve tfplan
|
||||
|
||||
|
||||
# ============================================================
|
||||
# 2️⃣ Build Address 2 UPRN image and Push
|
||||
# ============================================================
|
||||
|
|
@ -90,10 +91,19 @@ jobs:
|
|||
ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }}
|
||||
dockerfile_path: backend/address2UPRN/handler/Dockerfile
|
||||
build_context: .
|
||||
build_args: |
|
||||
DEV_DB_HOST=$DEV_DB_HOST
|
||||
DEV_DB_PORT=$DEV_DB_PORT
|
||||
DEV_DB_NAME=$DEV_DB_NAME
|
||||
EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN
|
||||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||
EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }}
|
||||
|
||||
# ============================================================
|
||||
# 3️⃣ Deploy Address 2 UPRN Lambda
|
||||
|
|
@ -140,7 +150,7 @@ jobs:
|
|||
# 3️⃣ Deploy Postcode Splitter Lambda
|
||||
# ============================================================
|
||||
postcodeSplitter_lambda:
|
||||
needs: [postcodeSplitter_image, determine_stage]
|
||||
needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda]
|
||||
uses: ./.github/workflows/_deploy_lambda.yml
|
||||
with:
|
||||
lambda_name: postcodeSplitter
|
||||
|
|
@ -192,4 +202,5 @@ jobs:
|
|||
secrets:
|
||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||
|
||||
|
|
|
|||
10
.vscode/settings.json
vendored
10
.vscode/settings.json
vendored
|
|
@ -9,12 +9,14 @@
|
|||
"path": "/bin/bash"
|
||||
}
|
||||
},
|
||||
<<<<<<< HEAD
|
||||
=======
|
||||
"python.testing.unittestEnabled": false,
|
||||
"python.testing.pytestEnabled": true,
|
||||
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"]
|
||||
>>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d
|
||||
"python.testing.pytestArgs": ["-s", "-q", "--no-cov"],
|
||||
|
||||
"python.languageServer": "Pylance",
|
||||
"python.analysis.typeCheckingMode": "strict",
|
||||
"python.analysis.autoSearchPaths": true,
|
||||
"python.analysis.extraPaths": ["./src"]
|
||||
|
||||
// Hot reload setting that needs to be in user settings
|
||||
// "jupyter.runStartupCommands": [
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes
|
|||
logger = setup_logger()
|
||||
|
||||
# OpenAI API key: read from the environment only (None when the variable is unset).
# A literal key must never be committed as a fallback default; any key that has
# appeared in version control should be treated as compromised and revoked.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -13,11 +13,15 @@ from asset_list.utils import get_data
|
|||
from dotenv import load_dotenv
|
||||
from backend.SearchEpc import SearchEpc
|
||||
|
||||
load_dotenv(dotenv_path="backend/.env")
|
||||
load_dotenv(dotenv_path="../backend/.env")
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
)
|
||||
|
||||
OPENAI_API_KEY = os.getenv(
|
||||
"OPENAI_API_KEY",
|
||||
)
|
||||
|
||||
|
||||
def extract_address1(
|
||||
asset_list, full_address_col, postcode_col, method="first_two_words"
|
||||
|
|
@ -109,21 +113,21 @@ def app():
|
|||
)
|
||||
data_filename = "to_standardise_uprns.xlsx"
|
||||
sheet_name = "Sheet1"
|
||||
postcode_column = "Postcode"
|
||||
postcode_column = "POSTCODE"
|
||||
address1_column = None
|
||||
address1_method = "house_number_extraction"
|
||||
fulladdress_column = "Address"
|
||||
address_cols_to_concat = None
|
||||
fulladdress_column = "ADDRESS"
|
||||
address_cols_to_concat = []
|
||||
missing_postcodes_method = None
|
||||
landlord_year_built = None
|
||||
landlord_os_uprn = None
|
||||
landlord_property_type = None
|
||||
landlord_built_form = None
|
||||
landlord_wall_construction = None
|
||||
landlord_roof_construction = None
|
||||
landlord_heating_system = None
|
||||
landlord_property_type = "PROPERTY TYPE"
|
||||
landlord_built_form = None # Skipped as empty
|
||||
landlord_wall_construction = "wall combined" # combin F + G
|
||||
landlord_roof_construction = "HEATING SYSTEM" # Combine I + J
|
||||
landlord_heating_system = None # Check with Khalim
|
||||
landlord_existing_pv = None
|
||||
landlord_property_id = "LLUPRN"
|
||||
landlord_property_id = "UPRN"
|
||||
landlord_sap = None
|
||||
outcomes_filename = None
|
||||
outcomes_sheetname = None
|
||||
|
|
@ -275,7 +279,7 @@ def app():
|
|||
if skip is not None and not force_retrieve_data:
|
||||
if i <= skip:
|
||||
continue
|
||||
chunk = asset_list.standardised_asset_list[i: i + chunk_size]
|
||||
chunk = asset_list.standardised_asset_list[i : i + chunk_size]
|
||||
epc_data_chunk, errors_chunk, no_epc_chunk = get_data(
|
||||
df=chunk,
|
||||
row_id_name=asset_list.DOMNA_PROPERTY_ID,
|
||||
|
|
@ -418,7 +422,7 @@ def app():
|
|||
# Retrieve just the data we need
|
||||
epc_df = epc_df[
|
||||
[asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys())
|
||||
].rename(columns=asset_list.EPC_API_DATA_NAMES)
|
||||
].rename(columns=asset_list.EPC_API_DATA_NAMES)
|
||||
|
||||
# Look for columns not in the find my EPC data, which will have happened if we didn't
|
||||
# retrieve it in the first place
|
||||
|
|
@ -435,7 +439,7 @@ def app():
|
|||
find_my_epc_data[
|
||||
[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]
|
||||
+ list(asset_list.FIND_EPC_DATA_NAMES.keys())
|
||||
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||||
].rename(columns=asset_list.FIND_EPC_DATA_NAMES),
|
||||
how="left",
|
||||
on=asset_list.DOMNA_PROPERTY_ID,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -19,4 +19,4 @@ PLAN_TRIGGER_BUCKET=test
|
|||
DATA_BUCKET=test
|
||||
EPC_AUTH_TOKEN=test
|
||||
ENGINE_SQS_URL=test
|
||||
ENERGY_ASSESSMENTS_BUCKET=test
|
||||
ENERGY_ASSESSMENTS_BUCKET=test
|
||||
|
|
|
|||
|
|
@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3
|
|||
from backend.app.utils import sap_to_epc
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
)
|
||||
|
||||
|
||||
class Outputs:
|
||||
|
|
@ -42,7 +46,7 @@ class Outputs:
|
|||
"flat_roof_insulation": "Flat roof (Out of scope - prov sum only)",
|
||||
"room_in_roof_insulation": "RIR (POA - Prov sum only)",
|
||||
"ev_charging": "EV Charging",
|
||||
"battery": "Battery"
|
||||
"battery": "Battery",
|
||||
}
|
||||
|
||||
def __init__(self, format, portfolio_id):
|
||||
|
|
@ -67,28 +71,38 @@ class Outputs:
|
|||
# Download cleaned data
|
||||
self.cleaned_epc_lookup = read_from_s3(
|
||||
s3_file_name="cleaned_epc_data/cleaned.bson",
|
||||
bucket_name="retrofit-data-dev"
|
||||
bucket_name="retrofit-data-dev",
|
||||
)
|
||||
|
||||
self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False)
|
||||
|
||||
def get_properties_from_db(self):
|
||||
# Get properties and their details for a specific portfolio
|
||||
properties_query = self.session.query(
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel
|
||||
).join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id
|
||||
).filter(
|
||||
PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID
|
||||
).all()
|
||||
properties_query = (
|
||||
self.session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
.filter(
|
||||
PropertyModel.portfolio_id
|
||||
== self.portfolio_id # Filter by portfolio ID
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform properties data to include all fields dynamically
|
||||
properties_data = [
|
||||
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
|
||||
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
|
||||
PropertyDetailsEpcModel.__table__.columns}}
|
||||
{
|
||||
**{
|
||||
col.name: getattr(prop.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for prop in properties_query
|
||||
]
|
||||
|
||||
|
|
@ -96,10 +110,14 @@ class Outputs:
|
|||
|
||||
def get_plans_from_db(self):
|
||||
|
||||
plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all()
|
||||
plans_query = (
|
||||
self.session.query(PlanModel)
|
||||
.filter(PlanModel.portfolio_id == self.portfolio_id)
|
||||
.all()
|
||||
)
|
||||
# Transform plans data to include all fields dynamically
|
||||
plans_data = [
|
||||
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
|
||||
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
|
||||
for plan in plans_query
|
||||
]
|
||||
|
||||
|
|
@ -107,28 +125,38 @@ class Outputs:
|
|||
|
||||
def get_recommendations_from_db(self, plan_ids):
|
||||
# Get recommendations through PlanRecommendations for those plans and that are default
|
||||
recommendations_query = self.session.query(
|
||||
Recommendation,
|
||||
Plan.scenario_id
|
||||
).join(
|
||||
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
|
||||
).join(
|
||||
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
|
||||
).filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default == True # Filtering for default recommendations
|
||||
).all()
|
||||
recommendations_query = (
|
||||
self.session.query(Recommendation, PlanModel.scenario_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(
|
||||
PlanModel,
|
||||
PlanModel.id
|
||||
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
|
||||
)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default == True, # Filtering for default recommendations
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform recommendations data to include all fields dynamically and include scenario_id
|
||||
recommendations_data = [
|
||||
{
|
||||
**{
|
||||
col.name: getattr(rec.Recommendation, col.name) if
|
||||
hasattr(rec, 'Recommendation') else getattr(rec, col.name)
|
||||
col.name: (
|
||||
getattr(rec.Recommendation, col.name)
|
||||
if hasattr(rec, "Recommendation")
|
||||
else getattr(rec, col.name)
|
||||
)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"Scenario ID": rec.scenario_id
|
||||
} for rec in recommendations_query
|
||||
"Scenario ID": rec.scenario_id,
|
||||
}
|
||||
for rec in recommendations_query
|
||||
]
|
||||
|
||||
return recommendations_data
|
||||
|
|
@ -148,7 +176,9 @@ class Outputs:
|
|||
measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None)
|
||||
|
||||
# If the property_id already exists in the collected rows, update it
|
||||
existing_row = next((item for item in rows if item["property_id"] == property_id), None)
|
||||
existing_row = next(
|
||||
(item for item in rows if item["property_id"] == property_id), None
|
||||
)
|
||||
if existing_row is None:
|
||||
# Create a new row if the property_id doesn't exist
|
||||
new_row = {measure: None for measure in all_measures}
|
||||
|
|
@ -196,7 +226,7 @@ class Outputs:
|
|||
properties_data = self.get_properties_from_db()
|
||||
|
||||
plans_data = self.get_plans_from_db()
|
||||
plan_ids = [plan['id'] for plan in plans_data]
|
||||
plan_ids = [plan["id"] for plan in plans_data]
|
||||
|
||||
recommendations_data = self.get_recommendations_from_db(plan_ids)
|
||||
self.session.close()
|
||||
|
|
@ -209,50 +239,54 @@ class Outputs:
|
|||
scenario_ids = plans_df["scenario_id"].unique()
|
||||
|
||||
# We start to create the MDS sheet
|
||||
mds = properties_df[
|
||||
[
|
||||
"property_id",
|
||||
"address",
|
||||
"postcode",
|
||||
"uprn",
|
||||
"current_epc_rating",
|
||||
"current_sap_points",
|
||||
"primary_energy_consumption",
|
||||
"property_type",
|
||||
"built_form",
|
||||
"total_floor_area",
|
||||
"walls",
|
||||
"tenure",
|
||||
"mainfuel",
|
||||
# The bills columns are split out - we include them and aggregate, without appliances
|
||||
"heating_cost_current",
|
||||
"hot_water_cost_current",
|
||||
"lighting_cost_current",
|
||||
"gas_standing_charge",
|
||||
"electricity_standing_charge"
|
||||
mds = (
|
||||
properties_df[
|
||||
[
|
||||
"property_id",
|
||||
"address",
|
||||
"postcode",
|
||||
"uprn",
|
||||
"current_epc_rating",
|
||||
"current_sap_points",
|
||||
"primary_energy_consumption",
|
||||
"property_type",
|
||||
"built_form",
|
||||
"total_floor_area",
|
||||
"walls",
|
||||
"tenure",
|
||||
"mainfuel",
|
||||
# The bills columns are split out - we include them and aggregate, without appliances
|
||||
"heating_cost_current",
|
||||
"hot_water_cost_current",
|
||||
"lighting_cost_current",
|
||||
"gas_standing_charge",
|
||||
"electricity_standing_charge",
|
||||
]
|
||||
]
|
||||
].copy().rename(
|
||||
columns={
|
||||
"address": "Address",
|
||||
"postcode": "Postcode",
|
||||
"uprn": "UPRN",
|
||||
"current_epc_rating": "Pre EPC",
|
||||
"current_sap_points": "EPC Source",
|
||||
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
|
||||
"property_type": "Property Type",
|
||||
"built_form": "Built Form",
|
||||
"total_floor_area": "Floor area m2 (If known)",
|
||||
"walls": "Wall Type (Mandatory field)",
|
||||
"tenure": "Tenure",
|
||||
}
|
||||
.copy()
|
||||
.rename(
|
||||
columns={
|
||||
"address": "Address",
|
||||
"postcode": "Postcode",
|
||||
"uprn": "UPRN",
|
||||
"current_epc_rating": "Pre EPC",
|
||||
"current_sap_points": "EPC Source",
|
||||
"primary_energy_consumption": "Existing Heating Demand Kwh/m2/y",
|
||||
"property_type": "Property Type",
|
||||
"built_form": "Built Form",
|
||||
"total_floor_area": "Floor area m2 (If known)",
|
||||
"walls": "Wall Type (Mandatory field)",
|
||||
"tenure": "Tenure",
|
||||
}
|
||||
)
|
||||
)
|
||||
|
||||
mds["Estimated bill (£ per year)"] = (
|
||||
mds["heating_cost_current"] +
|
||||
mds["hot_water_cost_current"] +
|
||||
mds["lighting_cost_current"] +
|
||||
mds["gas_standing_charge"] +
|
||||
mds["electricity_standing_charge"]
|
||||
mds["heating_cost_current"]
|
||||
+ mds["hot_water_cost_current"]
|
||||
+ mds["lighting_cost_current"]
|
||||
+ mds["gas_standing_charge"]
|
||||
+ mds["electricity_standing_charge"]
|
||||
)
|
||||
|
||||
mds = mds.drop(
|
||||
|
|
@ -261,65 +295,84 @@ class Outputs:
|
|||
"hot_water_cost_current",
|
||||
"lighting_cost_current",
|
||||
"gas_standing_charge",
|
||||
"electricity_standing_charge"
|
||||
"electricity_standing_charge",
|
||||
]
|
||||
)
|
||||
|
||||
# Formatting - Pre EPC is an enum
|
||||
mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values]
|
||||
mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0]
|
||||
mds["Wall Type (Mandatory field)"] = (
|
||||
mds["Wall Type (Mandatory field)"].str.split(",").str[0]
|
||||
)
|
||||
# Remove average thermal transmittance field
|
||||
mds["Wall Type (Mandatory field)"] = np.where(
|
||||
mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"),
|
||||
mds["Wall Type (Mandatory field)"].str.contains(
|
||||
"Average thermal transmittance"
|
||||
),
|
||||
"",
|
||||
mds["Wall Type (Mandatory field)"]
|
||||
mds["Wall Type (Mandatory field)"],
|
||||
)
|
||||
|
||||
mds = mds.merge(
|
||||
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]],
|
||||
pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[
|
||||
["clean_description", "fuel_type"]
|
||||
],
|
||||
left_on="mainfuel",
|
||||
right_on="clean_description",
|
||||
how="left"
|
||||
how="left",
|
||||
)
|
||||
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(
|
||||
columns=["clean_description", "mainfuel"]
|
||||
)
|
||||
mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"])
|
||||
|
||||
mds["Existing Fuel Type"].value_counts()
|
||||
|
||||
mds_output_by_scenario = {}
|
||||
for scenario_id in scenario_ids:
|
||||
scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id]
|
||||
scenario_recommendations = recommendations_df[
|
||||
recommendations_df["Scenario ID"] == scenario_id
|
||||
]
|
||||
|
||||
# For each measure, we create the measure matrix
|
||||
scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations)
|
||||
scenario_measure_matrix = self.make_mds_measure_matrix(
|
||||
scenario_recommendations
|
||||
)
|
||||
|
||||
# Calculate the predicted impact on: SAP, heat demand, bills, kwh
|
||||
recommendation_impacts = scenario_recommendations.groupby("property_id")[
|
||||
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
|
||||
].sum().reset_index()
|
||||
recommendation_impacts = (
|
||||
scenario_recommendations.groupby("property_id")[
|
||||
["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"]
|
||||
]
|
||||
.sum()
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
scenario_mds = mds.merge(
|
||||
scenario_measure_matrix, how="left", on="property_id"
|
||||
).merge(
|
||||
recommendation_impacts, how="left", on="property_id"
|
||||
)
|
||||
).merge(recommendation_impacts, how="left", on="property_id")
|
||||
# If we have no recommendations, sap_points, kwh_savings, heat_demand will be NaN
|
||||
to_clean = [c for c in recommendation_impacts.columns if c != "property_id"]
|
||||
for col in to_clean:
|
||||
scenario_mds[col].fillna(0, inplace=True)
|
||||
scenario_mds.fillna(0, inplace=True)
|
||||
scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"]
|
||||
scenario_mds["Post SAP"] = (
|
||||
scenario_mds["EPC Source"] + scenario_mds["sap_points"]
|
||||
)
|
||||
# Round Post SAP down to the nearest integer
|
||||
scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x))
|
||||
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x))
|
||||
scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(
|
||||
lambda x: sap_to_epc(x)
|
||||
)
|
||||
scenario_mds["Heating Demand Kwh/m2/y"] = (
|
||||
scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"]
|
||||
scenario_mds["Existing Heating Demand Kwh/m2/y"]
|
||||
- scenario_mds["heat_demand"]
|
||||
)
|
||||
|
||||
scenario_mds = scenario_mds.rename(
|
||||
columns={
|
||||
"sap_points": "Predicted SAP Points",
|
||||
"kwh_savings": "Energy Saving (Kwh)",
|
||||
"energy_cost_savings": "Bill Reduction (£ per yr)"
|
||||
"energy_cost_savings": "Bill Reduction (£ per yr)",
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -330,7 +383,7 @@ class Outputs:
|
|||
save_excel_to_s3(
|
||||
df=scenario_mds,
|
||||
file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx",
|
||||
bucket_name="retrofit-data-dev"
|
||||
bucket_name="retrofit-data-dev",
|
||||
)
|
||||
|
||||
def export(self):
|
||||
|
|
|
|||
|
|
@ -1,4 +1,17 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
# FROM python:3.11.10-bullseye
|
||||
|
||||
|
||||
ARG DEV_DB_HOST
|
||||
ARG DEV_DB_PORT
|
||||
ARG DEV_DB_NAME
|
||||
ARG EPC_AUTH_TOKEN
|
||||
|
||||
ENV DB_HOST=${DEV_DB_HOST}
|
||||
ENV DB_PORT=${DEV_DB_PORT}
|
||||
ENV DB_NAME=${DEV_DB_NAME}
|
||||
ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}
|
||||
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
|
@ -8,13 +21,17 @@ WORKDIR /var/task
|
|||
# -----------------------------
|
||||
COPY backend/address2UPRN/handler/requirements.txt .
|
||||
|
||||
|
||||
# Install dependencies into Lambda runtime
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# -----------------------------
|
||||
# Copy application code
|
||||
# -----------------------------
|
||||
|
||||
# Copy necessary files for database and utility imports
|
||||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
|
||||
# Copy the handler
|
||||
COPY backend/address2UPRN/main.py .
|
||||
|
||||
# -----------------------------
|
||||
|
|
|
|||
|
|
@ -1,3 +1,11 @@
|
|||
epc-api-python==1.0.2
|
||||
pandas==2.2.2
|
||||
numpy<2.0
|
||||
requests
|
||||
tqdm
|
||||
pandas
|
||||
openpyxl
|
||||
epc-api-python==1.0.2
|
||||
boto3==1.35.44
|
||||
sqlmodel
|
||||
sqlalchemy==2.0.36
|
||||
psycopg2-binary==2.9.10
|
||||
pydantic-settings==2.6.0
|
||||
|
|
@ -3,12 +3,23 @@ import os
|
|||
from urllib.parse import urlencode
|
||||
import pandas as pd
|
||||
from difflib import SequenceMatcher
|
||||
from tqdm import tqdm
|
||||
from utils.logger import setup_logger
|
||||
import re
|
||||
from typing import Set
|
||||
import json
|
||||
import requests
|
||||
from uuid import UUID
|
||||
import uuid
|
||||
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
||||
from utils.s3 import (
|
||||
save_csv_to_s3,
|
||||
read_csv_from_s3 as read_csv_from_s3_dict,
|
||||
parse_s3_uri,
|
||||
)
|
||||
from datetime import datetime
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
import re
|
||||
|
||||
EPC_AUTH_TOKEN = os.getenv(
|
||||
"EPC_AUTH_TOKEN",
|
||||
|
|
@ -17,9 +28,28 @@ EPC_AUTH_TOKEN = os.getenv(
|
|||
if EPC_AUTH_TOKEN is None:
|
||||
raise RuntimeError("EPC_AUTH_TOKEN not defined in env")
|
||||
|
||||
import re
|
||||
from difflib import SequenceMatcher
|
||||
from typing import Set
|
||||
|
||||
def is_valid_postcode(postcode_clean: str) -> bool:
    """
    Validate a postcode using the postcodes.io validation endpoint.

    Expects a sanitised postcode (e.g. E84SQ).

    :param postcode_clean: The sanitised postcode to validate
    :return: True if postcodes.io reports the postcode as valid; False for empty
        input, network/HTTP errors, or a response that is not the expected JSON
    """
    POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
    if not postcode_clean:
        return False

    # Local import so this function does not depend on the file-level import list.
    from urllib.parse import quote

    # Escape the postcode so characters such as "/" or "?" cannot alter the URL path.
    url = POSTCODES_IO_VALIDATE_URL.format(postcode=quote(postcode_clean, safe=""))

    try:
        resp = requests.get(url, timeout=5)
        resp.raise_for_status()
        payload = resp.json()
    except (requests.RequestException, ValueError):
        # Network issues, rate limits, HTTP errors, or a body that is not JSON.
        # ValueError covers JSON decode errors on every requests version.
        return False

    if not isinstance(payload, dict):
        return False

    # Coerce so the function honours its annotated bool return type.
    return bool(payload.get("result", False))
|
||||
|
||||
|
||||
def levenshtein(a: str, b: str) -> float:
|
||||
|
|
@ -300,27 +330,29 @@ def get_uprn_candidates(
|
|||
)
|
||||
|
||||
|
||||
def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
|
||||
def get_uprn_with_epc_df(
|
||||
user_inputed_address: str,
|
||||
epc_df: pd.DataFrame,
|
||||
verbose: bool = False,
|
||||
):
|
||||
"""
|
||||
Return uprn (str)
|
||||
Return False if failed to find a sensible matching epc
|
||||
Return Nons when epc found but no UPRN
|
||||
Return uprn (str) using a pre-fetched EPC dataframe.
|
||||
This avoids calling the API multiple times for the same postcode.
|
||||
"""
|
||||
df = get_epc_data_with_postcode(postcode=postcode)
|
||||
|
||||
if df.empty:
|
||||
if epc_df.empty:
|
||||
return None
|
||||
|
||||
scored_df = get_uprn_candidates(
|
||||
df,
|
||||
epc_df,
|
||||
user_address=user_inputed_address,
|
||||
)
|
||||
|
||||
# Best score
|
||||
best_score = scored_df.iloc[0]["lexiscore"]
|
||||
|
||||
if best_score <= 0:
|
||||
return None
|
||||
# # Return None if score is below threshold
|
||||
# if best_score < 0.7:
|
||||
# return None
|
||||
|
||||
# All rank-1 rows (possible draw)
|
||||
top_rank_df = scored_df[scored_df["lexirank"] == 1]
|
||||
|
|
@ -330,18 +362,41 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False):
|
|||
return None
|
||||
|
||||
address = top_rank_df["address"].values[0]
|
||||
lexiscore = float(top_rank_df["lexiscore"].values[0])
|
||||
score = float(top_rank_df["lexiscore"].values[0])
|
||||
|
||||
logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}")
|
||||
logger.info(f"Address found to be: {address}, with lexiscore {score}")
|
||||
# Safe to return the agreed UPRN
|
||||
found_uprn = top_rank_df.iloc[0]["uprn"]
|
||||
|
||||
if found_uprn == "":
|
||||
return None
|
||||
|
||||
if return_address:
|
||||
return found_uprn, address
|
||||
return found_uprn
|
||||
if verbose:
|
||||
return (found_uprn, address, score)
|
||||
else:
|
||||
return found_uprn
|
||||
|
||||
|
||||
def get_uprn(
    user_inputed_address: str,
    postcode: str,
    verbose: bool = False,
):
    """
    Look up the UPRN for a single address, fetching the postcode's EPC data first.

    The EPC records for ``postcode`` are fetched via get_epc_data_with_postcode and
    passed to get_uprn_with_epc_df; that function's result is returned unchanged.

    When several addresses share one postcode, fetch the EPC dataframe once and call
    get_uprn_with_epc_df directly so the API is not queried once per address.

    NOTE(review): the failure paths of get_uprn_with_epc_df visible in this file
    return None (not False), so callers should test the result with ``is None``.

    :param user_inputed_address: Address text to match against the EPC records
    :param postcode: Postcode used to fetch the candidate EPC records
    :param verbose: Forwarded unchanged to get_uprn_with_epc_df
    :return: The value returned by get_uprn_with_epc_df for the fetched data
    """
    postcode_epc_df = get_epc_data_with_postcode(postcode=postcode)
    return get_uprn_with_epc_df(
        user_inputed_address=user_inputed_address,
        epc_df=postcode_epc_df,
        verbose=verbose,
    )
|
||||
|
||||
|
||||
def resolve_uprns_for_postcode_group(
|
||||
|
|
@ -424,148 +479,302 @@ def resolve_uprns_for_postcode_group(
|
|||
)
|
||||
|
||||
|
||||
def test(a, b):
|
||||
assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}"
|
||||
def save_results_to_s3(
    results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
) -> bool:
    """
    Write a results DataFrame to S3 as a CSV file.

    The object key is
    ``ara_raw_outputs/{task_id}/{sub_task_id}/{timestamp}_{random8}.csv``, where the
    timestamp is ``datetime.now().isoformat()`` and ``random8`` is the first eight
    characters of a fresh UUID4.

    :param results_df: The DataFrame containing results
    :param task_id: The task ID (first folder level of the object key)
    :param sub_task_id: The sub-task ID (second folder level of the object key)
    :param bucket_name: The S3 bucket name; when None, the S3_BUCKET_NAME
        environment variable is used
    :return: True if save_csv_to_s3 reported success; False if no bucket could be
        determined, the save reported failure, or any exception was raised
    """
    # Only an explicit None falls back to the environment; an empty string does not.
    target_bucket = os.getenv("S3_BUCKET_NAME") if bucket_name is None else bucket_name

    if not target_bucket:
        logger.error(
            "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
        )
        return False

    try:
        # Timestamp plus a short random suffix keeps keys unique per invocation.
        stamp = datetime.now().isoformat()
        suffix = str(uuid.uuid4())[:8]
        file_key = f"ara_raw_outputs/{task_id}/{sub_task_id}/{stamp}_{suffix}.csv"

        if not save_csv_to_s3(results_df, target_bucket, file_key):
            logger.error("Failed to save results to S3")
            return False

        logger.info(f"Successfully saved results to s3://{target_bucket}/{file_key}")
        return True

    except Exception as e:
        # Best-effort boundary: report the failure to the caller instead of raising.
        logger.error(f"Error saving results to S3: {str(e)}")
        return False
|
||||
|
||||
|
||||
def run_all_test():
|
||||
# Basic usage with different post codes styles
|
||||
test(get_epc_data_with_postcode("b93 8sy").shape[0], 63)
|
||||
test(get_epc_data_with_postcode("B938sy").shape[0], 63)
|
||||
test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
|
||||
test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63)
|
||||
def handler(event, context, local=False):
    """
    Lambda entry point: resolve UPRNs for batches of addresses stored in S3.

    Each record (``event["Records"]``, or the event itself when there is no
    ``Records`` key) must carry a body - a JSON string or a dict - with:

    * ``task_id``: UUID of the parent task
    * ``sub_task_id``: UUID of the sub-task whose status is updated
    * ``s3_uri``: location of the input CSV

    For every record the CSV is loaded, rows are grouped by ``postcode_clean``,
    EPC data is fetched once per postcode, each address is matched with
    ``get_uprn_with_epc_df`` and the enriched rows are written back to S3 with
    ``save_results_to_s3``. The sub-task is marked "in progress", then
    "completed" or "failed".

    Rows with no ``postcode_clean``, rows in an invalid postcode, rows in a
    postcode whose EPC lookup fails and rows with an empty ``user_input`` are
    skipped and do not appear in the output file.

    :param event: The Lambda event (SQS-style batch or a single record)
    :param context: The Lambda context object (only used for logging)
    :param local: When True the event is replaced by a hard-coded sample
        record for local testing
    :return: dict with ``statusCode`` and a JSON ``body``. 500 with the error
        list when errors occurred and no record succeeded; otherwise 200 with
        one compact summary per processed record under ``processed`` and the
        error list (or None) under ``errors``.
    """
    print("=== Address2UPRN Lambda Handler ===")
    # getattr keeps local runs working when no real Lambda context is supplied
    print(f"Function: {getattr(context, 'function_name', None)}")
    print(f"Request ID: {getattr(context, 'aws_request_id', None)}")

    # Handle local testing
    if local is True:
        event = {
            "Records": [
                {
                    "body": json.dumps(
                        {
                            "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
                            "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d",
                            "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv",
                        }
                    )
                }
            ]
        }

    print(f"Event: {json.dumps(event, indent=2, default=str)}")
    print("===================================")

    # Handle both single event and batch events (SQS, etc.)
    records = event.get("Records", [event])
    results = []
    errors = []
    subtask_interface = SubTaskInterface()

    for record in records:
        task_id = None
        subtask_id = None
        try:
            # Parse body (inputs)
            if isinstance(record.get("body"), str):
                body = json.loads(record["body"])
            else:
                body = record.get("body", {})

            # Validate required fields
            task_id = body.get("task_id")
            subtask_id = body.get("sub_task_id")
            s3_uri = body.get("s3_uri")

            if not task_id:
                errors.append({"error": "Missing required field: task_id"})
                continue

            if not subtask_id:
                errors.append({"error": "Missing required field: sub_task_id"})
                continue

            if not s3_uri:
                errors.append({"error": "Missing required field: s3_uri"})
                continue

            # Convert task_id to UUID
            try:
                task_id = UUID(task_id) if isinstance(task_id, str) else task_id
            except ValueError as e:
                errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"})
                continue

            # Convert sub_task_id to UUID
            try:
                subtask_id = (
                    UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
                )
            except ValueError as e:
                errors.append(
                    {"error": f"Invalid UUID format for sub_task_id: {str(e)}"}
                )
                continue

            # Update existing subtask to 'in progress'
            subtask_interface.update_subtask_status(subtask_id, "in progress")
            logger.info(f"Processing subtask {subtask_id} for task {task_id}")

            # Parse S3 URI and read CSV from S3
            logger.info(f"Reading data from S3: {s3_uri}")
            try:
                bucket, key = parse_s3_uri(s3_uri)
                csv_data = read_csv_from_s3_dict(bucket, key)
                df = pd.DataFrame(csv_data)
                logger.info(f"Loaded {len(df)} rows from S3")
            except Exception as s3_error:
                logger.error(f"Failed to read data from S3: {s3_error}")
                errors.append(
                    {"error": "Failed to read data from S3", "details": str(s3_error)}
                )
                try:
                    subtask_interface.update_subtask_status(
                        subtask_id, "failed", outputs={"error": str(s3_error)}
                    )
                except Exception as db_error:
                    logger.error(f"Failed to update subtask status: {db_error}")
                continue

            # Process the rows
            logger.info(f"Processing {len(df)} rows for task {task_id}")

            # Create user_input column by concatenating Address columns if not already present
            if "user_input" not in df.columns:
                df["user_input"] = (
                    df["Address 1"].fillna("")
                    + " "
                    + df["Address 2"].fillna("")
                    + " "
                    + df["Address 3"].fillna("")
                ).str.strip()
                logger.info(
                    "Created user_input column from Address 1, Address 2 and Address 3"
                )
            else:
                logger.info("user_input column already present in data")

            # Rows without a cleaned postcode cannot be matched and are dropped
            clean_df = df.dropna(subset=["postcode_clean"])

            postcode_to_addresses = {
                postcode: group.to_dict(orient="records")
                for postcode, group in clean_df.groupby("postcode_clean", sort=False)
            }

            logger.info(f"Total postcodes: {len(postcode_to_addresses)}")

            # Process each postcode group
            results_data = []

            for postcode, postcode_rows in postcode_to_addresses.items():
                logger.info(
                    f"Processing postcode: {postcode} with {len(postcode_rows)} rows"
                )

                # Validate postcode before processing
                if not is_valid_postcode(postcode):
                    logger.warning(f"Postcode {postcode} is invalid, skipping")
                    continue

                # Fetch EPC data once per postcode
                try:
                    epc_df = get_epc_data_with_postcode(postcode=postcode)
                    logger.info(
                        f"Fetched {len(epc_df)} EPC records for postcode {postcode}"
                    )
                except Exception as e:
                    logger.error(
                        f"Failed to fetch EPC data for postcode {postcode}: {e}"
                    )
                    continue

                # Process each address in this postcode with the same EPC data
                for row in postcode_rows:
                    try:
                        user_input = row.get("user_input", "")
                        if not user_input:
                            logger.warning(
                                f"Skipping row with missing user_input for postcode {postcode}"
                            )
                            continue

                        # Get UPRN using the pre-fetched EPC data with all return options
                        result = get_uprn_with_epc_df(
                            user_inputed_address=user_input, epc_df=epc_df, verbose=True
                        )

                        # Parse result tuple if successful
                        if result:
                            uprn, found_address, score = result
                            logger.info(
                                f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})"
                            )
                            results_data.append(
                                {
                                    **row,  # Include all original data
                                    "uprn": uprn,
                                    "domna_found_address": found_address,
                                    "domna_lexiscore": score,
                                }
                            )
                        else:
                            logger.warning(
                                f"No UPRN found for {user_input} in {postcode}"
                            )
                            results_data.append(
                                {
                                    **row,  # Include all original data
                                    "uprn": None,
                                    "domna_found_address": None,
                                    "domna_lexiscore": None,
                                }
                            )

                    except Exception as e:
                        logger.error(
                            f"Error processing address {row.get('user_input', 'unknown')}: {e}"
                        )
                        # Still add the row with error markers
                        results_data.append(
                            {
                                **row,
                                "uprn": None,
                                "domna_found_address": None,
                                "domna_lexiscore": None,
                                "error": str(e),
                            }
                        )

            # Create results DataFrame
            result_df = pd.DataFrame(results_data)
            uprns_found = sum(1 for item in results_data if item.get("uprn"))

            # Save results to S3. save_results_to_s3 signals failure through its
            # return value rather than by raising, so the value must be checked
            # before the subtask can be called completed.
            if not save_results_to_s3(result_df, str(task_id), str(subtask_id)):
                save_error = "Failed to save results to S3"
                logger.error(f"{save_error} for subtask {subtask_id}")
                errors.append({"error": save_error})
                try:
                    subtask_interface.update_subtask_status(
                        subtask_id, "failed", outputs={"error": save_error}
                    )
                except Exception as db_error:
                    logger.error(f"Failed to update subtask status: {db_error}")
                continue

            # Compact per-record summary; the matched rows themselves live in S3
            summary = {
                "task_id": str(task_id),
                "sub_task_id": str(subtask_id),
                "rows_processed": len(results_data),
                "uprns_found": uprns_found,
            }

            # Mark subtask as completed
            # NOTE(review): assumes `outputs` accepts integer values - confirm
            # against SubTaskInterface.update_subtask_status.
            try:
                subtask_interface.update_subtask_status(
                    subtask_id,
                    "completed",
                    outputs={
                        "rows_processed": len(results_data),
                        "uprns_found": uprns_found,
                    },
                )
                logger.info(f"Marked subtask {subtask_id} as completed")
            except Exception as db_error:
                logger.error(f"Failed to mark subtask as completed: {db_error}")

            results.append(summary)

        except Exception as e:
            logger.error(f"Unexpected error processing record: {e}", exc_info=True)
            errors.append({"error": "Unexpected error", "details": str(e)})
            # Mark subtask as failed if we have one
            if subtask_id:
                try:
                    subtask_interface.update_subtask_status(
                        subtask_id, "failed", outputs={"error": str(e)}
                    )
                except Exception as db_error:
                    logger.error(f"Failed to update subtask status: {db_error}")

    logger.info(results)

    # Return error if all records failed
    if errors and not results:
        return {"statusCode": 500, "body": json.dumps({"errors": errors})}

    return {
        "statusCode": 200,
        "body": json.dumps(
            {"processed": results, "errors": errors if errors else None}
        ),
    }
|
||||
|
||||
|
||||
def handler(event, context):
|
||||
print("hello world")
|
||||
return {"statusCode": 200, "body": "hello world"}
|
||||
|
||||
|
||||
# TO do function dispatcher,
|
||||
|
||||
# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate)
|
||||
# fix that
|
||||
# Look again at flat 1
|
||||
# pandas reader the separate postcode_splitter
|
||||
# dump into s3
|
||||
# TODO:
|
||||
# Don't add results to return messages as its too verbose
|
||||
# capture the exception (as e) into s3, so the logs can be found in s3
|
||||
# Upload results to s3 as well as csv
|
||||
|
|
|
|||
|
|
@ -1,3 +1,5 @@
|
|||
# one time script for a customer forhousing
|
||||
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
from backend.address2UPRN.main import get_uprn
|
||||
|
|
@ -5,20 +7,35 @@ from backend.address2UPRN.main import get_uprn
|
|||
# Enable tqdm for pandas
|
||||
tqdm.pandas()
|
||||
|
||||
df = pd.read_excel("address2.xlsx")
|
||||
file_name = "forhousing.xlsx"
|
||||
|
||||
df = pd.read_excel(file_name)
|
||||
|
||||
|
||||
def extract_uprn(row):
    """
    Look up the UPRN for one spreadsheet row.

    Reads the ``Address`` and ``Postcode`` columns of *row* and calls
    ``get_uprn`` asking for the matched address, the EPC and the score as well.

    :param row: A row of the input DataFrame (as passed by ``apply(axis=1)``)
    :return: ``pd.Series([uprn, found_address, epc, score])``; four ``None``
        values when no match was found
    """
    user_input = "Address"
    postcode = "Postcode"
    result = get_uprn(
        row[user_input],
        row[postcode],
        return_address=True,
        return_EPC=True,
        return_score=True,
    )

    # A falsy result (None or False) means "no match"; unpacking it below would
    # raise, so both are mapped to an all-None row.
    if not result:
        return pd.Series([None, None, None, None])

    uprn, found_address, epc, score = result
    return pd.Series([uprn, found_address, epc, score])
|
||||
|
||||
|
||||
df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1)
|
||||
df[["juntes uprn", "junte found address", "junte found epc", "junte score"]] = (
|
||||
df.progress_apply(extract_uprn, axis=1)
|
||||
)
|
||||
|
||||
df.to_excel("outputs2.xlsx", index=False)
|
||||
df.to_excel(f"{file_name}_outputs.xlsx", index=False)
|
||||
|
||||
# TODO: add lexiscore
|
||||
# TODO: run it
|
||||
# TODO: give it to danny
|
||||
|
|
|
|||
|
|
@ -18,37 +18,37 @@ def resolve_env_file() -> Optional[str]:
|
|||
|
||||
|
||||
class Settings(BaseSettings):
|
||||
API_KEY: str
|
||||
API_KEY: str = "changeme"
|
||||
API_KEY_NAME: str = "X-API-KEY"
|
||||
SECRET_KEY: str
|
||||
ENVIRONMENT: str
|
||||
DATA_BUCKET: str
|
||||
SECRET_KEY: str = "changeme"
|
||||
ENVIRONMENT: str = "changeme"
|
||||
DATA_BUCKET: str = "changeme"
|
||||
PLAN_TRIGGER_BUCKET: str
|
||||
ENGINE_SQS_URL: str
|
||||
ENGINE_SQS_URL: str = "changeme"
|
||||
|
||||
# Third parties
|
||||
EPC_AUTH_TOKEN: str
|
||||
GOOGLE_SOLAR_API_KEY: str
|
||||
EPC_AUTH_TOKEN: str = "changeme"
|
||||
GOOGLE_SOLAR_API_KEY: str = "changeme"
|
||||
|
||||
# Database settings
|
||||
DB_HOST: str
|
||||
DB_PASSWORD: str
|
||||
DB_USERNAME: str
|
||||
DB_PORT: str
|
||||
DB_NAME: str
|
||||
DB_HOST: str = "changeme"
|
||||
DB_PASSWORD: str = "changeme"
|
||||
DB_USERNAME: str = "changeme"
|
||||
DB_PORT: str = "changeme"
|
||||
DB_NAME: str = "changeme"
|
||||
|
||||
# Prediction buckets
|
||||
SAP_PREDICTIONS_BUCKET: str
|
||||
CARBON_PREDICTIONS_BUCKET: str
|
||||
HEAT_PREDICTIONS_BUCKET: str
|
||||
SAP_PREDICTIONS_BUCKET: str = "changeme"
|
||||
CARBON_PREDICTIONS_BUCKET: str = "changeme"
|
||||
HEAT_PREDICTIONS_BUCKET: str = "changeme"
|
||||
# LIGHTING_COST_PREDICTIONS_BUCKET: str
|
||||
# HEATING_COST_PREDICTIONS_BUCKET: str
|
||||
# HOT_WATER_COST_PREDICTIONS_BUCKET: str
|
||||
HEATING_KWH_PREDICTIONS_BUCKET: str
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET: str
|
||||
HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme"
|
||||
|
||||
# Other S3 buckets
|
||||
ENERGY_ASSESSMENTS_BUCKET: str
|
||||
ENERGY_ASSESSMENTS_BUCKET: str = "changeme"
|
||||
|
||||
# Optional AWS creds (only required in local)
|
||||
AWS_ACCESS_KEY_ID: Optional[str] = None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,10 @@
|
|||
from sqlalchemy import func
|
||||
from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario
|
||||
from backend.app.db.models.recommendations import (
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
Recommendation,
|
||||
ScenarioModel,
|
||||
)
|
||||
|
||||
|
||||
def aggregate_portfolio_recommendations(
|
||||
|
|
@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations(
|
|||
scenario_id: int,
|
||||
total_valuation_increase: float,
|
||||
labour_days: float,
|
||||
aggregated_data: dict
|
||||
aggregated_data: dict,
|
||||
):
|
||||
# Aggregate multiple fields
|
||||
aggregates = (
|
||||
|
|
@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations(
|
|||
func.sum(Recommendation.estimated_cost).label("cost"),
|
||||
func.sum(Recommendation.total_work_hours).label("total_work_hours"),
|
||||
func.sum(Recommendation.kwh_savings).label("energy_savings"),
|
||||
func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"),
|
||||
func.sum(Recommendation.co2_equivalent_savings).label(
|
||||
"co2_equivalent_savings"
|
||||
),
|
||||
func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"),
|
||||
)
|
||||
.join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id)
|
||||
.join(Plan, Plan.id == PlanRecommendations.plan_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
PlanRecommendations.recommendation_id == Recommendation.id,
|
||||
)
|
||||
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
|
||||
.filter(
|
||||
Plan.portfolio_id == portfolio_id,
|
||||
Plan.scenario_id == scenario_id,
|
||||
Recommendation.default == True
|
||||
PlanModel.portfolio_id == portfolio_id,
|
||||
PlanModel.scenario_id == scenario_id,
|
||||
Recommendation.default == True,
|
||||
)
|
||||
.one()
|
||||
)
|
||||
|
|
@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations(
|
|||
"energy_savings": aggregates.energy_savings or 0,
|
||||
"co2_equivalent_savings": aggregates.co2_equivalent_savings or 0,
|
||||
"energy_cost_savings": aggregates.energy_cost_savings or 0,
|
||||
**aggregated_data
|
||||
**aggregated_data,
|
||||
}
|
||||
|
||||
# Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio
|
||||
portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one()
|
||||
portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one()
|
||||
|
||||
# Update the data
|
||||
for key, value in aggregates_dict.items():
|
||||
|
|
|
|||
|
|
@ -1,17 +1,33 @@
|
|||
from sqlalchemy import text
|
||||
from sqlalchemy import insert, delete
|
||||
from sqlalchemy.orm import Session
|
||||
from typing import Any, Dict, List, Optional
|
||||
from sqlalchemy import inspect, text, insert, delete, select, update
|
||||
from sqlalchemy.orm import Session, Mapper
|
||||
from sqlalchemy.exc import SQLAlchemyError
|
||||
from sqlmodel import Session
|
||||
|
||||
from backend.app.db.models.recommendations import (
|
||||
Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
|
||||
PlanModel,
|
||||
Recommendation,
|
||||
RecommendationMaterials,
|
||||
PlanRecommendations,
|
||||
ScenarioModel,
|
||||
)
|
||||
from backend.app.db.models.portfolio import PropertyModel
|
||||
from backend.app.db.connection import db_session, db_read_session
|
||||
|
||||
|
||||
def prepare_plan_data(
|
||||
p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations,
|
||||
rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0,
|
||||
p,
|
||||
body,
|
||||
scenario_id,
|
||||
eco_packages,
|
||||
valuations,
|
||||
new_sap_points,
|
||||
new_epc,
|
||||
default_recommendations,
|
||||
rebaselining_carbon=0,
|
||||
rebaselining_heat_demand=0,
|
||||
rebaselining_kwh=0,
|
||||
rebaselining_bills=0,
|
||||
):
|
||||
"""
|
||||
Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured
|
||||
|
|
@ -32,21 +48,37 @@ def prepare_plan_data(
|
|||
"""
|
||||
# Plan carbon savings
|
||||
co2_savings = sum(
|
||||
[r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)]
|
||||
[
|
||||
r["co2_equivalent_savings"]
|
||||
for r in default_recommendations
|
||||
if not r.get("already_installed", False)
|
||||
]
|
||||
)
|
||||
post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings
|
||||
|
||||
# Plan bill savings
|
||||
energy_bill_savings = sum(
|
||||
[r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)]
|
||||
[
|
||||
r["energy_cost_savings"]
|
||||
for r in default_recommendations
|
||||
if not r.get("already_installed", False)
|
||||
]
|
||||
)
|
||||
post_energy_bill = (
|
||||
sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
|
||||
)
|
||||
post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings
|
||||
|
||||
# energy consumption
|
||||
energy_consumption_savings = sum(
|
||||
[r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)]
|
||||
[
|
||||
r["kwh_savings"]
|
||||
for r in default_recommendations
|
||||
if not r.get("already_installed", False)
|
||||
]
|
||||
)
|
||||
post_energy_consumption = (
|
||||
p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
|
||||
)
|
||||
post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings
|
||||
|
||||
valuation_post_retrofit, valuation_increase = None, None
|
||||
if valuations["current_value"]:
|
||||
|
|
@ -54,9 +86,19 @@ def prepare_plan_data(
|
|||
valuation_post_retrofit = valuations["average_increased_value"]
|
||||
|
||||
# plan costing data
|
||||
cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)])
|
||||
cost_of_works = sum(
|
||||
[
|
||||
r["total"]
|
||||
for r in default_recommendations
|
||||
if not r.get("already_installed", False)
|
||||
]
|
||||
)
|
||||
contingency_cost = sum(
|
||||
[r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)]
|
||||
[
|
||||
r.get("contingency", 0)
|
||||
for r in default_recommendations
|
||||
if not r.get("already_installed", False)
|
||||
]
|
||||
)
|
||||
|
||||
return {
|
||||
|
|
@ -86,7 +128,7 @@ def prepare_plan_data(
|
|||
"valuation_increase": valuation_increase,
|
||||
"cost_of_works": float(cost_of_works),
|
||||
"contingency_cost": float(contingency_cost),
|
||||
"plan_type": eco_packages.get(p.id, (None, None, None))[2]
|
||||
"plan_type": eco_packages.get(p.id, (None, None, None))[2],
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -97,7 +139,7 @@ def create_plan(session: Session, plan):
|
|||
:param plan: dictionary of data representing a plan to be created
|
||||
"""
|
||||
try:
|
||||
new_plan = Plan(**plan)
|
||||
new_plan = PlanModel(**plan)
|
||||
session.add(new_plan)
|
||||
session.flush()
|
||||
session.commit()
|
||||
|
|
@ -120,9 +162,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
|
|||
]
|
||||
|
||||
stmt = (
|
||||
insert(Plan)
|
||||
.values(payload)
|
||||
.returning(Plan.id, Plan.property_id)
|
||||
insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id)
|
||||
)
|
||||
|
||||
result = session.execute(stmt).all()
|
||||
|
|
@ -133,14 +173,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int
|
|||
|
||||
def create_scenario(session: Session, scenario: dict) -> int:
|
||||
existing_scenario = (
|
||||
session.query(Scenario)
|
||||
session.query(ScenarioModel)
|
||||
.filter_by(portfolio_id=scenario["portfolio_id"])
|
||||
.first()
|
||||
)
|
||||
|
||||
scenario["is_default"] = not bool(existing_scenario)
|
||||
|
||||
new_scenario = Scenario(**scenario)
|
||||
new_scenario = ScenarioModel(**scenario)
|
||||
session.add(new_scenario)
|
||||
session.flush() # ensures ID is populated
|
||||
|
||||
|
|
@ -167,7 +207,9 @@ def create_recommendation(session: Session, recommendation):
|
|||
raise e
|
||||
|
||||
|
||||
def create_recommendation_material(session: Session, recommendation_id, material_id, depth):
|
||||
def create_recommendation_material(
|
||||
session: Session, recommendation_id, material_id, depth
|
||||
):
|
||||
"""
|
||||
This function will create a record for the recommendation_material in the database if it does not exist.
|
||||
:param session: The databse session
|
||||
|
|
@ -177,9 +219,7 @@ def create_recommendation_material(session: Session, recommendation_id, material
|
|||
"""
|
||||
|
||||
new_recommendation_material = RecommendationMaterials(
|
||||
recommendation_id=recommendation_id,
|
||||
material_id=material_id,
|
||||
depth=depth
|
||||
recommendation_id=recommendation_id, material_id=material_id, depth=depth
|
||||
)
|
||||
session.add(new_recommendation_material)
|
||||
session.flush()
|
||||
|
|
@ -196,13 +236,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids):
|
|||
"""
|
||||
|
||||
# Prepare a list of dictionaries for bulk insert
|
||||
data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids]
|
||||
data = [
|
||||
{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids
|
||||
]
|
||||
|
||||
# Bulk insert using SQLAlchemy's core API
|
||||
session.execute(insert(PlanRecommendations).values(data))
|
||||
|
||||
|
||||
def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id):
|
||||
def upload_recommendations(
|
||||
session: Session, recommendations_to_upload, property_id, new_plan_id
|
||||
):
|
||||
try:
|
||||
# Prepare data for bulk insert for Recommendation
|
||||
recommendations_data = [
|
||||
|
|
@ -213,8 +257,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
|
|||
"description": rec["description"],
|
||||
"estimated_cost": float(rec["total"]),
|
||||
"default": rec["default"],
|
||||
"starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None,
|
||||
"new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None,
|
||||
"starting_u_value": (
|
||||
float(rec.get("starting_u_value"))
|
||||
if rec.get("starting_u_value")
|
||||
else None
|
||||
),
|
||||
"new_u_value": (
|
||||
float(rec.get("new_u_value")) if rec.get("new_u_value") else None
|
||||
),
|
||||
"sap_points": float(rec["sap_points"]),
|
||||
"energy_savings": float(rec["heat_demand"]),
|
||||
"kwh_savings": float(rec["kwh_savings"]),
|
||||
|
|
@ -223,13 +273,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
|
|||
"energy_cost_savings": float(rec["energy_cost_savings"]),
|
||||
"labour_days": float(rec["labour_days"]),
|
||||
"already_installed": rec["already_installed"],
|
||||
"heat_demand": float(rec["heat_demand"])
|
||||
"heat_demand": float(rec["heat_demand"]),
|
||||
}
|
||||
for rec in recommendations_to_upload
|
||||
]
|
||||
|
||||
# Insert the recommendations, get back the IDs
|
||||
stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data)
|
||||
stmt = (
|
||||
insert(Recommendation)
|
||||
.returning(Recommendation.id)
|
||||
.values(recommendations_data)
|
||||
)
|
||||
result = session.execute(stmt)
|
||||
uploaded_recommendation_ids = [row[0] for row in result]
|
||||
|
||||
|
|
@ -243,11 +297,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property
|
|||
"quantity_unit": part.get("quantity_unit", None),
|
||||
"estimated_cost": float(part.get("total", part.get("total_cost"))),
|
||||
}
|
||||
for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids)
|
||||
for rec, recommendation_id in zip(
|
||||
recommendations_to_upload, uploaded_recommendation_ids
|
||||
)
|
||||
for part in rec["parts"]
|
||||
]
|
||||
|
||||
session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data)
|
||||
session.bulk_insert_mappings(
|
||||
RecommendationMaterials, recommendation_materials_data
|
||||
)
|
||||
|
||||
# flush the changes to get the newly created IDs
|
||||
session.flush()
|
||||
|
|
@ -283,25 +341,27 @@ def bulk_upload_recommendations_and_materials(
|
|||
plan_ids_by_index = []
|
||||
|
||||
for rec in recommendation_payload:
|
||||
recommendation_rows.append({
|
||||
"property_id": rec["property_id"],
|
||||
"type": rec["type"],
|
||||
"measure_type": rec["measure_type"],
|
||||
"description": rec["description"],
|
||||
"estimated_cost": rec["estimated_cost"],
|
||||
"default": rec["default"],
|
||||
"starting_u_value": rec["starting_u_value"],
|
||||
"new_u_value": rec["new_u_value"],
|
||||
"sap_points": rec["sap_points"],
|
||||
"heat_demand": rec["heat_demand"],
|
||||
"kwh_savings": rec["kwh_savings"],
|
||||
"co2_equivalent_savings": rec["co2_equivalent_savings"],
|
||||
"energy_savings": rec["energy_savings"],
|
||||
"energy_cost_savings": rec["energy_cost_savings"],
|
||||
"total_work_hours": rec["total_work_hours"],
|
||||
"labour_days": rec["labour_days"],
|
||||
"already_installed": rec["already_installed"],
|
||||
})
|
||||
recommendation_rows.append(
|
||||
{
|
||||
"property_id": rec["property_id"],
|
||||
"type": rec["type"],
|
||||
"measure_type": rec["measure_type"],
|
||||
"description": rec["description"],
|
||||
"estimated_cost": rec["estimated_cost"],
|
||||
"default": rec["default"],
|
||||
"starting_u_value": rec["starting_u_value"],
|
||||
"new_u_value": rec["new_u_value"],
|
||||
"sap_points": rec["sap_points"],
|
||||
"heat_demand": rec["heat_demand"],
|
||||
"kwh_savings": rec["kwh_savings"],
|
||||
"co2_equivalent_savings": rec["co2_equivalent_savings"],
|
||||
"energy_savings": rec["energy_savings"],
|
||||
"energy_cost_savings": rec["energy_cost_savings"],
|
||||
"total_work_hours": rec["total_work_hours"],
|
||||
"labour_days": rec["labour_days"],
|
||||
"already_installed": rec["already_installed"],
|
||||
}
|
||||
)
|
||||
|
||||
parts_by_index.append(rec["parts"])
|
||||
plan_ids_by_index.append(rec["plan_id"])
|
||||
|
|
@ -310,9 +370,7 @@ def bulk_upload_recommendations_and_materials(
|
|||
# 2. Insert recommendations and get IDs
|
||||
# ---------------------------------------------------------
|
||||
result = session.execute(
|
||||
insert(Recommendation)
|
||||
.values(recommendation_rows)
|
||||
.returning(Recommendation.id)
|
||||
insert(Recommendation).values(recommendation_rows).returning(Recommendation.id)
|
||||
)
|
||||
|
||||
recommendation_ids = [row[0] for row in result]
|
||||
|
|
@ -324,19 +382,19 @@ def bulk_upload_recommendations_and_materials(
|
|||
|
||||
for recommendation_id, parts in zip(recommendation_ids, parts_by_index):
|
||||
for part in parts:
|
||||
materials_rows.append({
|
||||
"recommendation_id": recommendation_id,
|
||||
"material_id": part["material_id"],
|
||||
"depth": part["depth"],
|
||||
"quantity": part["quantity"],
|
||||
"quantity_unit": part["quantity_unit"],
|
||||
"estimated_cost": part["estimated_cost"],
|
||||
})
|
||||
materials_rows.append(
|
||||
{
|
||||
"recommendation_id": recommendation_id,
|
||||
"material_id": part["material_id"],
|
||||
"depth": part["depth"],
|
||||
"quantity": part["quantity"],
|
||||
"quantity_unit": part["quantity_unit"],
|
||||
"estimated_cost": part["estimated_cost"],
|
||||
}
|
||||
)
|
||||
|
||||
if materials_rows:
|
||||
session.execute(
|
||||
insert(RecommendationMaterials).values(materials_rows)
|
||||
)
|
||||
session.execute(insert(RecommendationMaterials).values(materials_rows))
|
||||
|
||||
# ---------------------------------------------------------
|
||||
# 4. Insert plan ↔ recommendation links
|
||||
|
|
@ -346,26 +404,22 @@ def bulk_upload_recommendations_and_materials(
|
|||
"plan_id": plan_id,
|
||||
"recommendation_id": recommendation_id,
|
||||
}
|
||||
for plan_id, recommendation_id in zip(
|
||||
plan_ids_by_index, recommendation_ids
|
||||
)
|
||||
for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids)
|
||||
]
|
||||
|
||||
session.execute(
|
||||
insert(PlanRecommendations).values(plan_recommendation_rows)
|
||||
)
|
||||
session.execute(insert(PlanRecommendations).values(plan_recommendation_rows))
|
||||
|
||||
|
||||
def chunked(iterable, size=100):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Despite the parameter name, the argument must support ``len()`` and
    slicing (list, tuple, str, ...). Each chunk has whatever type slicing the
    input produces, and the last chunk may be shorter than ``size``.

    Args:
        iterable: Sized, sliceable sequence to split.
        size: Maximum number of items per chunk; must be at least 1.

    Raises:
        ValueError: If ``size`` is smaller than 1. Because this is a
            generator, the error surfaces when iteration starts.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject bad sizes explicitly instead of yielding nothing.
    if size < 1:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
|
||||
|
||||
|
||||
def get_property_ids(portfolio_id: int) -> list[int]:
    """Return the ids of all property rows that belong to ``portfolio_id``."""
    with db_read_session() as session:
        id_query = session.query(PropertyModel.id).filter(
            PropertyModel.portfolio_id == portfolio_id
        )
        property_ids = []
        # Only the id column is selected, so each row unpacks to one value.
        for (property_id,) in id_query.all():
            property_ids.append(property_id)
        return property_ids
|
||||
|
|
@ -381,12 +435,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# recommendation_materials (via recommendation)
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation_materials rm
|
||||
USING recommendation r
|
||||
WHERE rm.recommendation_id = r.id
|
||||
AND r.property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -394,12 +450,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# plan_recommendations (via plan)
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan_recommendations pr
|
||||
USING plan p
|
||||
WHERE pr.plan_id = p.id
|
||||
AND p.property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -407,13 +465,15 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# funding_package_measures
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM funding_package_measures fpm
|
||||
USING funding_package fp, plan p
|
||||
WHERE fpm.funding_package_id = fp.id
|
||||
AND fp.plan_id = p.id
|
||||
AND p.property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -421,10 +481,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# inspections (direct)
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM inspections
|
||||
WHERE property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -432,12 +494,14 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# funding_package
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM funding_package fp
|
||||
USING plan p
|
||||
WHERE fp.plan_id = p.id
|
||||
AND p.property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -445,10 +509,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# recommendation (direct — CRITICAL FIX)
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation
|
||||
WHERE property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -456,10 +522,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# plan (direct)
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan
|
||||
WHERE property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -467,18 +535,22 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# property-scoped tables
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM property_details_epc
|
||||
WHERE property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM property_targets
|
||||
WHERE property_id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -486,10 +558,12 @@ def delete_property_batch(session: Session, property_ids: list[int]):
|
|||
# properties LAST
|
||||
# --------------------------------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM property
|
||||
WHERE id = ANY(:property_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -510,8 +584,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int):
|
|||
|
||||
with db_session() as session:
|
||||
session.execute(
|
||||
delete(Scenario)
|
||||
.where(Scenario.portfolio_id == portfolio_id)
|
||||
delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id)
|
||||
)
|
||||
|
||||
print("Deleted scenarios for empty portfolio")
|
||||
|
|
@ -530,6 +603,7 @@ def clear_portfolio_in_batches(
|
|||
|
||||
total = (len(property_ids) + property_batch_size - 1) // property_batch_size
|
||||
import time
|
||||
|
||||
for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1):
|
||||
print(f"Deleting batch {i}/{total} ({len(batch)} properties)")
|
||||
start_time = time.time()
|
||||
|
|
@ -542,3 +616,61 @@ def clear_portfolio_in_batches(
|
|||
delete_portfolio_scenarios_if_empty(portfolio_id)
|
||||
|
||||
print("Portfolio cleared in batches.")
|
||||
|
||||
|
||||
def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]:
    """Return every plan whose ``portfolio_id`` equals the given id.

    Args:
        portfolio_id: Id of the portfolio whose plans are wanted.

    Returns:
        The matching ``PlanModel`` instances; an empty list when none match.
    """
    stmt = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id)
    with db_read_session() as session:
        # Use Session.execute(), like the other statements in this module.
        # ``exec`` is not part of SQLAlchemy's Session API and was only
        # reachable by casting the session to ``Any``.
        # NOTE(review): confirm db_read_session() yields a SQLAlchemy-compatible
        # Session (execute() is available on both SQLAlchemy and SQLModel sessions).
        return list(session.execute(stmt).scalars().all())
|
||||
|
||||
|
||||
def get_scenario(scenario_id: int) -> Optional[ScenarioModel]:
    """Return the scenario with the given id, or ``None`` if there is none.

    Args:
        scenario_id: Value to match against ``ScenarioModel.id``.

    Returns:
        The matching ``ScenarioModel`` or ``None``.
    """
    stmt = select(ScenarioModel).where(ScenarioModel.id == scenario_id)
    with db_read_session() as session:
        # Use Session.execute(), like the other statements in this module.
        # ``exec`` is not part of SQLAlchemy's Session API and was only
        # reachable by casting the session to ``Any``.
        # NOTE(review): confirm db_read_session() yields a SQLAlchemy-compatible
        # Session (execute() is available on both SQLAlchemy and SQLModel sessions).
        return session.execute(stmt).scalar_one_or_none()
|
||||
|
||||
|
||||
def _update_mapping(model: Any, *skip_columns: str) -> Dict[str, Any]:
    """Build one ``bulk_update_mappings`` row for ``model``.

    The row holds every table column except ``id`` and ``skip_columns``, with
    the primary key ``id`` added last so the UPDATE can target the row.
    """
    mapping: Dict[str, Any] = {
        column.name: getattr(model, column.name)
        for column in model.__table__.columns
        if column.name != "id" and column.name not in skip_columns
    }
    mapping["id"] = model.id
    return mapping


def bulk_update_plans(
    plan_models: List[PlanModel],
    scenario_models: List[ScenarioModel],
) -> int:
    """Write the current column values of the given plans and scenarios.

    Each model is turned into a column-name -> value mapping keyed by its
    ``id`` and sent through ``Session.bulk_update_mappings``. Plans update all
    of their columns; scenarios update all columns except ``portfolio_id``.

    Args:
        plan_models: Plans to update. Nothing is written when this is empty.
        scenario_models: Scenarios to update in the same transaction.

    Returns:
        The number of plans passed in (0 when ``plan_models`` is empty).
    """
    # NOTE(review): an empty plan list also skips the scenario updates, even
    # when scenario_models is non-empty -- confirm that this is intended.
    if not plan_models:
        return 0

    plan_mappings: List[Dict[str, Any]] = [
        _update_mapping(plan) for plan in plan_models
    ]
    # portfolio_id is not part of the scenario update.
    scenario_mappings: List[Dict[str, Any]] = [
        _update_mapping(scenario, "portfolio_id") for scenario in scenario_models
    ]

    # Write through db_session() rather than db_read_session(): this function
    # issues UPDATEs and commits.
    # NOTE(review): assumes db_session() is the read-write counterpart of
    # db_read_session(), as its use for DELETE statements in this module
    # suggests -- confirm.
    with db_session() as session:
        plan_mapper: Mapper[Any] = inspect(PlanModel)
        scenario_mapper: Mapper[Any] = inspect(ScenarioModel)

        session.bulk_update_mappings(plan_mapper, plan_mappings)
        session.bulk_update_mappings(scenario_mapper, scenario_mappings)
        session.commit()

    return len(plan_models)
|
||||
|
|
|
|||
0
backend/app/db/functions/tasks/__init__.py
Normal file
0
backend/app/db/functions/tasks/__init__.py
Normal file
|
|
@ -1,9 +1,18 @@
|
|||
import enum
|
||||
|
||||
from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
Integer,
|
||||
String,
|
||||
Float,
|
||||
Enum,
|
||||
TIMESTAMP,
|
||||
BigInteger,
|
||||
ForeignKey,
|
||||
)
|
||||
from sqlalchemy.orm import declarative_base
|
||||
from sqlalchemy.sql import func
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.db.models.recommendations import PlanModel
|
||||
from backend.app.db.models.materials import MaterialType, Material
|
||||
|
||||
Base = declarative_base()
|
||||
|
|
@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum):
|
|||
|
||||
|
||||
class FundingPackage(Base):
|
||||
__tablename__ = 'funding_package'
|
||||
__tablename__ = "funding_package"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False)
|
||||
plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False)
|
||||
scheme = Column(
|
||||
Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False),
|
||||
nullable=False
|
||||
Enum(
|
||||
SchemeEnum,
|
||||
values_callable=lambda x: [e.value for e in x],
|
||||
create_constraint=False,
|
||||
),
|
||||
nullable=False,
|
||||
)
|
||||
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
|
||||
project_funding = Column(Float)
|
||||
|
|
@ -34,15 +47,23 @@ class FundingPackage(Base):
|
|||
|
||||
|
||||
class FundingPackageMeasures(Base):
|
||||
__tablename__ = 'funding_package_measures'
|
||||
__tablename__ = "funding_package_measures"
|
||||
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False)
|
||||
measure = Column(
|
||||
Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False),
|
||||
nullable=False
|
||||
funding_package_id = Column(
|
||||
BigInteger, ForeignKey(FundingPackage.id), nullable=False
|
||||
)
|
||||
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists
|
||||
measure = Column(
|
||||
Enum(
|
||||
MaterialType,
|
||||
values_callable=lambda x: [e.value for e in x],
|
||||
create_constraint=False,
|
||||
),
|
||||
nullable=False,
|
||||
)
|
||||
material_id = Column(
|
||||
BigInteger, ForeignKey(Material.id), nullable=False
|
||||
) # Assuming material table exists
|
||||
innovation_uplift = Column(Float)
|
||||
partial_project_score = Column(Float)
|
||||
uplift_project_score = Column(Float)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,17 @@
|
|||
import enum
|
||||
import pytz
|
||||
import datetime
|
||||
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
Integer,
|
||||
Text,
|
||||
Boolean,
|
||||
Float,
|
||||
DateTime,
|
||||
Enum,
|
||||
ForeignKey,
|
||||
CheckConstraint,
|
||||
)
|
||||
from sqlalchemy.ext.declarative import declarative_base
|
||||
from backend.app.db.models.users import UserModel # noqa
|
||||
from backend.app.db.models.materials import MaterialType
|
||||
|
|
@ -22,7 +32,7 @@ class PortfolioStatus(enum.Enum):
|
|||
NEEDS_REVIEW = "needs review"
|
||||
|
||||
|
||||
class PortfolioGoal(enum.Enum):
|
||||
class PortfolioGoal(enum.Enum): # TODO: Move to domain?
|
||||
VALUATION_IMPROVEMENT = "Valuation Improvement"
|
||||
INCREASING_EPC = "Increasing EPC"
|
||||
REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions"
|
||||
|
|
@ -31,23 +41,43 @@ class PortfolioGoal(enum.Enum):
|
|||
|
||||
|
||||
class Portfolio(Base):
|
||||
__tablename__ = 'portfolio'
|
||||
__tablename__ = "portfolio"
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
name = Column(Text, nullable=False)
|
||||
budget = Column(Float)
|
||||
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
status = Column(
|
||||
Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
|
||||
nullable=False,
|
||||
)
|
||||
goal = Column(
|
||||
Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]),
|
||||
nullable=False,
|
||||
)
|
||||
cost = Column(Float)
|
||||
number_of_properties = Column(Integer)
|
||||
co2_equivalent_savings = Column(Float) # Unit is always tonnes so we don't need to store the unit
|
||||
energy_savings = Column(Float) # Unit is always kWh so we don't need to store the unit
|
||||
energy_cost_savings = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
|
||||
property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
|
||||
rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
|
||||
co2_equivalent_savings = Column(
|
||||
Float
|
||||
) # Unit is always tonnes so we don't need to store the unit
|
||||
energy_savings = Column(
|
||||
Float
|
||||
) # Unit is always kWh so we don't need to store the unit
|
||||
energy_cost_savings = Column(
|
||||
Float
|
||||
) # Unit is always £ so we don't need to store the unit for the moment
|
||||
property_valuation_increase = Column(
|
||||
Float
|
||||
) # Unit is always £ so we don't need to store the unit for the moment
|
||||
rental_yield_increase = Column(
|
||||
Float
|
||||
) # Unit is always £ so we don't need to store the unit for the moment
|
||||
total_work_hours = Column(Float)
|
||||
labour_days = Column(Float)
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
|
||||
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
|
||||
created_at = Column(
|
||||
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
|
||||
)
|
||||
updated_at = Column(
|
||||
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
|
||||
)
|
||||
# Aggregations for summary
|
||||
epc_breakdown_pre_retrofit = Column(Text)
|
||||
epc_breakdown_post_retrofit = Column(Text)
|
||||
|
|
@ -71,7 +101,7 @@ class PropertyCreationStatus(enum.Enum):
|
|||
ERROR = "ERROR"
|
||||
|
||||
|
||||
class Epc(enum.Enum):
|
||||
class Epc(enum.Enum): # TODO: Move to domain?
|
||||
A = "A"
|
||||
B = "B"
|
||||
C = "C"
|
||||
|
|
@ -82,20 +112,27 @@ class Epc(enum.Enum):
|
|||
|
||||
|
||||
class PropertyModel(Base):
|
||||
__tablename__ = 'property'
|
||||
__tablename__ = "property"
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
|
||||
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
|
||||
creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
|
||||
uprn = Column(Integer)
|
||||
landlord_property_id = Column(Text)
|
||||
building_reference_number = Column(Integer)
|
||||
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
status = Column(
|
||||
Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]),
|
||||
nullable=False,
|
||||
)
|
||||
address = Column(Text)
|
||||
postcode = Column(Text)
|
||||
has_pre_condition_report = Column(Boolean)
|
||||
has_recommendations = Column(Boolean)
|
||||
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
|
||||
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
|
||||
created_at = Column(
|
||||
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
|
||||
)
|
||||
updated_at = Column(
|
||||
DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)
|
||||
)
|
||||
property_type = Column(Text)
|
||||
built_form = Column(Text)
|
||||
local_authority = Column(Text)
|
||||
|
|
@ -127,7 +164,7 @@ rating_lookup = {
|
|||
"Average": FeatureRating.AVERAGE,
|
||||
"Poor": FeatureRating.POOR,
|
||||
"Very Poor": FeatureRating.VERY_POOR,
|
||||
"N/A": FeatureRating.NA
|
||||
"N/A": FeatureRating.NA,
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -136,32 +173,45 @@ def get_feature_rating_from_string(rating_str: str):
|
|||
|
||||
|
||||
class PropertyDetailsEpcModel(Base):
|
||||
__tablename__ = 'property_details_epc'
|
||||
__tablename__ = "property_details_epc"
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
|
||||
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
|
||||
property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
|
||||
portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
|
||||
full_address = Column(Text)
|
||||
lodgement_date = Column(DateTime)
|
||||
is_expired = Column(Boolean)
|
||||
total_floor_area = Column(Float)
|
||||
walls = Column(Text)
|
||||
walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
|
||||
walls_rating = Column(
|
||||
Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5")
|
||||
)
|
||||
roof = Column(Text)
|
||||
roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5'))
|
||||
roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5"))
|
||||
floor = Column(Text)
|
||||
floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5'))
|
||||
floor_rating = Column(
|
||||
Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5")
|
||||
)
|
||||
windows = Column(Text)
|
||||
windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5'))
|
||||
windows_rating = Column(
|
||||
Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5")
|
||||
)
|
||||
heating = Column(Text)
|
||||
heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5'))
|
||||
heating_rating = Column(
|
||||
Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5")
|
||||
)
|
||||
heating_controls = Column(Text)
|
||||
heating_controls_rating = Column(
|
||||
Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5')
|
||||
Integer,
|
||||
CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"),
|
||||
)
|
||||
hot_water = Column(Text)
|
||||
hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5'))
|
||||
hot_water_rating = Column(
|
||||
Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5")
|
||||
)
|
||||
lighting = Column(Text)
|
||||
lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5'))
|
||||
lighting_rating = Column(
|
||||
Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5")
|
||||
)
|
||||
mainfuel = Column(Text)
|
||||
ventilation = Column(Text)
|
||||
solar_pv = Column(Text)
|
||||
|
|
@ -219,7 +269,7 @@ class PropertyDetailsSpatial(Base):
|
|||
|
||||
|
||||
class PropertyDetailsMeter(Base):
|
||||
__tablename__ = 'property_details_meter'
|
||||
__tablename__ = "property_details_meter"
|
||||
id = Column(Integer, primary_key=True, autoincrement=True)
|
||||
uprn = Column(Integer, nullable=False)
|
||||
energy_supplier = Column(Text)
|
||||
|
|
@ -230,11 +280,13 @@ class PropertyDetailsMeter(Base):
|
|||
|
||||
|
||||
class PropertyTargetsModel(Base):
    """ORM mapping for the ``property_targets`` table."""

    __tablename__ = "property_targets"
    id = Column(Integer, primary_key=True, autoincrement=True)
    property_id = Column(Integer, ForeignKey("property.id"), nullable=False)
    portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    # The default must be a callable: a bare datetime.now(...) call is
    # evaluated once at import time, stamping every row with the moment the
    # process started instead of the moment the row was inserted.
    created_at = Column(
        DateTime,
        nullable=False,
        default=lambda: datetime.datetime.now(pytz.utc),
    )
    epc = Column(Enum(Epc))
    heat_demand = Column(Text)
|
||||
|
||||
|
|
@ -242,23 +294,36 @@ class PropertyTargetsModel(Base):
|
|||
class PortfolioUsers(Base):
    """ORM mapping for the ``portfolioUsers`` table linking users to portfolios."""

    __tablename__ = "portfolioUsers"
    id = Column(Integer, primary_key=True, autoincrement=True)
    user_id = Column(Integer, ForeignKey("user.id"), nullable=False)
    portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False)
    role = Column(Text, nullable=False)
    # The defaults must be callables: a bare datetime.now(...) call is
    # evaluated once at import time, stamping every row with the moment the
    # process started instead of the moment the row was inserted.
    created_at = Column(
        DateTime,
        nullable=False,
        default=lambda: datetime.datetime.now(pytz.utc),
    )
    # NOTE(review): updated_at only receives an insert-time default here; no
    # onupdate hook is declared on this column -- confirm it is refreshed elsewhere.
    updated_at = Column(
        DateTime,
        nullable=False,
        default=lambda: datetime.datetime.now(pytz.utc),
    )
|
||||
|
||||
|
||||
class PropertyInstalledMeasures(Base):
    """
    This model keeps a record of the installed measures for each property, at the UPRN level
    """

    __tablename__ = "property_installed_measures"
    id = Column(Integer, primary_key=True, autoincrement=True)
    uprn = Column(Integer, nullable=False)
    # Stored using the enum members' values; create_constraint=False means no
    # CHECK constraint is generated for the allowed values.
    measure_type = Column(
        Enum(
            MaterialType,
            values_callable=lambda x: [e.value for e in x],
            create_constraint=False,
        ),
        nullable=False,
    )
    # The defaults must be callables: a bare datetime.now(...) call is
    # evaluated once at import time, stamping every row with the moment the
    # process started instead of the moment the row was inserted.
    created_at = Column(
        DateTime,
        nullable=False,
        default=lambda: datetime.datetime.now(pytz.utc),
    )
    installed_at = Column(
        DateTime,
        nullable=False,
        default=lambda: datetime.datetime.now(pytz.utc),
    )
|
||||
|
|
|
|||
|
|
@ -1,7 +1,19 @@
|
|||
from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum
|
||||
from sqlalchemy.orm import declarative_base
|
||||
from typing import Iterable, List, NamedTuple, Optional, Type
|
||||
from sqlalchemy import (
|
||||
Column,
|
||||
BigInteger,
|
||||
String,
|
||||
Float,
|
||||
Boolean,
|
||||
TIMESTAMP,
|
||||
ForeignKey,
|
||||
Enum,
|
||||
)
|
||||
from sqlalchemy.orm import declarative_base, Mapped, mapped_column
|
||||
from sqlalchemy.sql import func
|
||||
from backend.app.db.models.portfolio import Portfolio, PropertyModel
|
||||
from datetime import datetime
|
||||
|
||||
from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel
|
||||
from backend.app.db.models.materials import Material
|
||||
from backend.app.db.models.portfolio import Epc
|
||||
from datatypes.enums import QuantityUnits
|
||||
|
|
@ -10,8 +22,12 @@ import enum
|
|||
Base = declarative_base()
|
||||
|
||||
|
||||
def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]:
    """Return the ``value`` of each member of ``enum_cls`` in definition order."""
    values: List[str] = []
    for member in enum_cls:
        values.append(member.value)
    return values
|
||||
|
||||
|
||||
class Recommendation(Base):
|
||||
__tablename__ = 'recommendation'
|
||||
__tablename__ = "recommendation"
|
||||
|
||||
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
||||
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
|
||||
|
|
@ -37,19 +53,24 @@ class Recommendation(Base):
|
|||
|
||||
|
||||
class RecommendationMaterials(Base):
|
||||
__tablename__ = 'recommendation_materials'
|
||||
__tablename__ = "recommendation_materials"
|
||||
|
||||
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
||||
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
|
||||
recommendation_id = Column(
|
||||
BigInteger, ForeignKey("recommendation.id"), nullable=False
|
||||
)
|
||||
material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False)
|
||||
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
|
||||
depth = Column(Float, nullable=False)
|
||||
quantity = Column(Float, nullable=False)
|
||||
quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False)
|
||||
quantity_unit = Column(
|
||||
Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]),
|
||||
nullable=False,
|
||||
)
|
||||
estimated_cost = Column(Float, nullable=False)
|
||||
|
||||
|
||||
class PlanTypeEnum(enum.Enum):
|
||||
class PlanTypeEnum(enum.Enum): # TODO: move this to domain?
|
||||
SOLAR_ECO4 = "solar_eco4"
|
||||
SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4"
|
||||
EMPTY_CAVITY_ECO = "empty_cavity_eco"
|
||||
|
|
@ -57,20 +78,36 @@ class PlanTypeEnum(enum.Enum):
|
|||
EXTRACTION_ECO = "extraction_eco"
|
||||
|
||||
|
||||
class Plan(Base):
|
||||
__tablename__ = 'plan'
|
||||
class PlanModel(Base):
|
||||
__tablename__ = "plan"
|
||||
|
||||
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
||||
name = Column(String, nullable=True, default="")
|
||||
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
|
||||
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
|
||||
scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario
|
||||
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
|
||||
is_default = Column(Boolean, nullable=False)
|
||||
valuation_increase_lower_bound = Column(Float)
|
||||
valuation_increase_upper_bound = Column(Float)
|
||||
valuation_increase_average = Column(Float)
|
||||
plan_type = Column(
|
||||
id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
|
||||
|
||||
name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="")
|
||||
|
||||
portfolio_id: Mapped[int] = mapped_column(
|
||||
BigInteger, ForeignKey(Portfolio.id), nullable=False
|
||||
)
|
||||
|
||||
property_id: Mapped[int] = mapped_column(
|
||||
BigInteger, ForeignKey(PropertyModel.id), nullable=False
|
||||
)
|
||||
|
||||
scenario_id: Mapped[Optional[int]] = mapped_column(
|
||||
BigInteger, ForeignKey("scenario.id")
|
||||
)
|
||||
|
||||
created_at: Mapped[datetime] = mapped_column( # type: ignore
|
||||
TIMESTAMP, nullable=False, server_default=func.now()
|
||||
)
|
||||
|
||||
is_default: Mapped[bool] = mapped_column(Boolean, nullable=False)
|
||||
|
||||
valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float)
|
||||
valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float)
|
||||
valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float)
|
||||
|
||||
plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column(
|
||||
Enum(
|
||||
PlanTypeEnum,
|
||||
name="plan_type",
|
||||
|
|
@ -79,73 +116,90 @@ class Plan(Base):
|
|||
),
|
||||
nullable=True,
|
||||
)
|
||||
post_sap_points = Column(Float)
|
||||
post_epc_rating = Column(Enum(Epc))
|
||||
post_co2_emissions = Column(Float)
|
||||
co2_savings = Column(Float)
|
||||
post_energy_bill = Column(Float)
|
||||
energy_bill_savings = Column(Float)
|
||||
post_energy_consumption = Column(Float) # energy demand in kWh/year
|
||||
energy_consumption_savings = Column(Float)
|
||||
valuation_post_retrofit = Column(Float)
|
||||
valuation_increase = Column(Float)
|
||||
|
||||
post_sap_points: Mapped[Optional[float]] = mapped_column(Float)
|
||||
post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc))
|
||||
post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float)
|
||||
co2_savings: Mapped[Optional[float]] = mapped_column(Float)
|
||||
post_energy_bill: Mapped[Optional[float]] = mapped_column(Float)
|
||||
energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float)
|
||||
post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float)
|
||||
energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float)
|
||||
valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float)
|
||||
valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
|
||||
|
||||
# Financial metrics, excluding funding
|
||||
cost_of_works = Column(Float)
|
||||
contingency_cost = Column(Float)
|
||||
cost_of_works: Mapped[Optional[float]] = mapped_column(Float)
|
||||
contingency_cost: Mapped[Optional[float]] = mapped_column(Float)
|
||||
|
||||
|
||||
class PlanRecommendations(Base):
|
||||
__tablename__ = 'plan_recommendations'
|
||||
__tablename__ = "plan_recommendations"
|
||||
|
||||
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
||||
plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
|
||||
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
|
||||
plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False)
|
||||
recommendation_id = Column(
|
||||
BigInteger, ForeignKey("recommendation.id"), nullable=False
|
||||
)
|
||||
|
||||
|
||||
class ScenarioModel(Base):
    """ORM mapping for the ``scenario`` table.

    The first group of columns describes how the scenario was configured; the
    second group holds the aggregate results stored against the scenario.
    """

    __tablename__ = "scenario"

    # --- Identity and configuration -------------------------------------
    id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True)
    name: Mapped[str] = mapped_column(String, nullable=False)
    # Filled in by the database when the row is inserted.
    created_at: Mapped[datetime] = mapped_column(
        TIMESTAMP, nullable=False, server_default=func.now()
    )
    budget: Mapped[Optional[float]] = mapped_column(Float)
    portfolio_id: Mapped[int] = mapped_column(
        BigInteger, ForeignKey(Portfolio.id), nullable=False
    )
    housing_type: Mapped[str] = mapped_column(String, nullable=False)
    # Stored through the database enum named "goal"; the persisted values come
    # from ``portfolio_goal_values``.
    goal: Mapped[PortfolioGoal] = mapped_column(
        Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"),
        nullable=False,
    )
    goal_value: Mapped[str] = mapped_column(String, nullable=False)
    trigger_file_path: Mapped[str] = mapped_column(String, nullable=False)
    already_installed_file_path: Mapped[Optional[str]] = mapped_column(String)
    patches_file_path: Mapped[Optional[str]] = mapped_column(String)
    non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column(
        String
    )
    exclusions: Mapped[Optional[str]] = mapped_column(String)
    multi_plan: Mapped[bool] = mapped_column(Boolean, default=False)
    is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False)

    # --- Aggregate results (previously held at the portfolio level) ------
    cost: Mapped[Optional[float]] = mapped_column(Float)
    contingency: Mapped[Optional[float]] = mapped_column(Float)
    funding: Mapped[Optional[float]] = mapped_column(Float)
    total_work_hours: Mapped[Optional[float]] = mapped_column(Float)
    energy_savings: Mapped[Optional[float]] = mapped_column(Float)
    co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float)
    energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float)
    epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
    epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
    number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger)
    n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger)
    co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
    co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
    energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String)
    energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String)
    energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(
        String
    )
    energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(
        String
    )
    valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String)
    cost_per_unit: Mapped[Optional[str]] = mapped_column(String)
    cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String)
    cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String)
    valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String)
    property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float)
    labour_days: Mapped[Optional[float]] = mapped_column(Float)
|
||||
|
||||
|
||||
class MeasureType(enum.Enum):
|
||||
|
|
@ -201,3 +255,12 @@ class InstalledMeasure(Base):
|
|||
heat_demand_savings = Column(Float)
|
||||
source = Column(String)
|
||||
is_active = Column(Boolean, nullable=False, default=True)
|
||||
|
||||
|
||||
def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]:
    """Return the ``value`` of each member yielded by *e*, in iteration order."""
    return [member.value for member in e]
|
||||
|
||||
|
||||
class PlanPersistence(NamedTuple):
    """Pair of ORM objects: a plan row together with a scenario row."""

    # ORM object for the plan.
    plan: PlanModel
    # ORM object for the scenario.
    scenario: ScenarioModel
|
||||
|
|
|
|||
150
backend/app/domain/classes/plan.py
Normal file
150
backend/app/domain/classes/plan.py
Normal file
|
|
@ -0,0 +1,150 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import replace
|
||||
from typing import Optional
|
||||
|
||||
from backend.app.db.models.portfolio import PortfolioGoal
|
||||
from backend.app.db.models.recommendations import (
|
||||
PlanModel,
|
||||
PlanPersistence,
|
||||
ScenarioModel,
|
||||
)
|
||||
from backend.app.domain.classes.scenario import Scenario
|
||||
from backend.app.domain.records.plan_record import PlanRecord
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
||||
|
||||
class Plan:
|
||||
def __init__(
|
||||
self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None
|
||||
):
|
||||
self.id: Optional[int] = id
|
||||
self.record: PlanRecord = record
|
||||
self.scenario: Scenario = scenario
|
||||
|
||||
@classmethod
|
||||
def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan:
|
||||
if not scenario:
|
||||
raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}")
|
||||
|
||||
record = PlanRecord(
|
||||
property_id=plan_model.property_id,
|
||||
portfolio_id=plan_model.portfolio_id,
|
||||
created_at=plan_model.created_at,
|
||||
is_default=plan_model.is_default,
|
||||
valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound,
|
||||
valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound,
|
||||
valuation_increase_average=plan_model.valuation_increase_average,
|
||||
plan_type=plan_model.plan_type,
|
||||
post_sap_points=plan_model.post_sap_points,
|
||||
post_epc_rating=plan_model.post_epc_rating,
|
||||
post_co2_emissions=plan_model.post_co2_emissions,
|
||||
co2_savings=plan_model.co2_savings,
|
||||
post_energy_bill=plan_model.post_energy_bill,
|
||||
energy_bill_savings=plan_model.energy_bill_savings,
|
||||
post_energy_consumption=plan_model.post_energy_consumption,
|
||||
energy_consumption_savings=plan_model.energy_consumption_savings,
|
||||
valuation_post_retrofit=plan_model.valuation_post_retrofit,
|
||||
valuation_increase=plan_model.valuation_increase,
|
||||
cost_of_works=plan_model.cost_of_works,
|
||||
contingency_cost=plan_model.contingency_cost,
|
||||
)
|
||||
return cls(record=record, scenario=scenario, id=plan_model.id)
|
||||
|
||||
@property
|
||||
def is_compliant(self) -> bool:
|
||||
goal: PortfolioGoal = self.scenario.record.goal
|
||||
|
||||
match goal:
|
||||
case PortfolioGoal.INCREASING_EPC:
|
||||
return self._is_compliant_epc()
|
||||
case _:
|
||||
raise NotImplementedError
|
||||
|
||||
def to_sqlalchemy(self) -> PlanPersistence:
|
||||
scenario_record = self.scenario.record
|
||||
|
||||
scenario_model = ScenarioModel(
|
||||
id=self.scenario.id,
|
||||
name=scenario_record.name,
|
||||
created_at=scenario_record.created_at,
|
||||
housing_type=scenario_record.housing_type,
|
||||
goal=scenario_record.goal,
|
||||
goal_value=scenario_record.goal_value,
|
||||
trigger_file_path=scenario_record.trigger_file_path,
|
||||
multi_plan=scenario_record.multi_plan,
|
||||
is_default=scenario_record.is_default,
|
||||
budget=scenario_record.budget,
|
||||
already_installed_file_path=scenario_record.already_installed_file_path,
|
||||
patches_file_path=scenario_record.patches_file_path,
|
||||
non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path,
|
||||
exclusions=scenario_record.exclusions,
|
||||
cost=scenario_record.cost,
|
||||
contingency=scenario_record.contingency,
|
||||
funding=scenario_record.funding,
|
||||
total_work_hours=scenario_record.total_work_hours,
|
||||
energy_savings=scenario_record.energy_savings,
|
||||
co2_equivalent_savings=scenario_record.co2_equivalent_savings,
|
||||
energy_cost_savings=scenario_record.energy_cost_savings,
|
||||
epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit,
|
||||
epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit,
|
||||
number_of_properties=scenario_record.number_of_properties,
|
||||
n_units_to_retrofit=scenario_record.n_units_to_retrofit,
|
||||
co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit,
|
||||
co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit,
|
||||
energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit,
|
||||
energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit,
|
||||
energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit,
|
||||
energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit,
|
||||
valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit,
|
||||
cost_per_unit=scenario_record.cost_per_unit,
|
||||
cost_per_co2_saved=scenario_record.cost_per_co2_saved,
|
||||
cost_per_sap_point=scenario_record.cost_per_sap_point,
|
||||
valuation_return_on_investment=scenario_record.valuation_return_on_investment,
|
||||
property_valuation_increase=scenario_record.property_valuation_increase,
|
||||
labour_days=scenario_record.labour_days,
|
||||
)
|
||||
|
||||
record = self.record
|
||||
|
||||
plan_model = PlanModel(
|
||||
id=self.id,
|
||||
property_id=record.property_id,
|
||||
portfolio_id=record.portfolio_id,
|
||||
scenario_id=self.scenario.id,
|
||||
created_at=record.created_at,
|
||||
is_default=record.is_default,
|
||||
valuation_increase_lower_bound=record.valuation_increase_lower_bound,
|
||||
valuation_increase_upper_bound=record.valuation_increase_upper_bound,
|
||||
valuation_increase_average=record.valuation_increase_average,
|
||||
plan_type=record.plan_type,
|
||||
post_sap_points=record.post_sap_points,
|
||||
post_epc_rating=record.post_epc_rating,
|
||||
post_co2_emissions=record.post_co2_emissions,
|
||||
co2_savings=record.co2_savings,
|
||||
post_energy_bill=record.post_energy_bill,
|
||||
energy_bill_savings=record.energy_bill_savings,
|
||||
post_energy_consumption=record.post_energy_consumption,
|
||||
energy_consumption_savings=record.energy_consumption_savings,
|
||||
valuation_post_retrofit=record.valuation_post_retrofit,
|
||||
valuation_increase=record.valuation_increase,
|
||||
cost_of_works=record.cost_of_works,
|
||||
contingency_cost=record.contingency_cost,
|
||||
)
|
||||
|
||||
return PlanPersistence(plan=plan_model, scenario=scenario_model)
|
||||
|
||||
def set_default(self, value: bool) -> None:
|
||||
self.record = replace(self.record, is_default=value)
|
||||
self.scenario.record = replace(self.scenario.record, is_default=value)
|
||||
|
||||
def _is_compliant_epc(self) -> bool:
|
||||
goal_value: str = self.scenario.record.goal_value
|
||||
|
||||
if self.record.post_epc_rating:
|
||||
post_epc = self.record.post_epc_rating.value
|
||||
elif self.record.post_sap_points:
|
||||
post_epc = sap_to_epc(self.record.post_sap_points)
|
||||
else:
|
||||
return False
|
||||
|
||||
return post_epc <= goal_value
|
||||
58
backend/app/domain/classes/scenario.py
Normal file
58
backend/app/domain/classes/scenario.py
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import replace
|
||||
from typing import Optional
|
||||
|
||||
from backend.app.db.models.recommendations import ScenarioModel
|
||||
from backend.app.domain.records.scenario_record import ScenarioRecord
|
||||
|
||||
|
||||
class Scenario:
    """Domain object holding an immutable ``ScenarioRecord`` and its optional id."""

    # Attribute names copied one-to-one from a ScenarioModel onto a
    # ScenarioRecord, in the order they are read.
    _RECORD_FIELDS = (
        "name",
        "created_at",
        "housing_type",
        "goal",
        "goal_value",
        "trigger_file_path",
        "multi_plan",
        "is_default",
        "budget",
        "already_installed_file_path",
        "patches_file_path",
        "non_invasive_recommendations_file_path",
        "exclusions",
        "cost",
        "contingency",
        "funding",
        "total_work_hours",
        "energy_savings",
        "co2_equivalent_savings",
        "energy_cost_savings",
        "epc_breakdown_pre_retrofit",
        "epc_breakdown_post_retrofit",
        "number_of_properties",
        "n_units_to_retrofit",
        "co2_per_unit_pre_retrofit",
        "co2_per_unit_post_retrofit",
        "energy_bill_per_unit_pre_retrofit",
        "energy_bill_per_unit_post_retrofit",
        "energy_consumption_per_unit_pre_retrofit",
        "energy_consumption_per_unit_post_retrofit",
        "valuation_improvement_per_unit",
        "cost_per_unit",
        "cost_per_co2_saved",
        "cost_per_sap_point",
        "valuation_return_on_investment",
        "property_valuation_increase",
        "labour_days",
    )

    def __init__(self, record: ScenarioRecord, id: Optional[int] = None):
        self.id = id
        self.record = record

    @classmethod
    def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario:
        """Build a ``Scenario`` by copying the record fields off an ORM row.

        Args:
            scenario_model: ORM row to read values from.

        Returns:
            A new ``Scenario`` carrying ``scenario_model.id``.
        """
        values = {
            field_name: getattr(scenario_model, field_name)
            for field_name in cls._RECORD_FIELDS
        }
        return cls(ScenarioRecord(**values), scenario_model.id)

    def set_default(self, value: bool) -> None:
        """Swap the frozen record for a copy whose ``is_default`` is *value*."""
        updated = replace(self.record, is_default=value)
        self.record = updated
|
||||
31
backend/app/domain/records/plan_record.py
Normal file
31
backend/app/domain/records/plan_record.py
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from backend.app.db.models.portfolio import Epc
|
||||
from backend.app.db.models.recommendations import PlanTypeEnum
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class PlanRecord:
    """Immutable value object carrying a plan's data.

    The first four fields are required; every other field defaults to ``None``.
    """

    # --- Required ---------------------------------------------------------
    property_id: int
    portfolio_id: int
    created_at: datetime
    is_default: bool

    # --- Valuation estimate and plan type ----------------------------------
    valuation_increase_lower_bound: Optional[float] = None
    valuation_increase_upper_bound: Optional[float] = None
    valuation_increase_average: Optional[float] = None
    plan_type: Optional[PlanTypeEnum] = None

    # --- Post-retrofit figures and the corresponding savings ---------------
    post_sap_points: Optional[float] = None
    post_epc_rating: Optional[Epc] = None
    post_co2_emissions: Optional[float] = None
    co2_savings: Optional[float] = None
    post_energy_bill: Optional[float] = None
    energy_bill_savings: Optional[float] = None
    post_energy_consumption: Optional[float] = None
    energy_consumption_savings: Optional[float] = None
    valuation_post_retrofit: Optional[float] = None
    valuation_increase: Optional[float] = None

    # --- Costs --------------------------------------------------------------
    cost_of_works: Optional[float] = None
    contingency_cost: Optional[float] = None
|
||||
47
backend/app/domain/records/scenario_record.py
Normal file
47
backend/app/domain/records/scenario_record.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from backend.app.db.models.portfolio import PortfolioGoal
|
||||
|
||||
|
||||
@dataclass(frozen=True)
class ScenarioRecord:
    """Immutable value object carrying a scenario's data.

    The first eight fields are required; every other field defaults to ``None``.
    """

    # --- Required -----------------------------------------------------------
    name: str
    created_at: datetime
    housing_type: str
    goal: PortfolioGoal
    goal_value: str
    trigger_file_path: str
    multi_plan: bool
    is_default: bool

    # --- Optional configuration ----------------------------------------------
    budget: Optional[float] = None
    already_installed_file_path: Optional[str] = None
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    exclusions: Optional[str] = None

    # --- Aggregate results ----------------------------------------------------
    cost: Optional[float] = None
    contingency: Optional[float] = None
    funding: Optional[float] = None
    total_work_hours: Optional[float] = None
    energy_savings: Optional[float] = None
    co2_equivalent_savings: Optional[float] = None
    energy_cost_savings: Optional[float] = None
    epc_breakdown_pre_retrofit: Optional[str] = None
    epc_breakdown_post_retrofit: Optional[str] = None
    number_of_properties: Optional[int] = None
    n_units_to_retrofit: Optional[int] = None
    co2_per_unit_pre_retrofit: Optional[str] = None
    co2_per_unit_post_retrofit: Optional[str] = None
    energy_bill_per_unit_pre_retrofit: Optional[str] = None
    energy_bill_per_unit_post_retrofit: Optional[str] = None
    energy_consumption_per_unit_pre_retrofit: Optional[str] = None
    energy_consumption_per_unit_post_retrofit: Optional[str] = None
    valuation_improvement_per_unit: Optional[str] = None
    cost_per_unit: Optional[str] = None
    cost_per_co2_saved: Optional[str] = None
    cost_per_sap_point: Optional[str] = None
    valuation_return_on_investment: Optional[str] = None
    property_valuation_increase: Optional[float] = None
    labour_days: Optional[float] = None
|
||||
|
|
@ -10,7 +10,7 @@ mangum==0.19.0
|
|||
# AWS
|
||||
boto3==1.35.44
|
||||
# Data
|
||||
openpyxl==3.1.2
|
||||
openpyxl==3.1.5
|
||||
# Basic
|
||||
pytz
|
||||
sqlmodel
|
||||
0
backend/categorisation/__init__.py
Normal file
0
backend/categorisation/__init__.py
Normal file
5
backend/categorisation/categorisation_trigger_request.py
Normal file
5
backend/categorisation/categorisation_trigger_request.py
Normal file
|
|
@ -0,0 +1,5 @@
|
|||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class CategorisationTriggerRequest(BaseModel):
    """Request payload carrying the id of a portfolio."""

    # Identifier of the portfolio the request refers to.
    portfolio_id: int
|
||||
47
backend/categorisation/handler/Dockerfile
Normal file
47
backend/categorisation/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
FROM public.ecr.aws/lambda/python:3.11
|
||||
# For local running:
|
||||
# FROM python:3.11.10-bullseye
|
||||
|
||||
ARG DEV_DB_HOST
|
||||
ARG DEV_DB_PORT
|
||||
ARG DEV_DB_NAME
|
||||
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# Environment
|
||||
ENV DB_HOST=${DEV_DB_HOST}
|
||||
ENV DB_PORT=${DEV_DB_PORT}
|
||||
ENV DB_NAME=${DEV_DB_NAME}
|
||||
|
||||
COPY backend/.env.test backend/.env
|
||||
|
||||
# -----------------------------
|
||||
# Copy requirements FIRST (for Docker layer caching)
|
||||
# -----------------------------
|
||||
COPY backend/categorisation/handler/requirements.txt .
|
||||
|
||||
# Install dependencies into Lambda runtime
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# -----------------------------
|
||||
# Copy application code
|
||||
# -----------------------------
|
||||
COPY utils/ utils/
|
||||
COPY backend/categorisation/ backend/categorisation/
|
||||
|
||||
COPY backend/app/db/connection.py backend/app/db/connection.py
|
||||
COPY backend/app/config.py backend/app/config.py
|
||||
|
||||
COPY backend/__init__.py backend/__init__.py
|
||||
COPY backend/app/__init__.py backend/app/__init__.py
|
||||
COPY backend/app/db/__init__.py backend/app/db/__init__.py
|
||||
|
||||
|
||||
# -----------------------------
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["backend/categorisation/handler/handler.handler"]
|
||||
# For local running
|
||||
# CMD ["python", "-m", "backend.categorisation.handler.handler"]
|
||||
10
backend/categorisation/handler/handler.py
Normal file
10
backend/categorisation/handler/handler.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
from typing import Any, Mapping
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Handler entry point; currently a stub that ignores both arguments."""
    return None
|
||||
3
backend/categorisation/handler/requirements.txt
Normal file
3
backend/categorisation/handler/requirements.txt
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
sqlmodel
|
||||
pydantic-settings
|
||||
psycopg2-binary==2.9.10
|
||||
11
backend/categorisation/local_runner.py
Normal file
11
backend/categorisation/local_runner.py
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
from backend.categorisation.processor import process_portfolio
|
||||
|
||||
|
||||
def main(portfolio_id: int = 556) -> None:
    """Run the categorisation processor for a single portfolio.

    Args:
        portfolio_id: Portfolio to process. Defaults to 556, the id this
            runner was previously hard-coded to, so ``main()`` behaves as
            before.
    """
    process_portfolio(portfolio_id)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
93
backend/categorisation/processor.py
Normal file
93
backend/categorisation/processor.py
Normal file
|
|
@ -0,0 +1,93 @@
|
|||
from collections import defaultdict
|
||||
from typing import Dict, List
|
||||
|
||||
from backend.app.db.functions.recommendations_functions import (
|
||||
bulk_update_plans,
|
||||
get_plans_by_portfolio_id,
|
||||
get_scenario,
|
||||
)
|
||||
from backend.app.db.models.recommendations import PlanModel, ScenarioModel
|
||||
from backend.app.domain.classes.plan import Plan
|
||||
from backend.app.domain.classes.scenario import Scenario
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def process_portfolio(portfolio_id: int) -> None:
    """Pick and persist the default plan for every property in a portfolio.

    Plans are loaded, grouped by property id, and for each property the
    cheapest relevant plan is chosen and the ``is_default`` flags are updated
    to match.

    Args:
        portfolio_id: Portfolio whose plans should be processed.

    Raises:
        ValueError: If a property group turns out to be empty.
    """
    # Use the module logger rather than print, like the rest of this module.
    logger.info(f"Processing portfolio {portfolio_id}")
    plans: List[Plan] = _load_plans_for_portfolio(portfolio_id)
    plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans)

    # Groups are keyed by the plan record's property_id.
    for property_id, property_plans in plans_by_property.items():
        if not property_plans:
            raise ValueError(f"No plans for property {property_id}")

        cheapest_plan = _choose_cheapest_relevant_plan(property_plans)
        _update_default_flags(property_plans, cheapest_plan)
|
||||
|
||||
|
||||
def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]:
    """Load a portfolio's plans and map them to domain ``Plan`` objects.

    Plans with no scenario id, or whose scenario cannot be loaded, are logged
    and skipped.

    Args:
        portfolio_id: Portfolio whose plans should be loaded.

    Returns:
        One ``Plan`` per usable plan row, each with its own ``Scenario``.
    """
    plan_models = get_plans_by_portfolio_id(portfolio_id)
    logger.info(f"Got {len(plan_models)} plans from database")

    # Fetch each distinct scenario once; several plans may share a scenario id.
    scenario_models_by_id: Dict[int, ScenarioModel] = {}
    plans: List[Plan] = []

    for model in plan_models:
        scenario_id = model.scenario_id
        if not scenario_id:
            logger.info(f"No Scenario associated with Plan of ID {model.id}")
            continue

        if scenario_id not in scenario_models_by_id:
            scenario_models_by_id[scenario_id] = get_scenario(scenario_id)
        scenario_model = scenario_models_by_id[scenario_id]

        # get_scenario's not-found behaviour is not visible from here; guard
        # in case it returns None instead of raising.
        if scenario_model is None:
            logger.warning(
                f"Scenario {scenario_id} for Plan of ID {model.id} was not found"
            )
            continue

        # A fresh Scenario per plan: Plan.set_default mutates its scenario, so
        # domain objects must not be shared between plans.
        plans.append(
            Plan.from_sqlalchemy(model, Scenario.from_sqlalchemy(scenario_model))
        )

    logger.info(f"Successfully mapped {len(plans)} plans to domain objects")
    return plans
|
||||
|
||||
|
||||
def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]:
|
||||
grouped: dict[int, List[Plan]] = defaultdict(list)
|
||||
|
||||
for plan in plans:
|
||||
grouped[plan.record.property_id].append(plan)
|
||||
|
||||
return grouped
|
||||
|
||||
|
||||
def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan:
|
||||
plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans
|
||||
|
||||
def plan_cost(plan: Plan) -> float:
|
||||
return (
|
||||
plan.record.cost_of_works
|
||||
if plan.record.cost_of_works is not None
|
||||
else float("inf")
|
||||
)
|
||||
|
||||
cheapest_plan = min(plans_to_consider, key=plan_cost)
|
||||
|
||||
return cheapest_plan
|
||||
|
||||
|
||||
def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None:
|
||||
plans_to_update: List[Plan] = []
|
||||
|
||||
for plan in plans:
|
||||
should_be_default: bool = plan.id == cheapest_plan.id
|
||||
if plan.record.is_default != should_be_default:
|
||||
plan.set_default(should_be_default)
|
||||
plans_to_update.append(plan)
|
||||
|
||||
if plans_to_update:
|
||||
plan_models: List[PlanModel] = []
|
||||
scenario_models: List[ScenarioModel] = []
|
||||
|
||||
for plan in plans_to_update:
|
||||
plan_model, scenario_model = plan.to_sqlalchemy()
|
||||
plan_models.append(plan_model)
|
||||
scenario_models.append(scenario_model)
|
||||
|
||||
bulk_update_plans(plan_models, scenario_models)
|
||||
73
backend/categorisation/tests/test_plan_is_compliant.py
Normal file
73
backend/categorisation/tests/test_plan_is_compliant.py
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
from typing import Callable
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
|
||||
from backend.app.domain.classes.plan import Plan
|
||||
from backend.app.domain.classes.scenario import Scenario
|
||||
from backend.app.domain.records.plan_record import PlanRecord
|
||||
from backend.app.domain.records.scenario_record import ScenarioRecord
|
||||
from backend.app.db.models.portfolio import Epc, PortfolioGoal
|
||||
|
||||
|
||||
@pytest.fixture
def created_at_datetime() -> datetime:
    """Current local time, used as ``created_at`` by the record fixtures."""
    now = datetime.now()
    return now
|
||||
|
||||
|
||||
@pytest.fixture
def epc_c_scenario(created_at_datetime: datetime) -> "Scenario":
    """Scenario whose goal is ``INCREASING_EPC`` with a target band of "C"."""
    # arrange
    return Scenario(
        record=ScenarioRecord(
            name="EPC C",
            created_at=created_at_datetime,
            housing_type="",
            goal=PortfolioGoal.INCREASING_EPC,
            goal_value="C",
            trigger_file_path="",
            multi_plan=False,
            is_default=False,
        ),
        id=1,
    )
|
||||
|
||||
|
||||
@pytest.fixture
def plan_factory(
    epc_c_scenario: "Scenario", created_at_datetime: datetime
) -> Callable[[int, "Epc"], "Plan"]:
    """Factory fixture: builds plans bound to the EPC C scenario.

    The returned callable takes the post-retrofit SAP points and EPC rating;
    every other record field is fixed.
    """

    def build_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan":
        record = PlanRecord(
            property_id=1,
            portfolio_id=1,
            created_at=created_at_datetime,
            is_default=False,
            post_sap_points=post_sap_points,
            post_epc_rating=post_epc_rating,
        )
        return Plan(record=record, scenario=epc_c_scenario, id=1)

    return build_plan
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "post_sap_points, post_epc_rating, expected_compliance",
    [
        (75, Epc.C, True),
        (100, Epc.A, True),
        (60, Epc.D, False),
    ],
)
def test_scenario_goal_is_epc_c(
    plan_factory: Callable[[int, "Epc"], "Plan"],
    post_sap_points: int,
    post_epc_rating: "Epc",
    expected_compliance: bool,
) -> None:
    """A plan complies with an EPC C goal only if its rating is C or better."""
    # arrange
    plan_under_test = plan_factory(post_sap_points, post_epc_rating)

    # act
    is_compliant: bool = plan_under_test.is_compliant

    # assert
    assert is_compliant == expected_compliance
|
||||
|
|
@ -29,5 +29,5 @@ class ConditionTriggerRequest(BaseModel):
|
|||
# {
|
||||
# "file_type": "LBWF",
|
||||
# "trigger_file_bucket": "condition-data-dev",
|
||||
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx",
|
||||
# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx"
|
||||
# }
|
||||
|
|
|
|||
|
|
@ -1,9 +1,28 @@
|
|||
FROM public.ecr.aws/lambda/python:3.10
|
||||
FROM public.ecr.aws/lambda/python:3.11
|
||||
|
||||
ARG DEV_DB_HOST
|
||||
ARG DEV_DB_PORT
|
||||
ARG DEV_DB_NAME
|
||||
|
||||
ENV DB_HOST=${DEV_DB_HOST}
|
||||
ENV DB_PORT=${DEV_DB_PORT}
|
||||
ENV DB_NAME=${DEV_DB_NAME}
|
||||
|
||||
# Set working directory (Lambda task root)
|
||||
WORKDIR /var/task
|
||||
|
||||
# -----------------------------
|
||||
COPY backend/postcode_splitter/handler/requirements.txt .
|
||||
|
||||
RUN pip install --no-cache-dir -r requirements.txt
|
||||
|
||||
# Copy necessary files for database and utility imports
|
||||
COPY utils/ utils/
|
||||
COPY backend/ backend/
|
||||
COPY datatypes/ datatypes/
|
||||
|
||||
# Copy the handler
|
||||
COPY backend/postcode_splitter/main.py .
|
||||
|
||||
# Lambda handler
|
||||
# -----------------------------
|
||||
CMD ["main.handler"]
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
pandas==2.2.2
|
||||
numpy<2.0
|
||||
requests
|
||||
tqdm
|
||||
openpyxl
|
||||
epc-api-python==1.0.2
|
||||
boto3==1.35.44
|
||||
sqlmodel
|
||||
sqlalchemy==2.0.36
|
||||
psycopg2-binary==2.9.10
|
||||
pydantic-settings==2.6.0
|
||||
|
|
@ -1,127 +1,278 @@
|
|||
import os
|
||||
import sys
|
||||
import json
|
||||
import pandas as pd
|
||||
import requests
|
||||
from backend.address2UPRN.main import (
|
||||
resolve_uprns_for_postcode_group,
|
||||
get_epc_data_with_postcode,
|
||||
import boto3
|
||||
from uuid import UUID, uuid4
|
||||
from utils.s3 import (
|
||||
read_csv_from_s3 as read_csv_from_s3_dict,
|
||||
save_csv_to_s3,
|
||||
parse_s3_uri,
|
||||
)
|
||||
from utils.logger import setup_logger
|
||||
from tqdm import tqdm
|
||||
from backend.app.db.functions.tasks.Tasks import SubTaskInterface
|
||||
from datetime import datetime
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def sanitise_postcode(postcode: str) -> str | None:
|
||||
def upload_batch_to_s3(
|
||||
batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None
|
||||
) -> str:
|
||||
"""
|
||||
Normalise postcode for grouping.
|
||||
|
||||
- Uppercase
|
||||
- Remove all whitespace
|
||||
Upload batch DataFrame to S3 as CSV.
|
||||
"""
|
||||
if pd.isna(postcode):
|
||||
return None
|
||||
if bucket_name is None:
|
||||
bucket_name = os.getenv("S3_BUCKET_NAME")
|
||||
|
||||
return postcode.upper().replace(" ", "")
|
||||
|
||||
|
||||
def is_valid_postcode(postcode_clean: str) -> bool:
|
||||
"""
|
||||
Validate postcode using postcodes.io.
|
||||
|
||||
Expects a sanitised postcode (e.g. E84SQ).
|
||||
Returns True if valid, False otherwise.
|
||||
"""
|
||||
POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate"
|
||||
if not postcode_clean:
|
||||
return False
|
||||
if not bucket_name:
|
||||
logger.error(
|
||||
"S3 bucket name not provided and S3_BUCKET_NAME environment variable not set"
|
||||
)
|
||||
raise ValueError("S3_BUCKET_NAME not configured")
|
||||
|
||||
try:
|
||||
resp = requests.get(
|
||||
POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean),
|
||||
timeout=5,
|
||||
file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}"
|
||||
file_key = (
|
||||
f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv"
|
||||
)
|
||||
resp.raise_for_status()
|
||||
return resp.json().get("result", False)
|
||||
except requests.RequestException:
|
||||
# Network issues, rate limits, etc.
|
||||
return False
|
||||
|
||||
success = save_csv_to_s3(batch_df, bucket_name, file_key)
|
||||
|
||||
if success:
|
||||
s3_uri = f"s3://{bucket_name}/{file_key}"
|
||||
logger.info(f"Successfully uploaded batch to {s3_uri}")
|
||||
return s3_uri
|
||||
else:
|
||||
logger.error(f"Failed to upload batch to S3")
|
||||
raise ValueError("Failed to save CSV to S3")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error uploading batch to S3: {str(e)}")
|
||||
raise
|
||||
|
||||
|
||||
def main():
|
||||
df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability")
|
||||
df = df.head(500)
|
||||
def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> str:
|
||||
"""
|
||||
Send a batch to the address2UPRN SQS queue with S3 reference.
|
||||
|
||||
# Sanitise postcodes
|
||||
df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode)
|
||||
Args:
|
||||
task_id: The parent task ID
|
||||
sub_task_id: The new subtask ID for this batch
|
||||
s3_uri: S3 URI pointing to the batch CSV file
|
||||
|
||||
# --- validate AFTER grouping (save API calls) ---
|
||||
Returns:
|
||||
Message ID from SQS
|
||||
"""
|
||||
sqs_client = boto3.client("sqs")
|
||||
queue_url = os.getenv("ADDRESS2UPRN_QUEUE_URL")
|
||||
|
||||
# Get unique, non-null postcodes
|
||||
unique_postcodes = df["postcode_clean"].dropna().unique()
|
||||
if not queue_url:
|
||||
raise ValueError("ADDRESS2UPRN_QUEUE_URL environment variable not set")
|
||||
|
||||
# Validate each postcode once. TODO: add a progress bar
|
||||
postcode_validity = {
|
||||
pc: is_valid_postcode(pc)
|
||||
for pc in tqdm(unique_postcodes, total=len(unique_postcodes))
|
||||
message_body = {
|
||||
"task_id": task_id,
|
||||
"sub_task_id": sub_task_id,
|
||||
"s3_uri": s3_uri,
|
||||
}
|
||||
|
||||
# Map validity back onto dataframe
|
||||
df["postcode_valid"] = df["postcode_clean"].map(postcode_validity)
|
||||
response = sqs_client.send_message(
|
||||
QueueUrl=queue_url,
|
||||
MessageBody=json.dumps(message_body),
|
||||
)
|
||||
|
||||
logger.info(
|
||||
f"Sent message to address2UPRN queue. "
|
||||
f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}"
|
||||
)
|
||||
|
||||
return response["MessageId"]
|
||||
|
||||
|
||||
def create_batch_and_send_to_address2uprn(
    batch_df: pd.DataFrame,
    task_id: str,
    sub_task_id: str,
    subtask_interface: SubTaskInterface,
    bucket_name: str,
) -> str:
    """
    Upload one batch to S3, register a subtask for it, and queue it for address2UPRN.

    Args:
        batch_df: Rows that make up this batch.
        task_id: The parent task ID.
        sub_task_id: ID of the subtask being split; in this function it is only
            forwarded (as a string) to ``upload_batch_to_s3``.
        subtask_interface: Object whose ``create_subtask`` registers the batch.
        bucket_name: Bucket name forwarded to ``upload_batch_to_s3``.

    Returns:
        The value returned by ``subtask_interface.create_subtask`` for the
        newly created batch subtask.
    """
    parent_task = str(task_id)

    # Store the batch first: both the subtask inputs and the queue message
    # reference the resulting S3 URI.
    batch_uri = upload_batch_to_s3(batch_df, parent_task, str(sub_task_id), bucket_name)

    subtask_inputs = {
        "task_id": parent_task,
        "s3_uri": batch_uri,
    }
    batch_subtask_id = subtask_interface.create_subtask(
        task_id=task_id,
        inputs=subtask_inputs,
    )
    logger.info(f"Created batch subtask {batch_subtask_id}")

    # The queue message carries the newly created subtask ID, not sub_task_id.
    send_to_address2uprn_queue(
        task_id=parent_task,
        sub_task_id=str(batch_subtask_id),
        s3_uri=batch_uri,
    )

    return batch_subtask_id
|
||||
|
||||
|
||||
def handler(event, context, local=False):
|
||||
print(f"Function: {context.function_name}")
|
||||
print(f"Request ID: {context.aws_request_id}")
|
||||
|
||||
# Example SQS message for testing (copy and paste into SQS):
|
||||
if local is True:
|
||||
event = {
|
||||
"Records": [
|
||||
{
|
||||
"body": json.dumps(
|
||||
{
|
||||
"task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917",
|
||||
"sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0",
|
||||
"s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv",
|
||||
}
|
||||
)
|
||||
}
|
||||
]
|
||||
}
|
||||
# Handle both single event and batch events (SQS, etc.)
|
||||
records = event.get("Records", [event])
|
||||
results = []
|
||||
errors = []
|
||||
subtask_interface = SubTaskInterface()
|
||||
bucket_name = os.getenv("S3_BUCKET_NAME")
|
||||
if local:
|
||||
bucket_name = "retrofit-data-dev"
|
||||
|
||||
for postcode, group_df in tqdm(
|
||||
df[df["postcode_valid"]].groupby("postcode_clean"),
|
||||
desc="Resolving UPRNs by postcode",
|
||||
):
|
||||
try:
|
||||
epc_df = get_epc_data_with_postcode(postcode)
|
||||
for record in records:
|
||||
if local:
|
||||
record = records[0]
|
||||
task_id = None
|
||||
subtask_id = None
|
||||
# Parse body (inputs)
|
||||
|
||||
if epc_df.empty:
|
||||
tmp = group_df.copy()
|
||||
tmp["found_uprn"] = None
|
||||
tmp["status"] = "no_epc_results"
|
||||
results.append(tmp)
|
||||
continue
|
||||
if isinstance(record.get("body"), str):
|
||||
body = json.loads(record["body"])
|
||||
else:
|
||||
body = record.get("body", {})
|
||||
|
||||
resolved = resolve_uprns_for_postcode_group(
|
||||
group_df=group_df,
|
||||
epc_df=epc_df,
|
||||
# Read the required fields from the message body
|
||||
task_id = body.get("task_id")
|
||||
subtask_id = body.get("sub_task_id")
|
||||
s3_uri = body.get("s3_uri")
|
||||
|
||||
# Convert task_id and sub_task_id to UUID when they arrive as strings
|
||||
task_id = UUID(task_id) if isinstance(task_id, str) else task_id
|
||||
subtask_id = UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id
|
||||
|
||||
# Mark subtask as in progress
|
||||
subtask_interface.update_subtask_status(subtask_id, "in progress")
|
||||
logger.info(f"Marked subtask {subtask_id} as in progress")
|
||||
|
||||
# Read CSV from S3
|
||||
bucket, key = parse_s3_uri(s3_uri)
|
||||
logger.info(f"S3 Bucket: {bucket}, Key: {key}")
|
||||
|
||||
csv_data = read_csv_from_s3_dict(bucket, key)
|
||||
df = pd.DataFrame(csv_data)
|
||||
|
||||
logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns")
|
||||
|
||||
# Sanitise postcodes
|
||||
df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "")
|
||||
|
||||
df = df.dropna(subset=["postcode_clean"])
|
||||
|
||||
batch_size = 500
|
||||
if df.shape[0] < batch_size:
|
||||
create_batch_and_send_to_address2uprn(
|
||||
batch_df=df,
|
||||
task_id=task_id,
|
||||
sub_task_id=subtask_id,
|
||||
subtask_interface=subtask_interface,
|
||||
bucket_name=bucket_name,
|
||||
)
|
||||
else:
|
||||
postcode_to_addresses = {
|
||||
postcode: group
|
||||
for postcode, group in df.groupby("postcode_clean", sort=False)
|
||||
}
|
||||
|
||||
results.append(resolved)
|
||||
count = 0
|
||||
buffer = []
|
||||
|
||||
except Exception as e:
|
||||
tmp = group_df.copy()
|
||||
tmp["found_uprn"] = None
|
||||
tmp["status"] = "exception"
|
||||
tmp["error"] = str(e)
|
||||
results.append(tmp)
|
||||
for postcode, group_df in postcode_to_addresses.items():
|
||||
group_len = len(group_df)
|
||||
|
||||
final_df = pd.concat(results, ignore_index=True)
|
||||
a = final_df[
|
||||
[
|
||||
"best_match_lexiscore",
|
||||
"Address 1",
|
||||
"best_match_address",
|
||||
"Postcode",
|
||||
"UPRN",
|
||||
"best_match_uprn",
|
||||
]
|
||||
] # add levi score to viewing
|
||||
b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing
|
||||
b = b[
|
||||
[
|
||||
"best_match_lexiscore",
|
||||
"Address 1",
|
||||
"best_match_address",
|
||||
"Postcode",
|
||||
"UPRN",
|
||||
"best_match_uprn",
|
||||
]
|
||||
]
|
||||
# If a single postcode group has at least batch_size rows → flush the buffer, then send that group on its own
|
||||
if group_len >= batch_size:
|
||||
if buffer:
|
||||
create_batch_and_send_to_address2uprn(
|
||||
batch_df=pd.concat(buffer, ignore_index=True),
|
||||
task_id=task_id,
|
||||
sub_task_id=subtask_id,
|
||||
subtask_interface=subtask_interface,
|
||||
bucket_name=bucket_name,
|
||||
)
|
||||
buffer = []
|
||||
count = 0
|
||||
|
||||
create_batch_and_send_to_address2uprn(
|
||||
batch_df=group_df,
|
||||
task_id=task_id,
|
||||
sub_task_id=subtask_id,
|
||||
subtask_interface=subtask_interface,
|
||||
bucket_name=bucket_name,
|
||||
)
|
||||
continue
|
||||
|
||||
def handler(event, context):
|
||||
print("hello Postcode splitter world")
|
||||
return {"statusCode": 200, "body": "hello world"}
|
||||
# If adding would exceed batch → flush first
|
||||
if count + group_len > batch_size:
|
||||
create_batch_and_send_to_address2uprn(
|
||||
batch_df=pd.concat(buffer, ignore_index=True),
|
||||
task_id=task_id,
|
||||
sub_task_id=subtask_id,
|
||||
subtask_interface=subtask_interface,
|
||||
bucket_name=bucket_name,
|
||||
)
|
||||
buffer = []
|
||||
count = 0
|
||||
|
||||
# Add group
|
||||
buffer.append(group_df)
|
||||
count += group_len
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
# Final flush
|
||||
if buffer:
|
||||
create_batch_and_send_to_address2uprn(
|
||||
batch_df=pd.concat(buffer, ignore_index=True),
|
||||
task_id=task_id,
|
||||
sub_task_id=subtask_id,
|
||||
subtask_interface=subtask_interface,
|
||||
bucket_name=bucket_name,
|
||||
)
|
||||
|
||||
# Mark subtask as completed
|
||||
subtask_interface.update_subtask_status(
|
||||
subtask_id,
|
||||
"completed",
|
||||
outputs={"rows_processed": "completed"},
|
||||
)
|
||||
|
||||
return {
|
||||
"statusCode": 200,
|
||||
"body": json.dumps(
|
||||
{"processed": results, "errors": errors if errors else None}
|
||||
),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,7 +41,10 @@ epc_data = pd.read_csv(
|
|||
|
||||
# Classify floor area in <73m2, 73-98, 99-200, 200+
|
||||
epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply(
|
||||
lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+")
|
||||
lambda x: (
|
||||
"<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+"
|
||||
)
|
||||
)
|
||||
|
||||
# 73-98 185
|
||||
# <73 156
|
||||
|
|
@ -65,7 +68,11 @@ import pandas as pd
|
|||
import numpy as np
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
)
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
|
||||
|
||||
|
|
@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids):
|
|||
session.begin()
|
||||
|
||||
# Get properties and their details for a specific portfolio
|
||||
properties_query = session.query(
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel
|
||||
).join(
|
||||
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
|
||||
).filter(
|
||||
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
|
||||
).all()
|
||||
properties_query = (
|
||||
session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform properties data to include all fields dynamically
|
||||
properties_data = [
|
||||
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
|
||||
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
|
||||
PropertyDetailsEpcModel.__table__.columns}}
|
||||
{
|
||||
**{
|
||||
col.name: getattr(prop.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for prop in properties_query
|
||||
]
|
||||
|
||||
# Get property IDs from fetched properties
|
||||
|
||||
# Get plans belonging to the requested scenarios
|
||||
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
|
||||
plans_query = (
|
||||
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
|
||||
)
|
||||
|
||||
# Transform plans data to include all fields dynamically
|
||||
plans_data = [
|
||||
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
|
||||
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
|
||||
for plan in plans_query
|
||||
]
|
||||
|
||||
# Extract plan IDs for filtering recommendations through PlanRecommendations
|
||||
plan_ids = [plan['id'] for plan in plans_data]
|
||||
plan_ids = [plan["id"] for plan in plans_data]
|
||||
|
||||
# Get the default recommendations linked to those plans via PlanRecommendations
|
||||
recommendations_query = session.query(
|
||||
Recommendation,
|
||||
Plan.scenario_id
|
||||
).join(
|
||||
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
|
||||
).join(
|
||||
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
|
||||
).filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default == True # Filtering for default recommendations
|
||||
).all()
|
||||
recommendations_query = (
|
||||
session.query(Recommendation, PlanModel.scenario_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(
|
||||
PlanModel,
|
||||
PlanModel.id
|
||||
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
|
||||
)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default == True, # Filtering for default recommendations
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform recommendations data to include all fields dynamically and include scenario_id
|
||||
recommendations_data = [
|
||||
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec,
|
||||
col.name) for
|
||||
col in Recommendation.__table__.columns},
|
||||
"Scenario ID": rec.scenario_id}
|
||||
{
|
||||
**{
|
||||
col.name: (
|
||||
getattr(rec.Recommendation, col.name)
|
||||
if hasattr(rec, "Recommendation")
|
||||
else getattr(rec, col.name)
|
||||
)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"Scenario ID": rec.scenario_id,
|
||||
}
|
||||
for rec in recommendations_query
|
||||
]
|
||||
|
||||
|
|
@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids):
|
|||
return properties_data, plans_data, recommendations_data
|
||||
|
||||
|
||||
properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205])
|
||||
properties_data, plans_data, recommendations_data = get_data(
|
||||
portfolio_id=124, scenario_ids=[205]
|
||||
)
|
||||
|
||||
properties_df = pd.DataFrame(properties_data)
|
||||
plans_df = pd.DataFrame(plans_data)
|
||||
|
|
@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"])
|
|||
post_install_sap = recommendations_df[["property_id", "default", "sap_points"]]
|
||||
post_install_sap = post_install_sap[post_install_sap["default"]]
|
||||
# Sum up the sap points by property id
|
||||
post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
|
||||
post_install_sap = (
|
||||
post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index()
|
||||
)
|
||||
|
||||
recommendations_measures_pivot = recommended_measures_df.pivot(
|
||||
index='property_id',
|
||||
columns='measure_type',
|
||||
values='estimated_cost'
|
||||
index="property_id", columns="measure_type", values="estimated_cost"
|
||||
)
|
||||
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
|
||||
|
||||
|
|
@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename(
|
|||
"double_glazing": "Cost: Double Glazing",
|
||||
"loft_insulation": "Cost: Loft Insulation",
|
||||
"mechanical_ventilation": "Cost: Ventilation",
|
||||
"solar_pv": "Cost: Solar PV"
|
||||
"solar_pv": "Cost: Solar PV",
|
||||
}
|
||||
)
|
||||
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
|
||||
|
|
@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = (
|
|||
recommendations_measures_pivot["Cost: Solar PV"] > 0
|
||||
)
|
||||
|
||||
df = properties_df[
|
||||
[
|
||||
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
|
||||
"current_epc_rating",
|
||||
"current_sap_points", "total_floor_area", "number_of_rooms",
|
||||
df = (
|
||||
properties_df[
|
||||
[
|
||||
"property_id",
|
||||
"uprn",
|
||||
"address",
|
||||
"postcode",
|
||||
"property_type",
|
||||
"walls",
|
||||
"roof",
|
||||
"heating",
|
||||
"windows",
|
||||
"current_epc_rating",
|
||||
"current_sap_points",
|
||||
"total_floor_area",
|
||||
"number_of_rooms",
|
||||
]
|
||||
]
|
||||
].merge(
|
||||
recommendations_measures_pivot, how="left", on="property_id"
|
||||
).merge(
|
||||
post_install_sap, how="left", on="property_id"
|
||||
.merge(recommendations_measures_pivot, how="left", on="property_id")
|
||||
.merge(post_install_sap, how="left", on="property_id")
|
||||
)
|
||||
|
||||
df = df.drop(columns=["property_id"])
|
||||
|
|
@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"])
|
|||
|
||||
# Fill missing values:
|
||||
for col in [
|
||||
"Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation",
|
||||
"Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation",
|
||||
"Recommendation: Solar PV"
|
||||
"Recommendation: Air Source Heat Pump",
|
||||
"Recommendation: Cavity Wall Insulation",
|
||||
"Recommendation: Double Glazing",
|
||||
"Recommendation: Loft Insulation",
|
||||
"Recommendation: Ventilation",
|
||||
"Recommendation: Solar PV",
|
||||
]:
|
||||
df[col] = df[col].fillna(False)
|
||||
|
||||
for col in [
|
||||
"Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation",
|
||||
"Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation",
|
||||
"Cost: Solar PV"
|
||||
"Cost: Air Source Heat Pump",
|
||||
"Cost: Cavity Wall Insulation",
|
||||
"Cost: Double Glazing",
|
||||
"Cost: Loft Insulation",
|
||||
"Cost: Ventilation",
|
||||
"Cost: Solar PV",
|
||||
]:
|
||||
df[col] = df[col].fillna(0)
|
||||
|
||||
# Calculate post SAP
|
||||
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
|
||||
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
|
||||
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
|
||||
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
|
||||
lambda x: sap_to_epc(x)
|
||||
)
|
||||
|
||||
df["Recommendation: Air Source Heat Pump"].sum()
|
||||
df["Cost: Air Source Heat Pump"].sum()
|
||||
|
||||
df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False)
|
||||
df.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv",
|
||||
index=False,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -4,7 +4,11 @@ import numpy as np
|
|||
from backend.app.utils import sap_to_epc
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
)
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
|
||||
|
||||
|
|
@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids):
|
|||
session.begin()
|
||||
|
||||
# Get properties and their details for a specific portfolio
|
||||
properties_query = session.query(
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel
|
||||
).join(
|
||||
PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id
|
||||
).filter(
|
||||
PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID
|
||||
).all()
|
||||
properties_query = (
|
||||
session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform properties data to include all fields dynamically
|
||||
properties_data = [
|
||||
{**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns},
|
||||
**{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in
|
||||
PropertyDetailsEpcModel.__table__.columns}}
|
||||
{
|
||||
**{
|
||||
col.name: getattr(prop.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(prop.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for prop in properties_query
|
||||
]
|
||||
|
||||
# Get property IDs from fetched properties
|
||||
|
||||
# Get plans belonging to the requested scenarios
|
||||
plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all()
|
||||
plans_query = (
|
||||
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
|
||||
)
|
||||
|
||||
# Transform plans data to include all fields dynamically
|
||||
plans_data = [
|
||||
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
|
||||
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
|
||||
for plan in plans_query
|
||||
]
|
||||
|
||||
# Extract plan IDs for filtering recommendations through PlanRecommendations
|
||||
plan_ids = [plan['id'] for plan in plans_data]
|
||||
plan_ids = [plan["id"] for plan in plans_data]
|
||||
|
||||
# Get the default recommendations linked to those plans via PlanRecommendations
|
||||
recommendations_query = session.query(
|
||||
Recommendation,
|
||||
Plan.scenario_id
|
||||
).join(
|
||||
PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id
|
||||
).join(
|
||||
Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id
|
||||
).filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default == True # Filtering for default recommendations
|
||||
).all()
|
||||
recommendations_query = (
|
||||
session.query(Recommendation, PlanModel.scenario_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(
|
||||
PlanModel,
|
||||
PlanModel.id
|
||||
== PlanRecommendations.plan_id, # Join with Plan to access scenario_id
|
||||
)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default == True, # Filtering for default recommendations
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Transform recommendations data to include all fields dynamically and include scenario_id
|
||||
recommendations_data = [
|
||||
{**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation')
|
||||
else getattr(rec, col.name) for
|
||||
col in Recommendation.__table__.columns},
|
||||
"Scenario ID": rec.scenario_id}
|
||||
{
|
||||
**{
|
||||
col.name: (
|
||||
getattr(rec.Recommendation, col.name)
|
||||
if hasattr(rec, "Recommendation")
|
||||
else getattr(rec, col.name)
|
||||
)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"Scenario ID": rec.scenario_id,
|
||||
}
|
||||
for rec in recommendations_query
|
||||
]
|
||||
|
||||
|
|
@ -94,16 +121,34 @@ def app():
|
|||
)
|
||||
|
||||
property_asset_data = properties_df.merge(
|
||||
mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn"
|
||||
mod_property_data.drop(columns=["address", "postcode", "tenure"]),
|
||||
how="left",
|
||||
on="uprn",
|
||||
)
|
||||
|
||||
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False)
|
||||
property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains(
|
||||
"pitched", case=False
|
||||
)
|
||||
property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970
|
||||
property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip()
|
||||
property_asset_data["is_insulated"] = (
|
||||
property_asset_data["walls"].str.split(",").str[1].str.strip().isin(
|
||||
["filled cavity", "with external insulation", "filled cavity and external insulation"]
|
||||
) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"])
|
||||
property_asset_data["wall_type"] = (
|
||||
property_asset_data["walls"].str.split(" ").str[0].str.strip()
|
||||
)
|
||||
property_asset_data["is_insulated"] = property_asset_data["walls"].str.split(
|
||||
","
|
||||
).str[1].str.strip().isin(
|
||||
[
|
||||
"filled cavity",
|
||||
"with external insulation",
|
||||
"filled cavity and external insulation",
|
||||
]
|
||||
) | property_asset_data[
|
||||
"walls"
|
||||
].str.split(
|
||||
","
|
||||
).str[
|
||||
2
|
||||
].str.strip().isin(
|
||||
["insulated"]
|
||||
)
|
||||
property_asset_data["is_insulated"] = np.where(
|
||||
property_asset_data["is_insulated"], "Insulated", "Uninsulated"
|
||||
|
|
@ -115,18 +160,26 @@ def app():
|
|||
property_asset_data["pre_1970"], "Pre 1970", "Post 1970"
|
||||
)
|
||||
|
||||
archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"]
|
||||
archetype_variables = [
|
||||
"property_type",
|
||||
"wall_type",
|
||||
"is_insulated",
|
||||
"is_pitched",
|
||||
"pre_1970",
|
||||
]
|
||||
|
||||
assigned_archetypes = (
|
||||
property_asset_data.groupby(
|
||||
archetype_variables
|
||||
).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False)
|
||||
property_asset_data.groupby(archetype_variables)
|
||||
.size()
|
||||
.reset_index()
|
||||
.rename(columns={0: "n_properties"})
|
||||
.sort_values("n_properties", ascending=False)
|
||||
)
|
||||
|
||||
# Make the archetype ID a concatenation of the variables
|
||||
assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply(
|
||||
lambda x: "_".join(x.astype(str)), axis=1
|
||||
)
|
||||
assigned_archetypes["archetype_id"] = assigned_archetypes[
|
||||
archetype_variables
|
||||
].apply(lambda x: "_".join(x.astype(str)), axis=1)
|
||||
|
||||
# Most prominent archetypes
|
||||
prominent_archetypes = assigned_archetypes.head(6)
|
||||
|
|
@ -136,7 +189,7 @@ def app():
|
|||
property_asset_data = property_asset_data.merge(
|
||||
assigned_archetypes[archetype_variables + ["archetype_id"]],
|
||||
how="left",
|
||||
on=archetype_variables
|
||||
on=archetype_variables,
|
||||
)
|
||||
|
||||
# Create age bands:
|
||||
|
|
@ -148,7 +201,7 @@ def app():
|
|||
property_asset_data["age_band"] = pd.cut(
|
||||
property_asset_data["BUILD_YEAR"],
|
||||
bins=[1959, 1969, 1979, 1989, 1999, 2022],
|
||||
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"]
|
||||
labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"],
|
||||
)
|
||||
|
||||
# Create floor area bands
|
||||
|
|
@ -159,47 +212,59 @@ def app():
|
|||
property_asset_data["floor_area_band"] = pd.cut(
|
||||
property_asset_data["total_floor_area"],
|
||||
bins=[0, 73, 97, 199, 10000],
|
||||
labels=["0-73", "74-97", "98-199", "200+"]
|
||||
labels=["0-73", "74-97", "98-199", "200+"],
|
||||
)
|
||||
|
||||
property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy()
|
||||
property_asset_data["archetype_group"] = np.where(
|
||||
property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values),
|
||||
property_asset_data["archetype_id"].isin(
|
||||
other_archetypes["archetype_id"].values
|
||||
),
|
||||
"other",
|
||||
property_asset_data["archetype_group"]
|
||||
property_asset_data["archetype_group"],
|
||||
)
|
||||
|
||||
# For colour
|
||||
wall_types = (
|
||||
property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename(
|
||||
columns={"wall_type": "Wall Type"}
|
||||
)
|
||||
property_asset_data[["wall_type"]]
|
||||
.value_counts()
|
||||
.to_frame()
|
||||
.reset_index()
|
||||
.rename(columns={"wall_type": "Wall Type"})
|
||||
)
|
||||
# Group into age bands
|
||||
ages = (
|
||||
property_asset_data[["age_band"]].value_counts()
|
||||
property_asset_data[["age_band"]]
|
||||
.value_counts()
|
||||
.to_frame()
|
||||
.reset_index().sort_values("age_band", ascending=True)
|
||||
.reset_index()
|
||||
.sort_values("age_band", ascending=True)
|
||||
.rename(columns={"age_band": "Age Band"})
|
||||
)
|
||||
floor_area_bands = (
|
||||
property_asset_data[["floor_area_band"]].value_counts()
|
||||
property_asset_data[["floor_area_band"]]
|
||||
.value_counts()
|
||||
.to_frame()
|
||||
.reset_index().sort_values("floor_area_band", ascending=True)
|
||||
.reset_index()
|
||||
.sort_values("floor_area_band", ascending=True)
|
||||
.rename(columns={"floor_area_band": "Floor Area Band"})
|
||||
)
|
||||
archetype_counts = (
|
||||
property_asset_data[["archetype_group"]].
|
||||
value_counts().
|
||||
to_frame().
|
||||
reset_index()
|
||||
property_asset_data[["archetype_group"]]
|
||||
.value_counts()
|
||||
.to_frame()
|
||||
.reset_index()
|
||||
.rename(columns={"archetype_group": "Archetype"})
|
||||
)
|
||||
property_types = (
|
||||
(property_asset_data["property_type"] + ": " + property_asset_data["built_form"]).
|
||||
value_counts().
|
||||
to_frame().
|
||||
reset_index()
|
||||
(
|
||||
property_asset_data["property_type"]
|
||||
+ ": "
|
||||
+ property_asset_data["built_form"]
|
||||
)
|
||||
.value_counts()
|
||||
.to_frame()
|
||||
.reset_index()
|
||||
.rename(columns={"index": "Property Type", 0: "Count"})
|
||||
)
|
||||
|
||||
|
|
@ -217,18 +282,24 @@ def app():
|
|||
totals = property_asset_data[
|
||||
[
|
||||
"Total_household_members",
|
||||
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
|
||||
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
|
||||
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
|
||||
"co2_emissions",
|
||||
"current_energy_demand",
|
||||
"current_energy_demand_heating_hotwater",
|
||||
"heating_cost_current",
|
||||
"hot_water_cost_current",
|
||||
"lighting_cost_current",
|
||||
"appliances_cost_current",
|
||||
"gas_standing_charge",
|
||||
"electricity_standing_charge",
|
||||
]
|
||||
].copy()
|
||||
totals["total_cost"] = (
|
||||
totals["heating_cost_current"] +
|
||||
totals["hot_water_cost_current"] +
|
||||
totals["lighting_cost_current"] +
|
||||
totals["appliances_cost_current"] +
|
||||
totals["gas_standing_charge"] +
|
||||
totals["electricity_standing_charge"]
|
||||
totals["heating_cost_current"]
|
||||
+ totals["hot_water_cost_current"]
|
||||
+ totals["lighting_cost_current"]
|
||||
+ totals["appliances_cost_current"]
|
||||
+ totals["gas_standing_charge"]
|
||||
+ totals["electricity_standing_charge"]
|
||||
)
|
||||
print(
|
||||
totals[
|
||||
|
|
@ -259,38 +330,59 @@ def app():
|
|||
|
||||
scenario_recommendations_df = recommendations_df[
|
||||
recommendations_df["Scenario ID"] == scenario
|
||||
].copy()
|
||||
].copy()
|
||||
|
||||
scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"]
|
||||
scenario_recommendations_df["contingency"] = (
|
||||
contingency * scenario_recommendations_df["estimated_cost"]
|
||||
)
|
||||
scenario_recommendations_df["total_cost"] = (
|
||||
scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"]
|
||||
scenario_recommendations_df["estimated_cost"]
|
||||
+ scenario_recommendations_df["contingency"]
|
||||
)
|
||||
|
||||
recommended_measures_df = scenario_recommendations_df[
|
||||
["property_id", "measure_type", "estimated_cost", "default"]
|
||||
]
|
||||
|
||||
recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]]
|
||||
recommended_measures_df = recommended_measures_df[
|
||||
recommended_measures_df["default"]
|
||||
]
|
||||
recommended_measures_df = recommended_measures_df.drop(columns=["default"])
|
||||
|
||||
# Metrics by property ID
|
||||
aggregated_metrics = scenario_recommendations_df[
|
||||
[
|
||||
"property_id", "type", "default", "sap_points",
|
||||
"energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency",
|
||||
"total_cost"
|
||||
"property_id",
|
||||
"type",
|
||||
"default",
|
||||
"sap_points",
|
||||
"energy_cost_savings",
|
||||
"kwh_savings",
|
||||
"co2_equivalent_savings",
|
||||
"estimated_cost",
|
||||
"contingency",
|
||||
"total_cost",
|
||||
]
|
||||
]
|
||||
aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]]
|
||||
aggregated_metrics = aggregated_metrics.groupby("property_id")[
|
||||
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
|
||||
"total_cost", "contingency"]
|
||||
].sum().reset_index()
|
||||
aggregated_metrics = (
|
||||
aggregated_metrics.groupby("property_id")[
|
||||
[
|
||||
"sap_points",
|
||||
"co2_equivalent_savings",
|
||||
"energy_cost_savings",
|
||||
"kwh_savings",
|
||||
"estimated_cost",
|
||||
"total_cost",
|
||||
"contingency",
|
||||
]
|
||||
]
|
||||
.sum()
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
recommendations_measures_pivot = recommended_measures_df.pivot(
|
||||
index='property_id',
|
||||
columns='measure_type',
|
||||
values='estimated_cost'
|
||||
index="property_id", columns="measure_type", values="estimated_cost"
|
||||
)
|
||||
recommendations_measures_pivot = recommendations_measures_pivot.reset_index()
|
||||
recommendations_measures_pivot = recommendations_measures_pivot.fillna(0)
|
||||
|
|
@ -299,30 +391,58 @@ def app():
|
|||
for c in recommendations_measures_pivot.columns:
|
||||
if c == "property_id":
|
||||
continue
|
||||
recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0
|
||||
recommendations_measures_pivot["Recommendation: " + c] = (
|
||||
recommendations_measures_pivot[c] > 0
|
||||
)
|
||||
|
||||
# We now create a final output
|
||||
df = properties_df[
|
||||
[
|
||||
"property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows",
|
||||
"current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms",
|
||||
"co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater",
|
||||
"heating_cost_current", "hot_water_cost_current", "lighting_cost_current",
|
||||
"appliances_cost_current", "gas_standing_charge", "electricity_standing_charge"
|
||||
df = (
|
||||
properties_df[
|
||||
[
|
||||
"property_id",
|
||||
"uprn",
|
||||
"address",
|
||||
"postcode",
|
||||
"property_type",
|
||||
"walls",
|
||||
"roof",
|
||||
"heating",
|
||||
"windows",
|
||||
"current_epc_rating",
|
||||
"current_sap_points",
|
||||
"total_floor_area",
|
||||
"number_of_rooms",
|
||||
"co2_emissions",
|
||||
"current_energy_demand",
|
||||
"current_energy_demand_heating_hotwater",
|
||||
"heating_cost_current",
|
||||
"hot_water_cost_current",
|
||||
"lighting_cost_current",
|
||||
"appliances_cost_current",
|
||||
"gas_standing_charge",
|
||||
"electricity_standing_charge",
|
||||
]
|
||||
]
|
||||
].merge(
|
||||
recommendations_measures_pivot, how="left", on="property_id"
|
||||
).merge(
|
||||
aggregated_metrics, how="left", on="property_id"
|
||||
.merge(recommendations_measures_pivot, how="left", on="property_id")
|
||||
.merge(aggregated_metrics, how="left", on="property_id")
|
||||
)
|
||||
|
||||
df["bills_total_cost"] = (
|
||||
df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] +
|
||||
df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"]
|
||||
df["heating_cost_current"]
|
||||
+ df["hot_water_cost_current"]
|
||||
+ df["lighting_cost_current"]
|
||||
+ df["appliances_cost_current"]
|
||||
+ df["gas_standing_charge"]
|
||||
+ df["electricity_standing_charge"]
|
||||
)
|
||||
|
||||
df = df.drop(columns=["property_id"])
|
||||
for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]:
|
||||
for c in [
|
||||
"sap_points",
|
||||
"co2_equivalent_savings",
|
||||
"energy_cost_savings",
|
||||
"kwh_savings",
|
||||
]:
|
||||
df[c] = df[c].fillna(0)
|
||||
|
||||
df = df.rename(
|
||||
|
|
@ -345,16 +465,23 @@ def app():
|
|||
# Calculate post SAP
|
||||
df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"]
|
||||
df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round()
|
||||
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x))
|
||||
df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(
|
||||
lambda x: sap_to_epc(x)
|
||||
)
|
||||
|
||||
# Calculate the relative savings on carbon, kwh, and bills
|
||||
df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"]
|
||||
df["relative_carbon_savings"] = (
|
||||
df["co2_equivalent_savings"] / df["co2_emissions"]
|
||||
)
|
||||
df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"]
|
||||
df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"]
|
||||
|
||||
# Add on the archetype
|
||||
df = df.merge(
|
||||
property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn"
|
||||
property_asset_data[["uprn", "archetype_group"]],
|
||||
how="left",
|
||||
left_on="UPRN",
|
||||
right_on="uprn",
|
||||
)
|
||||
|
||||
# For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it
|
||||
|
|
@ -387,7 +514,9 @@ def app():
|
|||
|
||||
printing_scenario_id = scenario_ids[0]
|
||||
# EPC breakdown
|
||||
print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts())
|
||||
print(
|
||||
scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts()
|
||||
)
|
||||
# Cost
|
||||
# Total cost
|
||||
print(scenario_data[printing_scenario_id]["total_cost"].sum())
|
||||
|
|
@ -408,16 +537,24 @@ def app():
|
|||
measure_details = {}
|
||||
for scenario in scenario_ids:
|
||||
measure_details[scenario] = {}
|
||||
recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c]
|
||||
measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict()
|
||||
recommendation_cols = [
|
||||
c for c in scenario_data[scenario].columns if "Recommendation:" in c
|
||||
]
|
||||
measure_details[scenario]["count"] = (
|
||||
scenario_data[scenario][recommendation_cols].sum().to_dict()
|
||||
)
|
||||
# Get average cost per measure
|
||||
measure_columns = [
|
||||
c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c
|
||||
c.split("Recommendation: ")[1]
|
||||
for c in scenario_data[scenario].columns
|
||||
if "Recommendation:" in c
|
||||
]
|
||||
# Take the mean, drop zero columns
|
||||
measure_costs = {}
|
||||
for m in measure_columns:
|
||||
measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean())
|
||||
measure_costs[m] = float(
|
||||
scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()
|
||||
)
|
||||
measure_details[scenario]["cost_per_measure"] = measure_costs
|
||||
|
||||
pprint(measure_details[scenario_ids[0]]["count"])
|
||||
|
|
@ -452,12 +589,27 @@ def app():
|
|||
for scenario in scenario_ids:
|
||||
df = scenario_data[scenario].copy()
|
||||
|
||||
avg_savings = df[
|
||||
["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost",
|
||||
"total_cost", "contingency"]
|
||||
].mean().to_dict()
|
||||
avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"]
|
||||
avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
|
||||
avg_savings = (
|
||||
df[
|
||||
[
|
||||
"sap_points",
|
||||
"co2_equivalent_savings",
|
||||
"energy_cost_savings",
|
||||
"kwh_savings",
|
||||
"estimated_cost",
|
||||
"total_cost",
|
||||
"contingency",
|
||||
]
|
||||
]
|
||||
.mean()
|
||||
.to_dict()
|
||||
)
|
||||
avg_savings["cost_per_sap_point"] = (
|
||||
avg_savings["total_cost"] / avg_savings["sap_points"]
|
||||
)
|
||||
avg_savings["cost_per_carbon"] = (
|
||||
avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"]
|
||||
)
|
||||
scenario_metrics[scenario] = avg_savings
|
||||
|
||||
pprint(scenario_metrics[scenario_ids[0]])
|
||||
|
|
@ -465,11 +617,11 @@ def app():
|
|||
|
||||
scenario_data[scenario_ids[0]]["loft_insulation"][
|
||||
scenario_data[scenario_ids[0]]["loft_insulation"] > 0
|
||||
].mean()
|
||||
].mean()
|
||||
|
||||
scenario_data[scenario_ids[0]]["cavity_wall_insulation"][
|
||||
scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0
|
||||
].mean()
|
||||
].mean()
|
||||
|
||||
# Testing checking flood risk
|
||||
|
||||
|
|
@ -477,11 +629,7 @@ def app():
|
|||
|
||||
def get_flood_risk(lat, lon, radius_km=1):
|
||||
url = "https://environment.data.gov.uk/flood-monitoring/id/floods"
|
||||
params = {
|
||||
'lat': lat,
|
||||
'long': lon,
|
||||
'dist': radius_km # search radius in km
|
||||
}
|
||||
params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km
|
||||
|
||||
response = requests.get(url, params=params)
|
||||
response.raise_for_status()
|
||||
|
|
@ -495,20 +643,19 @@ def app():
|
|||
print(f"{len(flood_warnings)} warning(s) found near the location:")
|
||||
for warning in flood_warnings:
|
||||
print(f"- Area: {warning.get('description')}")
|
||||
print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})")
|
||||
print(
|
||||
f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})"
|
||||
)
|
||||
print(f" Message changed at: {warning.get('timeMessageChanged')}")
|
||||
print()
|
||||
|
||||
return flood_warnings
|
||||
|
||||
from shapely.geometry import shape, Point
|
||||
|
||||
def get_flood_areas_near_point(lat, lon, radius_km=2):
|
||||
url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas"
|
||||
params = {
|
||||
'lat': lat,
|
||||
'long': lon,
|
||||
'dist': radius_km
|
||||
}
|
||||
params = {"lat": lat, "long": lon, "dist": radius_km}
|
||||
|
||||
response = requests.get(url, params=params)
|
||||
response.raise_for_status()
|
||||
|
|
@ -531,7 +678,7 @@ def app():
|
|||
if not features:
|
||||
continue
|
||||
|
||||
flood_polygon = shape(features[0]['geometry'])
|
||||
flood_polygon = shape(features[0]["geometry"])
|
||||
|
||||
try:
|
||||
is_inside = flood_polygon.contains(point)
|
||||
|
|
@ -539,12 +686,17 @@ def app():
|
|||
is_inside = False
|
||||
|
||||
if is_inside:
|
||||
print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})")
|
||||
print(
|
||||
f"📍 Point is inside flood area: {area['label']} ({area['notation']})"
|
||||
)
|
||||
return area
|
||||
|
||||
from tqdm import tqdm
|
||||
|
||||
floor_warnings_data = []
|
||||
for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)):
|
||||
for _, property in tqdm(
|
||||
property_asset_data.iterrows(), total=len(property_asset_data)
|
||||
):
|
||||
# warnings = floor_warnings_data.extend(
|
||||
# get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1)
|
||||
# )
|
||||
|
|
@ -556,7 +708,7 @@ def app():
|
|||
"uprn": property["uprn"],
|
||||
"address": property["address"],
|
||||
"postcode": property["postcode"],
|
||||
"area": resp
|
||||
"area": resp,
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
|
@ -570,7 +722,7 @@ def app():
|
|||
"House_Cavity_Uninsulated_Pitched roof_Post 1970",
|
||||
"other",
|
||||
"House_System_Uninsulated_Pitched roof_Pre 1970",
|
||||
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970"
|
||||
"House_Solid_Uninsulated_Not Pitched Roof_Pre 1970",
|
||||
]
|
||||
|
||||
values = [62, 36, 21, 16, 16, 4, 2]
|
||||
|
|
@ -582,36 +734,39 @@ def app():
|
|||
"Cavity wall insulation, ventilation",
|
||||
"Bespoke retrofit measures",
|
||||
"External wall insulation, roof insulation",
|
||||
"Flat roof insulation, internal wall insulation"
|
||||
"Flat roof insulation, internal wall insulation",
|
||||
]
|
||||
|
||||
fig = go.Figure(go.Treemap(
|
||||
labels=labels,
|
||||
parents=[""] * len(labels), # No root
|
||||
values=values,
|
||||
hovertext=hovertext,
|
||||
hoverinfo="text",
|
||||
textinfo="none",
|
||||
marker=dict(
|
||||
line=dict(color="white", width=4),
|
||||
colors=values,
|
||||
colorscale="Blues"
|
||||
fig = go.Figure(
|
||||
go.Treemap(
|
||||
labels=labels,
|
||||
parents=[""] * len(labels), # No root
|
||||
values=values,
|
||||
hovertext=hovertext,
|
||||
hoverinfo="text",
|
||||
textinfo="none",
|
||||
marker=dict(
|
||||
line=dict(color="white", width=4), colors=values, colorscale="Blues"
|
||||
),
|
||||
)
|
||||
))
|
||||
)
|
||||
|
||||
fig.update_layout(
|
||||
margin=dict(t=10, l=10, r=10, b=10),
|
||||
plot_bgcolor="white",
|
||||
paper_bgcolor="white"
|
||||
margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white"
|
||||
)
|
||||
|
||||
fig.show()
|
||||
|
||||
# Get the recommended measures by scenario id
|
||||
recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c]
|
||||
measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[
|
||||
recommendation_cols
|
||||
].sum().reset_index()
|
||||
recommendation_cols = [
|
||||
c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c
|
||||
]
|
||||
measure_counts_by_scenario = (
|
||||
scenario_data[scenario_ids[1]]
|
||||
.groupby("archetype_group")[recommendation_cols]
|
||||
.sum()
|
||||
.reset_index()
|
||||
)
|
||||
|
||||
measure_counts_by_scenario.to_csv(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv"
|
||||
|
|
@ -630,15 +785,13 @@ def app():
|
|||
|
||||
to_append = {"uprn": uprn}
|
||||
for _id in scenario_ids:
|
||||
scenario = scenario_data[_id][
|
||||
scenario_data[_id]["uprn"] == uprn
|
||||
].squeeze()
|
||||
scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze()
|
||||
|
||||
val = PropertyValuation.estimate_valuation_improvement(
|
||||
current_value=x["valuation"],
|
||||
current_epc=scenario["Current EPC Rating"].value,
|
||||
target_epc=scenario["Predicted Post Works EPC"],
|
||||
total_cost=None
|
||||
total_cost=None,
|
||||
)
|
||||
|
||||
to_append[_id] = val["average_increase"]
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -10,6 +10,7 @@ Additionally, we will find the problematic records and remove them
|
|||
Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan
|
||||
or recommendations in case something went wrong
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
from sqlalchemy.orm import Session
|
||||
from backend.app.db.models.portfolio import PropertyModel
|
||||
|
|
@ -19,8 +20,7 @@ from backend.app.db.connection import db_session
|
|||
def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]:
|
||||
return [
|
||||
uprn
|
||||
for (uprn,) in
|
||||
session.query(PropertyModel.uprn)
|
||||
for (uprn,) in session.query(PropertyModel.uprn)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
if uprn is not None
|
||||
|
|
@ -34,7 +34,7 @@ with db_session() as session:
|
|||
sal = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
|
||||
"data.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
|
||||
missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)]
|
||||
|
|
@ -44,7 +44,7 @@ missed_properties.to_excel(
|
|||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
|
||||
"d_failed_properties_to_restart_20260102.xlsx",
|
||||
sheet_name="Standardised Asset List",
|
||||
index=False
|
||||
index=False,
|
||||
)
|
||||
|
||||
# Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios:
|
||||
|
|
@ -52,14 +52,14 @@ scenario_id = None
|
|||
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy.orm import Session
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.db.models.recommendations import PlanModel
|
||||
|
||||
|
||||
def count_plans_for_scenario(session: Session, scenario_id: int) -> int:
    """Count the plans that belong to one scenario.

    Args:
        session: Open SQLAlchemy session used to execute the query.
        scenario_id: Scenario whose plans are counted.

    Returns:
        The single value produced by ``SELECT count(*)`` over ``PlanModel``
        filtered on ``scenario_id``.
    """
    return session.execute(
        select(func.count())
        .select_from(PlanModel)
        .where(PlanModel.scenario_id == scenario_id)
    ).scalar_one()
|
||||
|
||||
|
||||
|
|
@ -69,8 +69,7 @@ with db_session() as session:
|
|||
|
||||
def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]:
    """Return the ids of all plans attached to a scenario.

    Args:
        session: Open SQLAlchemy session used to execute the query.
        scenario_id: Scenario whose plan ids are fetched.

    Returns:
        One ``PlanModel.id`` per matching row, in the order the database
        returns them.
    """
    # Select only the id column; the full plan rows are not needed here.
    result = session.execute(
        select(PlanModel.id).where(PlanModel.scenario_id == scenario_id)
    )
    return [row.id for row in result]
|
||||
|
||||
|
|
@ -84,7 +83,7 @@ from sqlalchemy.orm import Session
|
|||
|
||||
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: A sliceable sequence supporting ``len()`` (list, tuple, str).
        size: Maximum length of each slice; must be a positive integer.

    Yields:
        Slices ``iterable[i : i + size]`` for ``i = 0, size, 2 * size, ...``.
        The final slice may be shorter than ``size``.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    # A negative step would make range() empty and silently yield nothing;
    # fail loudly instead so a batch job cannot be skipped by accident.
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
|
||||
|
||||
|
||||
from sqlalchemy import text
|
||||
|
|
@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# recommendation_materials
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation_materials rm
|
||||
USING plan_recommendations pr
|
||||
WHERE rm.recommendation_id = pr.recommendation_id
|
||||
AND pr.plan_id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# plan_recommendations
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan_recommendations
|
||||
WHERE plan_id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# recommendations (only those used by these plans)
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation r
|
||||
WHERE r.id IN (
|
||||
SELECT DISTINCT recommendation_id
|
||||
FROM plan_recommendations
|
||||
WHERE plan_id = ANY(:plan_ids)
|
||||
)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# plans LAST
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan
|
||||
WHERE id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@ This includes:
|
|||
# EPC C, there should be a plan
|
||||
2) If the plan is fabric first, make sure they are actually fabric first
|
||||
"""
|
||||
|
||||
import pandas as pd
|
||||
|
||||
scenario_names = {
|
||||
|
|
@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items():
|
|||
)
|
||||
|
||||
# find properties that are below the scenario sap target, but have no recommended measures
|
||||
df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id]
|
||||
df["below_scenario_target"] = (
|
||||
df["current_sap_points"] < scenario_sap_targets[scenario_id]
|
||||
)
|
||||
df["no_recommended_measures"] = df["sap_points"] == 0
|
||||
df["zero_cost"] = df["total_retrofit_cost"] == 0
|
||||
df["sap_points_above_zero"] = df["sap_points"] > 0
|
||||
|
|
@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items():
|
|||
].copy()
|
||||
|
||||
if scenario_sap_targets[scenario_id] == 81:
|
||||
problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"]
|
||||
problematic_properties = problematic_properties[
|
||||
problematic_properties["property_type"] != "Flat"
|
||||
]
|
||||
|
||||
zero_cost_above_zero_sap = df[
|
||||
(df["sap_points_above_zero"] & df["zero_cost"])
|
||||
|
|
@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items():
|
|||
# pd.set_option('display.width', 1000)
|
||||
# problematic_properties.head(len(problematic_properties))
|
||||
|
||||
print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})")
|
||||
print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})")
|
||||
print(
|
||||
f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})"
|
||||
)
|
||||
print(
|
||||
f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})"
|
||||
)
|
||||
|
||||
problems.append(problematic_properties)
|
||||
problems.append(zero_cost_above_zero_sap)
|
||||
|
|
@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"])
|
|||
sal = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
|
||||
"data.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
sal2 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
|
||||
"UPRNS.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
|
||||
sal = pd.concat([sal, sal2])
|
||||
|
|
@ -114,7 +123,7 @@ retry.to_excel(
|
|||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/"
|
||||
"d_problematic_properties_to_review_20260106.xlsx",
|
||||
sheet_name="Standardised Asset List",
|
||||
index=False
|
||||
index=False,
|
||||
)
|
||||
|
||||
# Delete associated plans
|
||||
|
|
@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist()
|
|||
from sqlalchemy.orm import Session
|
||||
from backend.app.db.models.portfolio import PropertyModel
|
||||
from backend.app.db.connection import db_session
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.db.models.recommendations import PlanModel
|
||||
from sqlalchemy import select, delete
|
||||
from sqlalchemy.exc import NoResultFound
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
|
||||
|
||||
def get_property_ids_for_uprns(
    session: Session, portfolio_id: int, uprns: list[int]
) -> list[int]:
    """Return the ids of properties in a portfolio that match the given UPRNs.

    Args:
        session: Open SQLAlchemy session used to execute the query.
        portfolio_id: Portfolio the properties must belong to.
        uprns: UPRN values to match against ``PropertyModel.uprn``.

    Returns:
        ``PropertyModel.id`` for every row satisfying both filters.
    """
    # Query only the id column rather than loading whole PropertyModel rows,
    # and avoid naming the loop variable ``property`` (shadows the builtin).
    return [
        property_id
        for (property_id,) in session.query(PropertyModel.id)
        .filter(
            PropertyModel.portfolio_id == portfolio_id,
            PropertyModel.uprn.in_(uprns),
        )
        .all()
    ]
|
||||
|
|
@ -149,15 +159,21 @@ with db_session() as session:
|
|||
|
||||
|
||||
# Get all and delete plans for these property IDs
|
||||
def get_all_plans_for_property_ids(
    session: Session, property_ids: list[int]
) -> list[PlanModel]:
    """Return every plan row whose ``property_id`` is in ``property_ids``.

    Args:
        session: Open SQLAlchemy session used to execute the query.
        property_ids: Property ids to match against ``PlanModel.property_id``.

    Returns:
        The matching ``PlanModel`` instances.
    """
    matching_plans = session.query(PlanModel).filter(
        PlanModel.property_id.in_(property_ids)
    )
    return matching_plans.all()
|
||||
|
||||
|
||||
def get_ids_of_plans_for_deletion(
    session: Session, property_ids: list[int]
) -> list[int]:
    """Return the ids of all plans attached to the given properties.

    Args:
        session: Open SQLAlchemy session used to execute the query.
        property_ids: Property ids to match against ``PlanModel.property_id``.

    Returns:
        ``PlanModel.id`` for every matching plan.
    """
    # Only the id is used by callers of this helper, so select just that
    # column instead of materialising full PlanModel objects.
    return [
        plan_id
        for (plan_id,) in session.query(PlanModel.id)
        .filter(PlanModel.property_id.in_(property_ids))
        .all()
    ]
|
||||
|
||||
|
|
@ -168,7 +184,7 @@ with db_session() as session:
|
|||
|
||||
def chunked(iterable, size):
    """Yield consecutive slices of ``iterable`` holding at most ``size`` items.

    Args:
        iterable: A sliceable sequence supporting ``len()`` (list, tuple, str).
        size: Maximum length of each slice; must be a positive integer.

    Yields:
        Slices ``iterable[i : i + size]`` for ``i = 0, size, 2 * size, ...``.
        The final slice may be shorter than ``size``.

    Raises:
        ValueError: If ``size`` is not positive.
    """
    # A negative step would make range() empty and silently yield nothing;
    # fail loudly instead so a batch job cannot be skipped by accident.
    if size <= 0:
        raise ValueError(f"size must be a positive integer, got {size!r}")
    for start in range(0, len(iterable), size):
        yield iterable[start : start + size]
|
||||
|
||||
|
||||
from sqlalchemy import text
|
||||
|
|
@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# recommendation_materials
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation_materials rm
|
||||
USING plan_recommendations pr
|
||||
WHERE rm.recommendation_id = pr.recommendation_id
|
||||
AND pr.plan_id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# plan_recommendations
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan_recommendations
|
||||
WHERE plan_id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# recommendations (only those used by these plans)
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation r
|
||||
WHERE r.id IN (
|
||||
SELECT DISTINCT recommendation_id
|
||||
FROM plan_recommendations
|
||||
WHERE plan_id = ANY(:plan_ids)
|
||||
)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# plans LAST
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan
|
||||
WHERE id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -3,31 +3,41 @@ from sqlalchemy.orm import Session
|
|||
from sqlalchemy import text, select
|
||||
from backend.app.db.connection import db_read_session
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.db.models.recommendations import PlanModel
|
||||
|
||||
PORTFOLIO_ID = 435
|
||||
|
||||
with db_read_session() as session:
|
||||
# Get all PropertyDetailsEpcModel rows for properties in portfolio PORTFOLIO_ID (the estimated == True filter is currently commented out)
|
||||
estimated_epcs = session.query(PropertyDetailsEpcModel).filter(
|
||||
# PropertyDetailsEpcModel.estimated == True,
|
||||
PropertyDetailsEpcModel.property_id.in_(
|
||||
session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID)
|
||||
estimated_epcs = (
|
||||
session.query(PropertyDetailsEpcModel)
|
||||
.filter(
|
||||
# PropertyDetailsEpcModel.estimated == True,
|
||||
PropertyDetailsEpcModel.property_id.in_(
|
||||
session.query(PropertyModel.id).filter(
|
||||
PropertyModel.portfolio_id == PORTFOLIO_ID
|
||||
)
|
||||
)
|
||||
)
|
||||
).all()
|
||||
.all()
|
||||
)
|
||||
|
||||
# Get the ids
|
||||
estimated_epc_ids = [epc.property_id for epc in estimated_epcs]
|
||||
|
||||
# I want to get the UPRNS for these properties, from the property model
|
||||
with db_read_session() as session:
|
||||
estimated_uprns = session.query(PropertyModel.uprn).filter(
|
||||
PropertyModel.id.in_(
|
||||
session.query(PropertyDetailsEpcModel.property_id).filter(
|
||||
PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
|
||||
estimated_uprns = (
|
||||
session.query(PropertyModel.uprn)
|
||||
.filter(
|
||||
PropertyModel.id.in_(
|
||||
session.query(PropertyDetailsEpcModel.property_id).filter(
|
||||
PropertyDetailsEpcModel.id.in_(estimated_epc_ids)
|
||||
)
|
||||
)
|
||||
)
|
||||
).all()
|
||||
.all()
|
||||
)
|
||||
|
||||
estimated_uprns_list = [uprn for (uprn,) in estimated_uprns]
|
||||
|
||||
|
|
@ -35,16 +45,16 @@ with db_read_session() as session:
|
|||
sal_1 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model "
|
||||
"data.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
sal_2 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional "
|
||||
"UPRNS.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
|
||||
sal = pd.concat([sal_1, sal_2])
|
||||
sal = sal.drop_duplicates(subset=['epc_os_uprn'])
|
||||
sal = sal.drop_duplicates(subset=["epc_os_uprn"])
|
||||
|
||||
estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy()
|
||||
|
||||
|
|
@ -55,20 +65,24 @@ SCENARIOS = [
|
|||
# 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP
|
||||
# 859, # EPC C - no solid floor, ashp 3.0
|
||||
# 885, # EPC B - fabric first, no solid floor, ashp 3.0
|
||||
908, 909, 910
|
||||
908,
|
||||
909,
|
||||
910,
|
||||
]
|
||||
|
||||
# Get all plans, associated to these properties - the property IDs are in estimated_epc_ids
|
||||
with db_read_session() as session:
|
||||
result = session.execute(
|
||||
select(Plan.id, Plan.property_id)
|
||||
.where(Plan.property_id.in_(estimated_epc_ids))
|
||||
select(PlanModel.id, PlanModel.property_id).where(
|
||||
PlanModel.property_id.in_(estimated_epc_ids)
|
||||
)
|
||||
)
|
||||
plans = [
|
||||
{
|
||||
"plan_id": row.id,
|
||||
"property_id": row.property_id,
|
||||
} for row in result
|
||||
}
|
||||
for row in result
|
||||
]
|
||||
|
||||
df = pd.DataFrame(plans)
|
||||
|
|
@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# recommendation_materials
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation_materials rm
|
||||
USING plan_recommendations pr
|
||||
WHERE rm.recommendation_id = pr.recommendation_id
|
||||
AND pr.plan_id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# plan_recommendations
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan_recommendations
|
||||
WHERE plan_id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# recommendations (only those used by these plans)
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM recommendation r
|
||||
WHERE r.id IN (
|
||||
SELECT DISTINCT recommendation_id
|
||||
FROM plan_recommendations
|
||||
WHERE plan_id = ANY(:plan_ids)
|
||||
)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
|
@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]):
|
|||
# plans LAST
|
||||
# ----------------------------
|
||||
session.execute(
|
||||
text("""
|
||||
text(
|
||||
"""
|
||||
DELETE FROM plan
|
||||
WHERE id = ANY(:plan_ids)
|
||||
"""),
|
||||
"""
|
||||
),
|
||||
params,
|
||||
)
|
||||
|
||||
|
||||
# Store the SAL
|
||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
|
||||
"sal.xlsx")
|
||||
filename = (
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 "
|
||||
"sal.xlsx"
|
||||
)
|
||||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||
|
|
@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer:
|
|||
b1 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
|
||||
"sal.xlsx",
|
||||
sheet_name="batch 1"
|
||||
sheet_name="batch 1",
|
||||
)
|
||||
b2 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
|
||||
"sal.xlsx",
|
||||
sheet_name="batch 2"
|
||||
sheet_name="batch 2",
|
||||
)
|
||||
b3 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
|
||||
"sal.xlsx",
|
||||
sheet_name="batch 3"
|
||||
sheet_name="batch 3",
|
||||
)
|
||||
b4 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
|
||||
"sal.xlsx",
|
||||
sheet_name="batch 4"
|
||||
sheet_name="batch 4",
|
||||
)
|
||||
b5 = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 "
|
||||
"sal.xlsx",
|
||||
sheet_name="batch 5"
|
||||
sheet_name="batch 5",
|
||||
)
|
||||
# Batch 6 should be the remaining properties: those in the SAL whose epc_os_uprn is not in batches 1-5
|
||||
total = pd.concat([b1, b2, b3, b4, b5])
|
||||
remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)]
|
||||
# Create new output
|
||||
filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
|
||||
"20260107 corrected batch 6 sal.xlsx")
|
||||
filename = (
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/"
|
||||
"20260107 corrected batch 6 sal.xlsx"
|
||||
)
|
||||
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
sal.to_excel(writer, sheet_name="Standardised Asset List", index=False)
|
||||
|
|
@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer:
|
|||
b5.to_excel(writer, sheet_name="batch 5", index=False)
|
||||
remaining.to_excel(writer, sheet_name="batch 6", index=False)
|
||||
|
||||
all_together = pd.concat(
|
||||
[b1, b2, b3, b4, b5, remaining]
|
||||
)
|
||||
all_together = pd.concat([b1, b2, b3, b4, b5, remaining])
|
||||
|
|
|
|||
|
|
@ -110,14 +110,17 @@ import pandas as pd
|
|||
# Solar PV savings - we need the amount of solar PV bill savings
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from backend.app.db.connection import db_engine
|
||||
from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
RecommendationMaterials,
|
||||
)
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from collections import defaultdict
|
||||
|
||||
PORTFOLIO_ID = 485 # Peabody
|
||||
SCENARIOS = [
|
||||
970
|
||||
]
|
||||
SCENARIOS = [970]
|
||||
scenario_names = {
|
||||
970: "EPC C - no solid floor, ashp 3.0",
|
||||
}
|
||||
|
|
@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Properties
|
||||
# --------------------
|
||||
properties_query = session.query(
|
||||
PropertyModel,
|
||||
PropertyDetailsEpcModel
|
||||
).join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id
|
||||
).filter(
|
||||
PropertyModel.portfolio_id == portfolio_id
|
||||
).all()
|
||||
properties_query = (
|
||||
session.query(PropertyModel, PropertyDetailsEpcModel)
|
||||
.join(
|
||||
PropertyDetailsEpcModel,
|
||||
PropertyModel.id == PropertyDetailsEpcModel.property_id,
|
||||
)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
properties_data = [
|
||||
{
|
||||
**{col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns},
|
||||
**{col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns},
|
||||
**{
|
||||
col.name: getattr(p.PropertyModel, col.name)
|
||||
for col in PropertyModel.__table__.columns
|
||||
},
|
||||
**{
|
||||
col.name: getattr(p.PropertyDetailsEpcModel, col.name)
|
||||
for col in PropertyDetailsEpcModel.__table__.columns
|
||||
},
|
||||
}
|
||||
for p in properties_query
|
||||
]
|
||||
|
|
@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Plans
|
||||
# --------------------
|
||||
plans_query = session.query(Plan).filter(
|
||||
Plan.scenario_id.in_(scenario_ids)
|
||||
).all()
|
||||
plans_query = (
|
||||
session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all()
|
||||
)
|
||||
|
||||
plans_data = [
|
||||
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
|
||||
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
|
||||
for plan in plans_query
|
||||
]
|
||||
|
||||
|
|
@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendations (NO materials yet)
|
||||
# --------------------
|
||||
recommendations_query = session.query(
|
||||
Recommendation,
|
||||
Plan.scenario_id
|
||||
).join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id
|
||||
).join(
|
||||
Plan,
|
||||
Plan.id == PlanRecommendations.plan_id
|
||||
).filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
Recommendation.already_installed.is_(False)
|
||||
).all()
|
||||
recommendations_query = (
|
||||
session.query(Recommendation, PlanModel.scenario_id)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
Recommendation.already_installed.is_(False),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
||||
recommendations_data = [
|
||||
{
|
||||
**{col.name: getattr(r.Recommendation, col.name)
|
||||
for col in Recommendation.__table__.columns},
|
||||
**{
|
||||
col.name: getattr(r.Recommendation, col.name)
|
||||
for col in Recommendation.__table__.columns
|
||||
},
|
||||
"scenario_id": r.scenario_id,
|
||||
"materials": [] # placeholder
|
||||
"materials": [], # placeholder
|
||||
}
|
||||
for r in recommendations_query
|
||||
]
|
||||
|
|
@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
# Recommendation materials (SEPARATE QUERY)
|
||||
# --------------------
|
||||
materials_query = session.query(
|
||||
RecommendationMaterials
|
||||
).filter(
|
||||
RecommendationMaterials.recommendation_id.in_(recommendation_ids)
|
||||
).all()
|
||||
materials_query = (
|
||||
session.query(RecommendationMaterials)
|
||||
.filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids))
|
||||
.all()
|
||||
)
|
||||
|
||||
# Group materials by recommendation_id
|
||||
materials_by_recommendation = defaultdict(list)
|
||||
|
||||
for m in materials_query:
|
||||
materials_by_recommendation[m.recommendation_id].append({
|
||||
"material_id": m.material_id,
|
||||
"depth": m.depth,
|
||||
"quantity": m.quantity,
|
||||
"quantity_unit": m.quantity_unit,
|
||||
"estimated_cost": m.estimated_cost,
|
||||
})
|
||||
materials_by_recommendation[m.recommendation_id].append(
|
||||
{
|
||||
"material_id": m.material_id,
|
||||
"depth": m.depth,
|
||||
"quantity": m.quantity,
|
||||
"quantity_unit": m.quantity_unit,
|
||||
"estimated_cost": m.estimated_cost,
|
||||
}
|
||||
)
|
||||
|
||||
# Attach materials safely (no filtering side effects)
|
||||
for r in recommendations_data:
|
||||
|
|
@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer:
|
|||
recommendations_df.to_excel(writer, sheet_name="recommendations", index=False)
|
||||
properties_df.to_excel(writer, sheet_name="properties", index=False)
|
||||
|
||||
|
||||
|
||||
# solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"]
|
||||
# average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index()
|
||||
|
||||
|
||||
|
||||
# # Check tenures
|
||||
# initial_asset_data = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody "
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ import pandas as pd
|
|||
full_sal = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final "
|
||||
"SAL/Depracated/20260107 corrected batch 6 sal.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
|
||||
# ------Pull in the reduced sample ------
|
||||
|
|
@ -12,7 +12,7 @@ full_sal = pd.read_excel(
|
|||
reduced_sal = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - "
|
||||
"ownership filtered sal.xlsx",
|
||||
sheet_name="Standardised Asset List"
|
||||
sheet_name="Standardised Asset List",
|
||||
)
|
||||
|
||||
# ------ Pull in the confirmed ownership column from Peabody ------
|
||||
|
|
@ -20,18 +20,20 @@ new_asset_data = pd.read_excel(
|
|||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 "
|
||||
"- Peabody "
|
||||
"- Data Extracts for Domna v2.xlsx",
|
||||
sheet_name="Properties"
|
||||
sheet_name="Properties",
|
||||
)
|
||||
|
||||
correct_sample = new_asset_data[
|
||||
~new_asset_data["AH Tenure"].isin(
|
||||
["Commercial",
|
||||
"Freeholder",
|
||||
"HOMEBUY / EQUITY LOAN",
|
||||
"Leaseholder",
|
||||
"Outright Sale",
|
||||
"SHARED EQUITY",
|
||||
"Shared Ownership"]
|
||||
[
|
||||
"Commercial",
|
||||
"Freeholder",
|
||||
"HOMEBUY / EQUITY LOAN",
|
||||
"Leaseholder",
|
||||
"Outright Sale",
|
||||
"SHARED EQUITY",
|
||||
"Shared Ownership",
|
||||
]
|
||||
)
|
||||
].copy()
|
||||
|
||||
|
|
@ -41,9 +43,7 @@ stuff_to_add = correct_sample[
|
|||
~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values)
|
||||
]["UPRN"].values
|
||||
|
||||
sal_to_add = full_sal[
|
||||
full_sal["domna_property_id"].isin(stuff_to_add)
|
||||
].copy()
|
||||
sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy()
|
||||
|
||||
# ------- Stuff to remove -------
|
||||
stuff_to_remove = reduced_sal[
|
||||
|
|
@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel
|
|||
from backend.app.db.connection import db_session, db_read_session
|
||||
from sqlalchemy import select, func
|
||||
from sqlalchemy.orm import Session
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.db.models.recommendations import PlanModel
|
||||
|
||||
uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist()
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from sqlalchemy.sql import true
|
|||
from backend.app.db.utils import row2dict
|
||||
from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel
|
||||
from backend.app.db.models.recommendations import Recommendation
|
||||
from backend.app.db.models.recommendations import Plan
|
||||
from backend.app.db.models.recommendations import PlanModel
|
||||
from backend.app.utils import sap_to_epc
|
||||
|
||||
EPC_COLOURS = {
|
||||
|
|
@ -17,7 +17,7 @@ EPC_COLOURS = {
|
|||
"D": "#fdd401",
|
||||
"E": "#fdab67",
|
||||
"F": "#ee8023",
|
||||
"G": "#e71437"
|
||||
"G": "#e71437",
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id:
|
|||
its associated default recommendations if any.
|
||||
"""
|
||||
# Outer join so that every property in the portfolio is returned; the default-recommendation filter sits in the join condition rather than the WHERE clause
|
||||
query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation,
|
||||
(Recommendation.property_id == PropertyModel.id) & (
|
||||
Recommendation.default == true())) \
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id) \
|
||||
query = (
|
||||
session.query(PropertyModel, Recommendation)
|
||||
.outerjoin(
|
||||
Recommendation,
|
||||
(Recommendation.property_id == PropertyModel.id)
|
||||
& (Recommendation.default == true()),
|
||||
)
|
||||
.filter(PropertyModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
properties = {}
|
||||
for property, recommendation in query:
|
||||
# Ensure the property is added once with an empty list of recommendations initially
|
||||
if property.id not in properties:
|
||||
properties[property.id] = row2dict(property)
|
||||
properties[property.id]['recommendations'] = []
|
||||
properties[property.id]["recommendations"] = []
|
||||
|
||||
# Append recommendations if they exist and meet the criteria (already filtered by the query)
|
||||
if recommendation and recommendation.default:
|
||||
properties[property.id]['recommendations'].append(row2dict(recommendation))
|
||||
properties[property.id]["recommendations"].append(row2dict(recommendation))
|
||||
|
||||
return list(properties.values())
|
||||
|
||||
|
|
@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int):
|
|||
:return: A list of dictionaries, where each dictionary represents a property's details.
|
||||
Returns an empty list if no property details are found.
|
||||
"""
|
||||
property_details = session.query(PropertyDetailsEpcModel).filter(
|
||||
PropertyDetailsEpcModel.portfolio_id == portfolio_id).all()
|
||||
property_details = (
|
||||
session.query(PropertyDetailsEpcModel)
|
||||
.filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id)
|
||||
.all()
|
||||
)
|
||||
|
||||
# Convert the SQLAlchemy objects to dictionaries
|
||||
property_details_dict = [row2dict(pd) for pd in property_details] if property_details else []
|
||||
property_details_dict = (
|
||||
[row2dict(pd) for pd in property_details] if property_details else []
|
||||
)
|
||||
|
||||
return property_details_dict
|
||||
|
||||
|
|
@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
|
|||
:return: A list of dictionaries, where each dictionary represents a plan.
|
||||
Returns an empty list if no plans are found.
|
||||
"""
|
||||
plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all()
|
||||
plans = (
|
||||
session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all()
|
||||
)
|
||||
|
||||
# Convert the SQLAlchemy objects to dictionaries
|
||||
plans_dict = [row2dict(plan) for plan in plans] if plans else []
|
||||
|
|
@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int):
|
|||
return plans_dict
|
||||
|
||||
|
||||
def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15):
|
||||
def plot_epc_distribution(
|
||||
df,
|
||||
customer_key,
|
||||
title="Your Units",
|
||||
background_color="white",
|
||||
bar_height=0.4,
|
||||
font_size=15,
|
||||
):
|
||||
"""
|
||||
Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes.
|
||||
Allows setting the plot background color and dynamically adjusts text size and bar spacing.
|
||||
|
|
@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color
|
|||
:param font_size: Base font size for text annotations (default 15)
|
||||
"""
|
||||
# Calculate dynamic figure size or adjust based on preferences
|
||||
square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries
|
||||
square_size = max(
|
||||
6, len(df) * 0.6
|
||||
) # Ensure minimum size and adjust based on number of entries
|
||||
fig, ax = plt.subplots(figsize=(square_size, square_size))
|
||||
fig.patch.set_facecolor(background_color) # Set figure background color
|
||||
ax.set_facecolor(background_color) # Set axes background color
|
||||
|
||||
df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place
|
||||
df_sorted = df.sort_values('percentage', ascending=True)
|
||||
df["percentage"] = df["percentage"].round(
|
||||
1
|
||||
) # Round the percentage values to 1 decimal place
|
||||
df_sorted = df.sort_values("percentage", ascending=True)
|
||||
|
||||
# Plot bars with specified height for adjustable thickness
|
||||
bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'],
|
||||
color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height)
|
||||
bars = ax.barh(
|
||||
df_sorted["current_epc_rating"],
|
||||
df_sorted["percentage"],
|
||||
color=df_sorted["current_epc_rating"].map(EPC_COLOURS),
|
||||
edgecolor="none",
|
||||
height=bar_height,
|
||||
)
|
||||
|
||||
epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size
|
||||
count_percentage_font_size = font_size # Count (percentage) font size as base font size
|
||||
epc_rating_font_size = (
|
||||
font_size * 2
|
||||
) # EPC rating font size larger than base font size
|
||||
count_percentage_font_size = (
|
||||
font_size # Count (percentage) font size as base font size
|
||||
)
|
||||
|
||||
# Annotate bars with EPC ratings inside and count with percentage values outside
|
||||
for index, bar in enumerate(bars):
|
||||
width = bar.get_width()
|
||||
epc_rating = df_sorted.iloc[index]['current_epc_rating']
|
||||
count = df_sorted.iloc[index]['count']
|
||||
percentage = df_sorted.iloc[index]['percentage']
|
||||
epc_rating = df_sorted.iloc[index]["current_epc_rating"]
|
||||
count = df_sorted.iloc[index]["count"]
|
||||
percentage = df_sorted.iloc[index]["percentage"]
|
||||
|
||||
# EPC rating inside the bar with increased font size
|
||||
ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2,
|
||||
f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size)
|
||||
ax.text(
|
||||
width - (width * 0.05),
|
||||
bar.get_y() + bar.get_height() / 2,
|
||||
f"{epc_rating}",
|
||||
va="center",
|
||||
ha="right",
|
||||
color="white",
|
||||
fontsize=epc_rating_font_size,
|
||||
)
|
||||
|
||||
# Count and percentage outside the bar, original font size
|
||||
ax.text(width + 1, bar.get_y() + bar.get_height() / 2,
|
||||
f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size)
|
||||
ax.text(
|
||||
width + 1,
|
||||
bar.get_y() + bar.get_height() / 2,
|
||||
f"{count} ({percentage}%)",
|
||||
va="center",
|
||||
color="black",
|
||||
fontsize=count_percentage_font_size,
|
||||
)
|
||||
|
||||
ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally
|
||||
ax.tick_params(axis='x', which='both', bottom=False, top=False,
|
||||
labelbottom=False) # Remove x-axis tick marks and values
|
||||
ax.tick_params(axis='y', which='both', left=False, right=False,
|
||||
labelleft=False) # Remove y-axis tick marks and labels
|
||||
ax.spines['top'].set_visible(False) # Remove top spine
|
||||
ax.spines['right'].set_visible(False) # Remove right spine
|
||||
ax.spines['left'].set_visible(False) # Remove left spine
|
||||
ax.spines['bottom'].set_visible(False) # Remove bottom spine
|
||||
ax.set_title(
|
||||
title, fontsize=font_size * 1.2
|
||||
) # Adjust title font size proportionally
|
||||
ax.tick_params(
|
||||
axis="x", which="both", bottom=False, top=False, labelbottom=False
|
||||
) # Remove x-axis tick marks and values
|
||||
ax.tick_params(
|
||||
axis="y", which="both", left=False, right=False, labelleft=False
|
||||
) # Remove y-axis tick marks and labels
|
||||
ax.spines["top"].set_visible(False) # Remove top spine
|
||||
ax.spines["right"].set_visible(False) # Remove right spine
|
||||
ax.spines["left"].set_visible(False) # Remove left spine
|
||||
ax.spines["bottom"].set_visible(False) # Remove bottom spine
|
||||
|
||||
plt.tight_layout() # Adjust layout
|
||||
plt.show()
|
||||
|
||||
# Save the figure as an image
|
||||
figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png'
|
||||
fig.savefig(figure_path, bbox_inches='tight')
|
||||
figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png"
|
||||
fig.savefig(figure_path, bbox_inches="tight")
|
||||
plt.close(fig) # Close the figure to free memory
|
||||
|
||||
return fig, figure_path
|
||||
|
||||
|
||||
def save_plot_to_image(figure, path='plot.png'):
|
||||
def save_plot_to_image(figure, path="plot.png"):
|
||||
"""
|
||||
Saves a matplotlib figure to an image file for insertion into PowerPoint.
|
||||
"""
|
||||
figure.savefig(path, bbox_inches='tight')
|
||||
figure.savefig(path, bbox_inches="tight")
|
||||
plt.close(figure)
|
||||
|
||||
|
||||
def save_figure_as_image(figure, filename='temp_plot.png'):
|
||||
def save_figure_as_image(figure, filename="temp_plot.png"):
|
||||
"""
|
||||
Saves a matplotlib figure to an image file.
|
||||
"""
|
||||
figure.savefig(filename, dpi=300)
|
||||
plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments
|
||||
plt.close(
|
||||
figure
|
||||
) # Close the figure to prevent it from displaying in notebooks or Python environments
|
||||
|
||||
|
||||
def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8),
|
||||
height_inches=Inches(2)):
|
||||
def add_commentary_with_bullets(
|
||||
slide,
|
||||
commentary,
|
||||
top_inches,
|
||||
left_inches=Inches(1),
|
||||
width_inches=Inches(8),
|
||||
height_inches=Inches(2),
|
||||
):
|
||||
"""
|
||||
Adds commentary with bullet points to a slide.
|
||||
|
||||
|
|
@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
|
|||
:param width_inches: The width of the commentary text box.
|
||||
:param height_inches: The height of the commentary text box.
|
||||
"""
|
||||
txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches)
|
||||
txBox = slide.shapes.add_textbox(
|
||||
left_inches, top_inches, width_inches, height_inches
|
||||
)
|
||||
tf = txBox.text_frame
|
||||
|
||||
# Configure text frame
|
||||
|
|
@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche
|
|||
|
||||
for i, section in enumerate(sections):
|
||||
if i > 0:
|
||||
p = tf.add_paragraph() # Add a new paragraph for each section after the first
|
||||
p = (
|
||||
tf.add_paragraph()
|
||||
) # Add a new paragraph for each section after the first
|
||||
else:
|
||||
p = tf.paragraphs[0] # Use the first paragraph for the first section
|
||||
p.text = section
|
||||
|
|
@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None):
|
|||
# Determine the position of the commentary text box based on whether an image is included
|
||||
if img_path:
|
||||
# Add the image
|
||||
slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5))
|
||||
slide.shapes.add_picture(
|
||||
img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)
|
||||
)
|
||||
# Position for commentary when image is present
|
||||
commentary_top = Inches(6)
|
||||
else:
|
||||
|
|
@ -237,16 +300,18 @@ def create_powerpoint(data, save_location):
|
|||
prs = Presentation()
|
||||
|
||||
for slide, slide_data in data.items():
|
||||
slide_figure_path = data[slide].get('image_path')
|
||||
text = data[slide].get('text')
|
||||
title = data[slide].get('title', "")
|
||||
slide_figure_path = data[slide].get("image_path")
|
||||
text = data[slide].get("text")
|
||||
title = data[slide].get("title", "")
|
||||
add_slide_with_image(prs, title, slide_figure_path, text)
|
||||
|
||||
# Save the presentation
|
||||
prs.save(save_location)
|
||||
|
||||
|
||||
def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target):
|
||||
def create_recommendations_summary(
|
||||
recommendations_df, properties_df, property_details_df, sap_target
|
||||
):
|
||||
# Aggregate the impact of the recommendations
|
||||
# We want:
|
||||
# Total number of sap points
|
||||
|
|
@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d
|
|||
# total bill savings
|
||||
# total cost
|
||||
# Total Co2 impact
|
||||
recommendations_summary = recommendations_df.groupby(["property_id"]).agg(
|
||||
total_sap_points=("sap_points", "sum"),
|
||||
total_valuation_impact=("property_valuation_increase", "sum"),
|
||||
total_bill_savings=("energy_cost_savings", "sum"),
|
||||
total_cost=("estimated_cost", "sum"),
|
||||
total_carbon=("co2_equivalent_savings", "sum"),
|
||||
adjusted_heat_demand=("adjusted_heat_demand", "sum")
|
||||
).reset_index()
|
||||
recommendations_summary = (
|
||||
recommendations_df.groupby(["property_id"])
|
||||
.agg(
|
||||
total_sap_points=("sap_points", "sum"),
|
||||
total_valuation_impact=("property_valuation_increase", "sum"),
|
||||
total_bill_savings=("energy_cost_savings", "sum"),
|
||||
total_cost=("estimated_cost", "sum"),
|
||||
total_carbon=("co2_equivalent_savings", "sum"),
|
||||
adjusted_heat_demand=("adjusted_heat_demand", "sum"),
|
||||
)
|
||||
.reset_index()
|
||||
)
|
||||
# Merge on uprn and current sap points (current CO2, energy consumption and annual bill are merged below when property_details_df is provided)
|
||||
recommendations_summary = recommendations_summary.merge(
|
||||
properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id",
|
||||
how="left"
|
||||
properties_df[["id", "uprn", "current_sap_points"]].rename(
|
||||
columns={"id": "property_id"}
|
||||
),
|
||||
on="property_id",
|
||||
how="left",
|
||||
)
|
||||
|
||||
recommendations_summary["expected_sap_points"] = (
|
||||
recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"]
|
||||
recommendations_summary["current_sap_points"]
|
||||
+ recommendations_summary["total_sap_points"]
|
||||
)
|
||||
recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply(
|
||||
lambda x: sap_to_epc(x)
|
||||
recommendations_summary["expected_epc_rating"] = recommendations_summary[
|
||||
"expected_sap_points"
|
||||
].apply(lambda x: sap_to_epc(x))
|
||||
recommendations_summary["sap_difference"] = (
|
||||
sap_target - recommendations_summary["expected_sap_points"]
|
||||
)
|
||||
recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"]
|
||||
|
||||
if property_details_df is not None:
|
||||
recommendations_summary = recommendations_summary.merge(
|
||||
property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename(
|
||||
property_details_df[
|
||||
["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]
|
||||
].rename(
|
||||
columns={
|
||||
"id": "property_id",
|
||||
"co2_emissions": "current_co2",
|
||||
"adjusted_energy_consumption": "current_energy",
|
||||
"energy_bill": "current_energy_bill"
|
||||
"energy_bill": "current_energy_bill",
|
||||
}
|
||||
),
|
||||
on="uprn",
|
||||
how="left"
|
||||
how="left",
|
||||
)
|
||||
|
||||
return recommendations_summary
|
||||
|
|
|
|||
|
|
@ -1,3 +1,30 @@
|
|||
# ==============================================================================
|
||||
# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy
|
||||
# ==============================================================================
|
||||
# Instructions:
|
||||
# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name")
|
||||
# 2. Add any additional environment variables as needed
|
||||
# 3. To attach S3 IAM policies from shared state:
|
||||
# - Uncomment the S3 policy attachment section below
|
||||
# - Update the policy_arn to match the output from shared/main.tf
|
||||
# - Available shared outputs (examples):
|
||||
# - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn
|
||||
# - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
|
||||
# 4. To create a NEW S3 policy:
|
||||
# - Add a new module "lambda_s3_policy" in shared/main.tf using the
|
||||
# s3_iam_policy module (see examples in shared/main.tf)
|
||||
# - Then reference it here using data.terraform_remote_state.shared.outputs
|
||||
# ==============================================================================
|
||||
|
||||
data "terraform_remote_state" "shared" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "assessment-model-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
|
||||
module "lambda" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
|
|
@ -12,3 +39,25 @@ module "lambda" {
|
|||
LOG_LEVEL = "info"
|
||||
}
|
||||
}
|
||||
|
||||
# ======================================================================
|
||||
# OPTIONAL: Attach S3 IAM policy to Lambda execution role
|
||||
# ======================================================================
|
||||
# Uncomment and configure the resource below to attach S3 permissions
|
||||
#
|
||||
# Example 1: Attach existing policy from shared state
|
||||
# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" {
|
||||
# role = module.lambda.role_name
|
||||
# policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn
|
||||
# }
|
||||
#
|
||||
# Example 2: Attach multiple policies
|
||||
# resource "aws_iam_role_policy_attachment" "lambda_read_policy" {
|
||||
# role = module.lambda.role_name
|
||||
# policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
|
||||
# }
|
||||
#
|
||||
# resource "aws_iam_role_policy_attachment" "lambda_write_policy" {
|
||||
# role = module.lambda.role_name
|
||||
# policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn
|
||||
# }
|
||||
|
|
|
|||
|
|
@ -1,3 +1,19 @@
|
|||
data "terraform_remote_state" "shared" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "assessment-model-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||
}
|
||||
|
||||
locals {
|
||||
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||
}
|
||||
|
||||
module "address2uprn" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
|
|
@ -6,9 +22,32 @@ module "address2uprn" {
|
|||
|
||||
image_uri = local.image_uri
|
||||
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
}
|
||||
environment = merge(
|
||||
{
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||
GOOGLE_SOLAR_API_KEY = "test"
|
||||
SAP_PREDICTIONS_BUCKET = "test"
|
||||
CARBON_PREDICTIONS_BUCKET = "test"
|
||||
HEAT_PREDICTIONS_BUCKET = "test"
|
||||
HEATING_KWH_PREDICTIONS_BUCKET = "test"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
|
||||
API_KEY = "test"
|
||||
ENVIRONMENT = "test"
|
||||
SECRET_KEY = "test"
|
||||
PLAN_TRIGGER_BUCKET = "test"
|
||||
DATA_BUCKET = "test"
|
||||
ENGINE_SQS_URL = "test"
|
||||
ENERGY_ASSESSMENTS_BUCKET = "test"
|
||||
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
# Attach S3 read-and-write policy to the Lambda execution role
|
||||
resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" {
|
||||
role = module.address2uprn.role_name
|
||||
policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn
|
||||
}
|
||||
14
infrastructure/terraform/lambda/address2UPRN/outputs.tf
Normal file
14
infrastructure/terraform/lambda/address2UPRN/outputs.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
output "address2uprn_queue_url" {
|
||||
value = module.address2uprn.queue_url
|
||||
description = "URL of the address2UPRN SQS queue"
|
||||
}
|
||||
|
||||
output "address2uprn_queue_arn" {
|
||||
value = module.address2uprn.queue_arn
|
||||
description = "ARN of the address2UPRN SQS queue"
|
||||
}
|
||||
|
||||
output "address2uprn_lambda_arn" {
|
||||
value = module.address2uprn.lambda_arn
|
||||
description = "ARN of the address2UPRN Lambda function"
|
||||
}
|
||||
|
|
@ -23,7 +23,6 @@ module "lambda" {
|
|||
stage = var.stage
|
||||
|
||||
image_uri = local.image_uri
|
||||
timeout = 180
|
||||
|
||||
|
||||
environment = merge(
|
||||
|
|
|
|||
|
|
@ -9,3 +9,4 @@ output "queue_arn" {
|
|||
output "queue_url" {
|
||||
value = module.queue.queue_url
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,3 +1,30 @@
|
|||
data "terraform_remote_state" "shared" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "assessment-model-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||
}
|
||||
|
||||
|
||||
locals {
|
||||
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||
}
|
||||
|
||||
# Reference the existing address2UPRN Lambda outputs from address2uprn state
|
||||
data "terraform_remote_state" "address2uprn" {
|
||||
backend = "s3"
|
||||
config = {
|
||||
bucket = "address2uprn-terraform-state"
|
||||
key = "env:/${var.stage}/terraform.tfstate"
|
||||
region = "eu-west-2"
|
||||
}
|
||||
}
|
||||
|
||||
module "lambda" {
|
||||
source = "../modules/lambda_with_sqs"
|
||||
|
||||
|
|
@ -7,8 +34,56 @@ module "lambda" {
|
|||
image_uri = local.image_uri
|
||||
|
||||
|
||||
environment = {
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
}
|
||||
environment = merge(
|
||||
{
|
||||
STAGE = var.stage
|
||||
LOG_LEVEL = "info"
|
||||
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||
GOOGLE_SOLAR_API_KEY = "test"
|
||||
SAP_PREDICTIONS_BUCKET = "test"
|
||||
CARBON_PREDICTIONS_BUCKET = "test"
|
||||
HEAT_PREDICTIONS_BUCKET = "test"
|
||||
HEATING_KWH_PREDICTIONS_BUCKET = "test"
|
||||
HOTWATER_KWH_PREDICTIONS_BUCKET = "test"
|
||||
API_KEY = "test"
|
||||
ENVIRONMENT = "test"
|
||||
SECRET_KEY = "test"
|
||||
PLAN_TRIGGER_BUCKET = "test"
|
||||
DATA_BUCKET = "test"
|
||||
EPC_AUTH_TOKEN = "test"
|
||||
ENGINE_SQS_URL = "test"
|
||||
ENERGY_ASSESSMENTS_BUCKET = "test"
|
||||
ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url
|
||||
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||
},
|
||||
)
|
||||
}
|
||||
|
||||
# Attach S3 read policy to the Lambda execution role
|
||||
resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" {
|
||||
role = module.lambda.role_name
|
||||
policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn
|
||||
}
|
||||
|
||||
# Create SQS send policy for address2UPRN queue
|
||||
module "postcode_splitter_sqs_policy" {
|
||||
source = "../../modules/general_iam_policy"
|
||||
|
||||
policy_name = "postcode-splitter-sqs-send-${var.stage}"
|
||||
policy_description = "Allow postcode-splitter Lambda to send messages to address2UPRN queue"
|
||||
|
||||
actions = [
|
||||
"sqs:SendMessage"
|
||||
]
|
||||
|
||||
resources = [
|
||||
data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_arn
|
||||
]
|
||||
}
|
||||
|
||||
# Attach SQS policy to the Lambda execution role
|
||||
resource "aws_iam_role_policy_attachment" "postcode_splitter_sqs_send" {
|
||||
role = module.lambda.role_name
|
||||
policy_arn = module.postcode_splitter_sqs_policy.policy_arn
|
||||
}
|
||||
|
|
@ -24,3 +24,12 @@ locals {
|
|||
output "resolved_image_uri" {
|
||||
value = local.image_uri
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
21
infrastructure/terraform/modules/general_iam_policy/main.tf
Normal file
21
infrastructure/terraform/modules/general_iam_policy/main.tf
Normal file
|
|
@ -0,0 +1,21 @@
|
|||
# IAM Policy with dynamic actions and resources
|
||||
# Single-statement "Allow" policy built entirely from the module inputs
# (policy_name, policy_description, actions, resources, conditions, tags).
resource "aws_iam_policy" "policy" {
|
||||
name = var.policy_name
|
||||
description = var.policy_description
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
# merge() combines the fixed statement keys with the optional Condition key below
merge(
|
||||
{
|
||||
Effect = "Allow"
|
||||
Action = var.actions
|
||||
Resource = var.resources
|
||||
},
|
||||
# Contributes a Condition key only when var.conditions is non-null; otherwise an empty object is merged
var.conditions != null ? { Condition = var.conditions } : {}
|
||||
)
|
||||
]
|
||||
})
|
||||
|
||||
tags = var.tags
|
||||
}
|
||||
|
|
@ -0,0 +1,9 @@
|
|||
output "policy_arn" {
|
||||
value = aws_iam_policy.policy.arn
|
||||
description = "ARN of the created IAM policy"
|
||||
}
|
||||
|
||||
output "policy_name" {
|
||||
value = aws_iam_policy.policy.name
|
||||
description = "Name of the created IAM policy"
|
||||
}
|
||||
|
|
@ -0,0 +1,32 @@
|
|||
variable "policy_name" {
|
||||
description = "Name of the IAM policy"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "policy_description" {
|
||||
description = "Description of the IAM policy"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "actions" {
|
||||
description = "List of IAM actions allowed by this policy"
|
||||
type = list(string)
|
||||
}
|
||||
|
||||
variable "resources" {
|
||||
description = "List of AWS resources this policy applies to"
|
||||
type = list(string)
|
||||
}
|
||||
|
||||
variable "conditions" {
|
||||
description = "Optional IAM policy conditions"
|
||||
type = any
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "tags" {
|
||||
description = "Tags to apply to the policy"
|
||||
type = map(string)
|
||||
default = {}
|
||||
}
|
||||
|
|
@ -19,19 +19,3 @@ resource "aws_iam_role_policy_attachment" "basic_logs" {
|
|||
policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole"
|
||||
}
|
||||
|
||||
resource "aws_iam_role_policy" "ecr_pull" {
|
||||
role = aws_iam_role.this.name
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [{
|
||||
Effect = "Allow"
|
||||
Action = [
|
||||
"ecr:GetAuthorizationToken",
|
||||
"ecr:BatchGetImage",
|
||||
"ecr:GetDownloadUrlForLayer"
|
||||
]
|
||||
Resource = "*"
|
||||
}]
|
||||
})
|
||||
}
|
||||
|
|
|
|||
31
infrastructure/terraform/modules/s3_iam_policy/main.tf
Normal file
31
infrastructure/terraform/modules/s3_iam_policy/main.tf
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
# Dynamically build S3 resources list from bucket ARNs and resource paths
|
||||
locals {
|
||||
# Generate full resource ARNs by combining bucket ARNs with resource paths
|
||||
resources = flatten([
|
||||
for bucket_arn in var.bucket_arns : [
|
||||
for path in var.resource_paths : "${bucket_arn}${path}"
|
||||
]
|
||||
])
|
||||
}
|
||||
|
||||
# IAM Policy with dynamic actions and resources
|
||||
resource "aws_iam_policy" "s3_policy" {
|
||||
name = var.policy_name
|
||||
description = var.policy_description
|
||||
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
merge(
|
||||
{
|
||||
Effect = "Allow"
|
||||
Action = var.actions
|
||||
Resource = local.resources
|
||||
},
|
||||
var.conditions != null ? { Condition = var.conditions } : {}
|
||||
)
|
||||
]
|
||||
})
|
||||
|
||||
tags = var.tags
|
||||
}
|
||||
14
infrastructure/terraform/modules/s3_iam_policy/outputs.tf
Normal file
14
infrastructure/terraform/modules/s3_iam_policy/outputs.tf
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
output "policy_arn" {
|
||||
description = "ARN of the S3 IAM policy"
|
||||
value = aws_iam_policy.s3_policy.arn
|
||||
}
|
||||
|
||||
output "policy_name" {
|
||||
description = "Name of the S3 IAM policy"
|
||||
value = aws_iam_policy.s3_policy.name
|
||||
}
|
||||
|
||||
output "policy_id" {
|
||||
description = "ID of the S3 IAM policy"
|
||||
value = aws_iam_policy.s3_policy.id
|
||||
}
|
||||
42
infrastructure/terraform/modules/s3_iam_policy/variables.tf
Normal file
42
infrastructure/terraform/modules/s3_iam_policy/variables.tf
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
variable "policy_name" {
|
||||
description = "Name of the IAM policy"
|
||||
type = string
|
||||
}
|
||||
|
||||
variable "policy_description" {
|
||||
description = "Description of the IAM policy"
|
||||
type = string
|
||||
default = ""
|
||||
}
|
||||
|
||||
variable "bucket_arns" {
|
||||
description = "List of S3 bucket ARNs to grant access to"
|
||||
type = list(string)
|
||||
}
|
||||
|
||||
variable "actions" {
|
||||
description = "List of S3 actions to allow (e.g., ['s3:GetObject'], ['s3:PutObject'], ['s3:DeleteObject'])"
|
||||
type = list(string)
|
||||
default = ["s3:GetObject"]
|
||||
}
|
||||
|
||||
variable "resource_paths" {
|
||||
description = "List of resource paths within buckets (e.g., ['/*'] for all objects, ['/specific-prefix/*'] for specific prefix)"
|
||||
type = list(string)
|
||||
default = ["/*"]
|
||||
}
|
||||
|
||||
variable "conditions" {
|
||||
description = "Optional IAM policy conditions to apply to the statement"
|
||||
type = any
|
||||
default = null
|
||||
}
|
||||
|
||||
variable "tags" {
|
||||
description = "Tags to apply to the policy"
|
||||
type = map(string)
|
||||
default = {}
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
|
@ -133,6 +133,11 @@ module "retrofit_sap_data" {
|
|||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
output "retrofit_sap_data_bucket_name" {
|
||||
value = module.retrofit_sap_data.bucket_name
|
||||
description = "Name of the retrofit SAP data bucket"
|
||||
}
|
||||
|
||||
module "retrofit_carbon_predictions" {
|
||||
source = "../modules/s3"
|
||||
bucketname = "retrofit-carbon-predictions-${var.stage}"
|
||||
|
|
@ -305,6 +310,21 @@ module "address2uprn_registry" {
|
|||
|
||||
}
|
||||
|
||||
# S3 policy for address2UPRN to read from and write to the retrofit data bucket
|
||||
module "address2uprn_s3_read_and_write" {
  source = "../modules/s3_iam_policy"

  policy_name        = "Address2UPRNReadandWriteS3"
  policy_description = "Allow address2uprn Lambda to read and write from retrofit-data bucket"
  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]
  actions            = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]

  # The s3_iam_policy module builds each resource as "${bucket_arn}${path}".
  # s3:ListBucket is a bucket-level action and only matches the bucket ARN
  # itself (path ""), while s3:GetObject / s3:PutObject match object ARNs
  # (path "/*"). With only "/*" the ListBucket grant never took effect.
  resource_paths = ["", "/*"]
}
|
||||
|
||||
output "address_2_uprn_s3_read_and_write_arn" {
|
||||
value = module.address2uprn_s3_read_and_write.policy_arn
|
||||
}
|
||||
|
||||
################################################
|
||||
# Condition ETL – Lambda ECR
|
||||
################################################
|
||||
|
|
@ -321,6 +341,28 @@ module "condition_etl_registry" {
|
|||
|
||||
}
|
||||
|
||||
# Condition Data S3 Bucket to store initial data
|
||||
module "condition_data_bucket" {
|
||||
source = "../modules/s3"
|
||||
bucketname = "condition-data-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
}
|
||||
|
||||
module "condition_etl_s3_read" {
|
||||
source = "../modules/s3_iam_policy"
|
||||
|
||||
policy_name = "ConditionETLReadS3"
|
||||
policy_description = "Allow Lambda to read objects from condition-data-${var.stage}"
|
||||
bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"]
|
||||
actions = ["s3:GetObject"]
|
||||
resource_paths = ["/*"]
|
||||
}
|
||||
|
||||
output "condition_etl_s3_read_arn" {
|
||||
value = module.condition_etl_s3_read.policy_arn
|
||||
}
|
||||
|
||||
|
||||
################################################
|
||||
# Postcode Splitter – Lambda ECR
|
||||
################################################
|
||||
|
|
@ -337,30 +379,17 @@ module "postcode_splitter_registry" {
|
|||
|
||||
}
|
||||
|
||||
################################################
|
||||
# Conidition data – S3 bucket
|
||||
################################################
|
||||
module "condition_data_bucket" {
|
||||
source = "../modules/s3"
|
||||
bucketname = "condition-data-${var.stage}"
|
||||
allowed_origins = var.allowed_origins
|
||||
# S3 policy for postcode splitter to read from retrofit data bucket
|
||||
module "postcode_splitter_s3_read" {
  source = "../modules/s3_iam_policy"

  policy_name        = "PostcodeSplitterReadS3"
  policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket"
  bucket_arns        = ["arn:aws:s3:::retrofit-data-${var.stage}"]

  # NOTE(review): this policy is named and described as read-only but also
  # grants s3:PutObject. Left unchanged in case the Lambda writes to the
  # bucket; confirm and either drop the action or rename the policy.
  actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]

  # The s3_iam_policy module builds each resource as "${bucket_arn}${path}".
  # s3:ListBucket is a bucket-level action and only matches the bucket ARN
  # itself (path ""), while the object-level actions match "/*". With only
  # "/*" the ListBucket grant never took effect.
  resource_paths = ["", "/*"]
}
|
||||
|
||||
resource "aws_iam_policy" "condition_etl_s3_read" {
|
||||
name = "ConditionETLReadS3"
|
||||
description = "Allow Lambda to read objects from condition-data-${var.stage}"
|
||||
policy = jsonencode({
|
||||
Version = "2012-10-17"
|
||||
Statement = [
|
||||
{
|
||||
Effect = "Allow"
|
||||
Action = ["s3:GetObject"]
|
||||
Resource = "arn:aws:s3:::condition-data-${var.stage}/*"
|
||||
}
|
||||
]
|
||||
})
|
||||
}
|
||||
|
||||
output "condition_etl_s3_read_arn" {
|
||||
value = aws_iam_policy.condition_etl_s3_read.arn
|
||||
output "postcode_splitter_s3_read_arn" {
|
||||
value = module.postcode_splitter_s3_read.policy_arn
|
||||
}
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
[pytest]
|
||||
pythonpath = .
|
||||
addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests
|
||||
testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker
|
|||
from backend.app.db.connection import db_engine, db_read_session
|
||||
from backend.app.db.models.recommendations import (
|
||||
Recommendation,
|
||||
Plan,
|
||||
PlanModel,
|
||||
PlanRecommendations,
|
||||
RecommendationMaterials,
|
||||
)
|
||||
|
|
@ -36,6 +36,8 @@ scenario_names = {
|
|||
1059: "EPC C - 10k budget",
|
||||
}
|
||||
|
||||
project_name = "manchester"
|
||||
|
||||
|
||||
def get_data(portfolio_id, scenario_ids):
|
||||
session = sessionmaker(bind=db_engine)()
|
||||
|
|
@ -73,12 +75,12 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# --------------------
|
||||
latest_plans_subq = (
|
||||
session.query(
|
||||
Plan.scenario_id,
|
||||
Plan.property_id,
|
||||
func.max(Plan.created_at).label("latest_created_at"),
|
||||
PlanModel.scenario_id,
|
||||
PlanModel.property_id,
|
||||
func.max(PlanModel.created_at).label("latest_created_at"),
|
||||
)
|
||||
.filter(Plan.scenario_id.in_(scenario_ids))
|
||||
.group_by(Plan.scenario_id, Plan.property_id)
|
||||
.filter(PlanModel.scenario_id.in_(scenario_ids))
|
||||
.group_by(PlanModel.scenario_id, PlanModel.property_id)
|
||||
.subquery()
|
||||
)
|
||||
|
||||
|
|
@ -87,12 +89,12 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# ).all()
|
||||
|
||||
plans_query = (
|
||||
session.query(Plan)
|
||||
session.query(PlanModel)
|
||||
.join(
|
||||
latest_plans_subq,
|
||||
(Plan.scenario_id == latest_plans_subq.c.scenario_id)
|
||||
& (Plan.property_id == latest_plans_subq.c.property_id)
|
||||
& (Plan.created_at == latest_plans_subq.c.latest_created_at),
|
||||
(PlanModel.scenario_id == latest_plans_subq.c.scenario_id)
|
||||
& (PlanModel.property_id == latest_plans_subq.c.property_id)
|
||||
& (PlanModel.created_at == latest_plans_subq.c.latest_created_at),
|
||||
)
|
||||
.all()
|
||||
)
|
||||
|
|
@ -108,7 +110,7 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# )
|
||||
|
||||
plans_data = [
|
||||
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns}
|
||||
{col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns}
|
||||
for plan in plans_query
|
||||
]
|
||||
|
||||
|
|
@ -118,12 +120,14 @@ def get_data(portfolio_id, scenario_ids):
|
|||
# Recommendations (NO materials yet)
|
||||
# --------------------
|
||||
recommendations_query = (
|
||||
session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id)
|
||||
session.query(
|
||||
Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id
|
||||
)
|
||||
.join(
|
||||
PlanRecommendations,
|
||||
Recommendation.id == PlanRecommendations.recommendation_id,
|
||||
)
|
||||
.join(Plan, Plan.id == PlanRecommendations.plan_id)
|
||||
.join(PlanModel, PlanModel.id == PlanRecommendations.plan_id)
|
||||
.filter(
|
||||
PlanRecommendations.plan_id.in_(plan_ids),
|
||||
Recommendation.default.is_(True),
|
||||
|
|
@ -230,7 +234,7 @@ for scenario_id in SCENARIOS:
|
|||
# Get recs for this scenario
|
||||
recommended_measures_df = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
][["property_id", "measure_type", "estimated_cost", "default"]]
|
||||
][["property_id", "measure_type", "estimated_cost", "default"]]
|
||||
recommended_measures_df = recommended_measures_df[
|
||||
recommended_measures_df["default"]
|
||||
]
|
||||
|
|
@ -238,7 +242,7 @@ for scenario_id in SCENARIOS:
|
|||
|
||||
post_install_sap = recommendations_df[
|
||||
recommendations_df["scenario_id"] == scenario_id
|
||||
][["property_id", "default", "sap_points"]]
|
||||
][["property_id", "default", "sap_points"]]
|
||||
post_install_sap = post_install_sap[post_install_sap["default"]]
|
||||
# Sum up the sap points by property id
|
||||
post_install_sap = (
|
||||
|
|
@ -284,6 +288,8 @@ for scenario_id in SCENARIOS:
|
|||
"current_sap_points",
|
||||
"total_floor_area",
|
||||
"number_of_rooms",
|
||||
"lodgement_date",
|
||||
"is_expired",
|
||||
"id",
|
||||
]
|
||||
]
|
||||
|
|
@ -301,7 +307,58 @@ for scenario_id in SCENARIOS:
|
|||
)
|
||||
df["uprn"] = df["uprn"].astype(str)
|
||||
|
||||
relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id]
|
||||
df2 = df.merge(
|
||||
relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]],
|
||||
how="left",
|
||||
on="property_id",
|
||||
suffixes=("", "_plan"),
|
||||
)
|
||||
print(df2["predicted_post_works_epc"].value_counts())
|
||||
print(df2["post_epc_rating"].value_counts())
|
||||
|
||||
z = df2[
|
||||
(df2["predicted_post_works_epc"] != "D")
|
||||
& (df2["post_epc_rating"].astype(str) == "Epc.D")
|
||||
]
|
||||
|
||||
df2["predicted_post_works_epc"].value_counts()
|
||||
df2["post_epc_rating"].astype(str).value_counts()
|
||||
|
||||
df2[df2["total_retrofit_cost"] > 0].shape
|
||||
|
||||
getting_works = df[df["total_retrofit_cost"] > 0]
|
||||
getting_works["predicted_post_works_epc"].value_counts()
|
||||
|
||||
32565 / getting_works.shape[0]
|
||||
|
||||
df[df["predicted_post_works_sap"] == ""]
|
||||
|
||||
# Expected columns list
|
||||
expected_columns = [
|
||||
"suspended_floor_insulation",
|
||||
"solid_floor_insulation",
|
||||
"external_wall_insulation",
|
||||
"internal_wall_insulation",
|
||||
"cavity_wall_insulation",
|
||||
"loft_insulation",
|
||||
"flat_roof_insulation",
|
||||
"room_roof_insulation",
|
||||
"secondary_glazing",
|
||||
"double_glazing",
|
||||
"solar_pv",
|
||||
"high_heat_retention_storage_heaters",
|
||||
"air_source_heat_pump",
|
||||
"boiler_upgrade",
|
||||
"roomstat_programmer_trvs",
|
||||
"time_temperature_zone_control",
|
||||
]
|
||||
# Add missing columns with default values
|
||||
for col in expected_columns:
|
||||
if col not in df.columns:
|
||||
df[col] = ""
|
||||
|
||||
# Create excel to store to
|
||||
filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx"
|
||||
filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx"
|
||||
with pd.ExcelWriter(filename) as writer:
|
||||
df.to_excel(writer, sheet_name="properties", index=False)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,13 @@
|
|||
import logging
|
||||
from os import PathLike
|
||||
from typing import Optional, Union
|
||||
|
||||
|
||||
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
|
||||
def setup_logger(
|
||||
log_file: Optional[Union[str, PathLike[str]]] = None,
|
||||
level: int = logging.INFO,
|
||||
overwrite_handler: bool = False,
|
||||
) -> logging.Logger:
|
||||
# Create a logger and set the logging level
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(level)
|
||||
|
|
|
|||
169
utils/s3.py
169
utils/s3.py
|
|
@ -3,12 +3,62 @@ import boto3
|
|||
import csv
|
||||
import pandas as pd
|
||||
from io import BytesIO, StringIO
|
||||
from urllib.parse import unquote
|
||||
from utils.logger import setup_logger
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def parse_s3_uri(s3_uri: str) -> tuple[str, str]:
    """
    Parse an S3 location string into its bucket and key.

    Supports two formats:
    1. S3 URI format: s3://bucket/key
    2. AWS console URL format, where the bucket follows "/s3/object/" in the
       URL path and the key is carried by the "prefix" query parameter

    :param s3_uri: S3 URI or AWS console object URL
    :return: Tuple of (bucket, key). For a console URL without a "prefix"
        parameter the key is an empty string.
    :raises ValueError: If the input cannot be parsed in either format
    """
    logger.info("Parsing S3 URI")

    try:
        # Check if it's an S3 URI format
        if s3_uri.startswith("s3://"):
            bucket, separator, key = s3_uri[5:].partition("/")
            if not separator:
                raise ValueError("S3 URI must include both bucket and key")
            logger.info(f"Extracted bucket: {bucket}, key: {key}")
            return bucket, key

        # Otherwise, treat as AWS console URL
        logger.info("Parsing as AWS console URL")

        # Split base URL and query string
        if "?" not in s3_uri:
            raise ValueError("No query string found")

        base, query = s3_uri.split("?", 1)

        # Extract bucket from base URL
        if "/s3/object/" not in base:
            raise ValueError("No '/s3/object/' found in URL path")

        bucket = base.split("/s3/object/")[1]
        logger.info(f"Extracted bucket: {bucket}")

        # Extract prefix from query parameters. Each item is split on the
        # FIRST "=" only, so values that themselves contain "=" (for example
        # an unencoded "year=2024/" partition in the key) are kept intact
        # instead of breaking the parse. Items without "=" are ignored and a
        # repeated parameter keeps its last value.
        params = {}
        for item in query.split("&"):
            name, separator, value = item.partition("=")
            if separator:
                params[name] = value

        key = unquote(params.get("prefix", ""))
        logger.info(f"Extracted key: {key}")

        return bucket, key
    except Exception as e:
        # Every failure (including non-string input) is reported to callers
        # as a ValueError, with the original exception chained as the cause.
        logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}")
        raise ValueError("Could not parse S3 URI") from e
|
||||
|
||||
|
||||
def read_from_s3(bucket_name, s3_file_name):
|
||||
"""
|
||||
Read an object from s3. Decoding of the data is left for outside of this function
|
||||
|
|
@ -17,11 +67,11 @@ def read_from_s3(bucket_name, s3_file_name):
|
|||
:param s3_file_name: The file name to use for the saved data in S3
|
||||
"""
|
||||
# Initialize a session using Amazon S3
|
||||
s3 = boto3.resource('s3')
|
||||
s3 = boto3.resource("s3")
|
||||
|
||||
# Get the MessagePack data from S3
|
||||
obj = s3.Object(bucket_name, s3_file_name)
|
||||
data = obj.get()['Body'].read()
|
||||
data = obj.get()["Body"].read()
|
||||
|
||||
return data
|
||||
|
||||
|
|
@ -36,7 +86,7 @@ def save_data_to_s3(data, bucket_name, s3_file_name):
|
|||
"""
|
||||
# Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles
|
||||
try:
|
||||
s3 = boto3.client('s3')
|
||||
s3 = boto3.client("s3")
|
||||
except NoCredentialsError:
|
||||
print("Credentials not available.")
|
||||
return
|
||||
|
|
@ -46,12 +96,12 @@ def save_data_to_s3(data, bucket_name, s3_file_name):
|
|||
|
||||
try:
|
||||
s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data)
|
||||
print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}')
|
||||
print(f"Successfully uploaded data to {bucket_name}/{s3_file_name}")
|
||||
except Exception as e:
|
||||
print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}')
|
||||
print(f"Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}")
|
||||
|
||||
|
||||
def read_io_from_s3(bucket_name, file_key):
|
||||
def read_io_from_s3(bucket_name: str, file_key: str) -> BytesIO:
|
||||
"""
|
||||
Read a file from S3 into a BytesIO object. This can be used by other methods to parse the response
|
||||
|
||||
|
|
@ -61,13 +111,13 @@ def read_io_from_s3(bucket_name, file_key):
|
|||
:param file_key: The file name of the shapefile in S3
|
||||
:return: Io file to be parsed by another method
|
||||
"""
|
||||
client = boto3.client('s3')
|
||||
client = boto3.client("s3")
|
||||
|
||||
# Get the Parquet file from S3
|
||||
response = client.get_object(Bucket=bucket_name, Key=file_key)
|
||||
|
||||
# Read the file into an io object
|
||||
buffer = BytesIO(response['Body'].read())
|
||||
buffer = BytesIO(response["Body"].read())
|
||||
|
||||
return buffer
|
||||
|
||||
|
|
@ -86,7 +136,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
|
|||
df.to_parquet(parquet_buffer)
|
||||
|
||||
# Create the boto3 client
|
||||
client = boto3.client('s3')
|
||||
client = boto3.client("s3")
|
||||
|
||||
# Upload the Parquet file to S3
|
||||
client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue())
|
||||
|
|
@ -102,15 +152,14 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
|
|||
"""
|
||||
|
||||
if bucket_name is None:
|
||||
raise ValueError("Bucket name is None when trying to read dataframe from parquet")
|
||||
raise ValueError(
|
||||
"Bucket name is None when trying to read dataframe from parquet"
|
||||
)
|
||||
|
||||
if not file_key.endswith(".parquet"):
|
||||
raise ValueError("This file doesn't look like a parquet file")
|
||||
|
||||
parquet_buffer = read_io_from_s3(
|
||||
bucket_name=bucket_name,
|
||||
file_key=file_key
|
||||
)
|
||||
parquet_buffer = read_io_from_s3(bucket_name=bucket_name, file_key=file_key)
|
||||
|
||||
df = pd.read_parquet(parquet_buffer)
|
||||
|
||||
|
|
@ -130,7 +179,7 @@ def save_csv_to_s3(dataframe, bucket_name, file_name):
|
|||
bool: True if the file was successfully saved, False otherwise.
|
||||
"""
|
||||
# Initialize S3 client
|
||||
s3 = boto3.client('s3')
|
||||
s3 = boto3.client("s3")
|
||||
|
||||
# Create an in-memory text stream
|
||||
csv_buffer = StringIO()
|
||||
|
|
@ -159,7 +208,7 @@ def save_pickle_to_s3(data, bucket_name, s3_file_name):
|
|||
try:
|
||||
serialized_data = pickle.dumps(data)
|
||||
except Exception as e:
|
||||
print(f'Failed to serialize data: {str(e)}')
|
||||
print(f"Failed to serialize data: {str(e)}")
|
||||
return
|
||||
|
||||
# Use save_data_to_s3 function to upload the serialized data to S3
|
||||
|
|
@ -175,9 +224,9 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
|
|||
:return: The data read from the pickle file
|
||||
"""
|
||||
try:
|
||||
s3 = boto3.client('s3')
|
||||
s3 = boto3.client("s3")
|
||||
s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name)
|
||||
serialized_data = s3_response['Body'].read()
|
||||
serialized_data = s3_response["Body"].read()
|
||||
except NoCredentialsError:
|
||||
logger.errpr("Credentials not available.")
|
||||
return None
|
||||
|
|
@ -185,20 +234,24 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
|
|||
logger.errpr("Incomplete credentials provided.")
|
||||
return None
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}')
|
||||
logger.error(
|
||||
f"Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}"
|
||||
)
|
||||
return None
|
||||
|
||||
# Deserialize data from pickle format
|
||||
try:
|
||||
data = pickle.loads(serialized_data)
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to deserialize data: {str(e)}')
|
||||
logger.error(f"Failed to deserialize data: {str(e)}")
|
||||
return None
|
||||
|
||||
return data
|
||||
|
||||
|
||||
def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None):
|
||||
def read_excel_from_s3(
|
||||
bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None
|
||||
):
|
||||
"""
|
||||
Read an Excel file from an S3 bucket and return it as a pandas DataFrame.
|
||||
|
||||
|
|
@ -222,7 +275,7 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, shee
|
|||
|
||||
# Drop columns where all values are NaN
|
||||
if drop_all_na:
|
||||
df.dropna(axis=1, how='all', inplace=True)
|
||||
df.dropna(axis=1, how="all", inplace=True)
|
||||
|
||||
# Reset index if the first column is just an index or entirely NaN
|
||||
df.reset_index(drop=True, inplace=True)
|
||||
|
|
@ -254,7 +307,7 @@ def save_excel_to_s3(df, bucket_name, file_key):
|
|||
|
||||
# Initialize a session using boto3
|
||||
session = boto3.session.Session()
|
||||
s3 = session.resource('s3')
|
||||
s3 = session.resource("s3")
|
||||
|
||||
# Upload the Excel file from the buffer to S3
|
||||
bucket = s3.Bucket(bucket_name)
|
||||
|
|
@ -264,17 +317,19 @@ def save_excel_to_s3(df, bucket_name, file_key):
|
|||
|
||||
|
||||
def read_csv_from_s3(bucket_name, filepath):
|
||||
logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'")
|
||||
s3 = boto3.client('s3')
|
||||
logger.info(
|
||||
f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'"
|
||||
)
|
||||
s3 = boto3.client("s3")
|
||||
|
||||
# Get the object from s3
|
||||
s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
|
||||
|
||||
# Read the CSV body from the s3 object
|
||||
body = s3_object['Body'].read()
|
||||
body = s3_object["Body"].read()
|
||||
|
||||
# Use StringIO to create a file-like object from the string
|
||||
csv_data = StringIO(body.decode('utf-8'))
|
||||
csv_data = StringIO(body.decode("utf-8"))
|
||||
|
||||
# Use csv library to read it into a list of dictionaries
|
||||
reader = csv.DictReader(csv_data)
|
||||
|
|
@ -292,14 +347,16 @@ def list_files_in_s3_folder(bucket_name, folder_name):
|
|||
:return: A list of file keys in the specified S3 folder.
|
||||
"""
|
||||
try:
|
||||
s3 = boto3.client('s3')
|
||||
s3 = boto3.client("s3")
|
||||
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
|
||||
|
||||
if 'Contents' not in response:
|
||||
logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
|
||||
if "Contents" not in response:
|
||||
logger.info(
|
||||
f"No files found in folder {folder_name} in bucket {bucket_name}."
|
||||
)
|
||||
return []
|
||||
|
||||
file_keys = [content['Key'] for content in response['Contents']]
|
||||
file_keys = [content["Key"] for content in response["Contents"]]
|
||||
return file_keys
|
||||
|
||||
except NoCredentialsError:
|
||||
|
|
@ -309,7 +366,9 @@ def list_files_in_s3_folder(bucket_name, folder_name):
|
|||
logger.error("Incomplete credentials provided.")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
|
||||
logger.error(
|
||||
f"Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
|
||||
)
|
||||
return []
|
||||
|
||||
|
||||
|
|
@ -335,22 +394,30 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
|
|||
"""
|
||||
|
||||
# For this function, folder_name should end with a forward slash
|
||||
if not folder_name.endswith('/'):
|
||||
folder_name += '/'
|
||||
if not folder_name.endswith("/"):
|
||||
folder_name += "/"
|
||||
|
||||
try:
|
||||
s3 = boto3.client('s3')
|
||||
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/')
|
||||
s3 = boto3.client("s3")
|
||||
response = s3.list_objects_v2(
|
||||
Bucket=bucket_name, Prefix=folder_name, Delimiter="/"
|
||||
)
|
||||
|
||||
items = []
|
||||
|
||||
# Add files to the list
|
||||
if 'Contents' in response:
|
||||
items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name])
|
||||
if "Contents" in response:
|
||||
items.extend(
|
||||
[
|
||||
content["Key"]
|
||||
for content in response["Contents"]
|
||||
if content["Key"] != folder_name
|
||||
]
|
||||
)
|
||||
|
||||
# Add immediate subfolders to the list
|
||||
if 'CommonPrefixes' in response:
|
||||
items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']])
|
||||
if "CommonPrefixes" in response:
|
||||
items.extend([prefix["Prefix"] for prefix in response["CommonPrefixes"]])
|
||||
|
||||
return items
|
||||
|
||||
|
|
@ -361,7 +428,9 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name):
|
|||
logger.error("Incomplete credentials provided.")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}')
|
||||
logger.error(
|
||||
f"Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}"
|
||||
)
|
||||
return []
|
||||
|
||||
|
||||
|
|
@ -374,15 +443,21 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
|
|||
:return: A list of XML file keys in the specified S3 folder.
|
||||
"""
|
||||
try:
|
||||
s3 = boto3.client('s3')
|
||||
s3 = boto3.client("s3")
|
||||
response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name)
|
||||
|
||||
if 'Contents' not in response:
|
||||
logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.")
|
||||
if "Contents" not in response:
|
||||
logger.info(
|
||||
f"No files found in folder {folder_name} in bucket {bucket_name}."
|
||||
)
|
||||
return []
|
||||
|
||||
# Filter XML files
|
||||
xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')]
|
||||
xml_files = [
|
||||
content["Key"]
|
||||
for content in response["Contents"]
|
||||
if content["Key"].endswith(".xml")
|
||||
]
|
||||
return xml_files
|
||||
|
||||
except NoCredentialsError:
|
||||
|
|
@ -392,5 +467,7 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
|
|||
logger.error("Incomplete credentials provided.")
|
||||
return []
|
||||
except Exception as e:
|
||||
logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}')
|
||||
logger.error(
|
||||
f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
|
||||
)
|
||||
return []
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue