diff --git a/.github/workflows/actions/actions/lambda-deploy/action.yml b/.github/workflows/actions/actions/lambda-deploy/action.yml new file mode 100644 index 00000000..3ca0fc8d --- /dev/null +++ b/.github/workflows/actions/actions/lambda-deploy/action.yml @@ -0,0 +1,90 @@ +name: "Build and Push Lambda Image to ECR" +description: "Reusable action for building and pushing lambda Docker image to ECR" + +inputs: + ecr_name: + description: "Lambda name / ECR repo name" + required: true + dockerfile_path: + description: "Path to Dockerfile" + required: true + ecr_tf_dir: + description: "Path to ECR terraform directory" + required: true + lambda_tf_dir: + description: "Path to Lambda terraform directory" + required: true + aws-access-key-id: + description: "AWS access key" + required: true + aws-secret-access-key: + description: "AWS secret key" + required: true + aws-region: + description: "AWS region" + required: true + git-sha: + description: "Git commit SHA" + required: true + git-ref: + description: "Git ref name" + required: true + +runs: + using: "composite" + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Log in to Amazon ECR + id: login-ecr + uses: aws-actions/amazon-ecr-login@v2 + + - name: Deploy ECR + uses: ./.github/workflows/actions/actions/terraform-deploy + with: + working_directory: ${{ inputs.ecr_tf_dir }} + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + - name: Set Docker image tag + id: set_tag + shell: bash + # Inputs are passed via env (not inline expressions) so a crafted ref name cannot inject shell commands + env: + GIT_SHA: ${{ inputs.git-sha }} + GIT_REF: ${{ inputs.git-ref }} + run: | + SHORT_SHA=$(echo "$GIT_SHA" | cut -c1-7) + BRANCH=$(echo "$GIT_REF" | tr '/' '-') + TAG="${BRANCH}-${SHORT_SHA}" + echo "IMAGE_TAG=${TAG}" >> "$GITHUB_ENV" + echo "tag=$TAG" >> "$GITHUB_OUTPUT" + + - name: Build and push Docker image + shell: bash + run: | + IMAGE_URI="${{ steps.login-ecr.outputs.registry }}/${{ inputs.ecr_name }}:${IMAGE_TAG}" + echo "Building Docker image for ${{ inputs.ecr_name }}..." + docker build -t "$IMAGE_URI" -f "${{ inputs.dockerfile_path }}" . + + echo "Pushing to ECR..." + docker push "$IMAGE_URI" + + - name: Deploy Lambda + uses: ./.github/workflows/actions/actions/terraform-deploy + with: + working_directory: ${{ inputs.lambda_tf_dir }} + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + lambda-image-tag: ${{ steps.set_tag.outputs.tag }} + + + diff --git a/.github/workflows/actions/actions/terraform-deploy/action.yml b/.github/workflows/actions/actions/terraform-deploy/action.yml new file mode 100644 index 00000000..56133299 --- /dev/null +++ b/.github/workflows/actions/actions/terraform-deploy/action.yml @@ -0,0 +1,55 @@ +name: "Terraform Deploy" +description: "Runs terraform init, plan and apply in the given directory" + +inputs: + working_directory: + description: "Directory containing Terraform config" + required: true + aws-access-key-id: + description: "AWS access key" + required: true + aws-secret-access-key: + description: "AWS secret key" + required: true + aws-region: + description: "AWS region" + required: true + lambda-image-tag: + description: "Tag of the Lambda image (e.g., GitHub SHA)" + required: false + +runs: + using: "composite" + steps: + - uses: actions/checkout@v4 + + - name: Configure AWS credentials + uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ inputs.aws-access-key-id }} + aws-secret-access-key: ${{ inputs.aws-secret-access-key }} + aws-region: ${{ inputs.aws-region }} + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + + - name: Terraform Init + working-directory: ${{ inputs.working_directory }} + shell: bash + run: 
terraform init -reconfigure + + - name: Terraform Plan + working-directory: ${{ inputs.working_directory }} + shell: bash + run: | + if [ -n "${{ inputs.lambda-image-tag }}" ]; then + terraform plan -out=tfplan -var="lambda_image_tag=${{ inputs.lambda-image-tag }}" + else + terraform plan -out=tfplan + fi + + - name: Terraform Apply + working-directory: ${{ inputs.working_directory }} + shell: bash + run: terraform apply -auto-approve tfplan + diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 7210a8e2..fe52a1e2 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -43,40 +43,16 @@ jobs: env: AWS_PROFILE: "DevAdmin" + # Deploy shared Terraform things - name: Terraform Init - run: cd infrastructure/terraform && terraform init + run: cd infrastructure/terraform/shared && terraform init - name: Terraform Workspace run: | - # BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///") - cd infrastructure/terraform - # terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME} - # Until Khalim makes a different environment for us - terraform workspace select dev + cd infrastructure/terraform/shared + terraform workspace select dev || terraform workspace new dev - - name: Terraform Plan + - name: Terraform Plan (shared) run: | - BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///") - cd infrastructure/terraform && terraform plan -var-file=dev.tfvars - - - name: Deploy to Dev - if: github.ref == 'refs/heads/dev' - run: echo "hello world" - # run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve - env: - name: dev - - # - name: Configure AWS credentials (ProdAdmin) - # uses: aws-actions/configure-aws-credentials@v1 - # with: - # aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }} - # aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }} - # aws-region: eu-west-2 - # env: - # 
AWS_PROFILE: "ProdAdmin" - - # - name: Deploy to Prod - # if: github.ref == 'refs/heads/prod' - # run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve - # env: - # name: prod + cd infrastructure/terraform/shared + terraform plan -var-file=dev.tfvars \ No newline at end of file diff --git a/asset_list/app.py b/asset_list/app.py index 1c7200fd..8b3abefb 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -72,21 +72,21 @@ def app(): data_folder = "/workspaces/model/asset_list" data_filename = "assets.xlsx" sheet_name = "Sheet1" - postcode_column = "Post Code" + postcode_column = "POSTCODE" address1_column = None address1_method = "house_number_extraction" - fulladdress_column = "User Input" + fulladdress_column = "ADDRESS" address_cols_to_concat = None missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = "UPRN" landlord_property_type = None - landlord_built_form = None + landlord_built_form = "BUILD FORM" landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "LLUPRN" + landlord_property_id = "UPRN" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/backend/address2UPRN/Dockerfile b/backend/address2UPRN/Dockerfile new file mode 100644 index 00000000..d7485a3f --- /dev/null +++ b/backend/address2UPRN/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Copy function code (main.py defines the handler that CMD points at) +COPY main.py ${LAMBDA_TASK_ROOT} + +# Set the handler +CMD ["main.handler"] diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 58b25d74..9d27a5ce 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -14,6 +14,9 @@ EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) +if EPC_AUTH_TOKEN is None: + raise RuntimeError("EPC_AUTH_TOKEN not defined in env") + import re from difflib import SequenceMatcher from typing import Set @@ -38,6 +41,34 @@ def 
levenshtein(a: str, b: str) -> float: def tokenise(s: str) -> Set[str]: return set(s.split()) + def extract_building_number(s: str) -> str | None: + """ + Extract the main building number (NOT flat/unit). + Assumes formats like: + - '42 moreton road' + - 'flat 3 42 moreton road' + """ + tokens = s.split() + + # remove flat/unit context + cleaned = [] + skip_next = False + for t in tokens: + if t in ("flat", "apt", "apartment", "unit"): + skip_next = True + continue + if skip_next: + skip_next = False + continue + cleaned.append(t) + + # first remaining number is building number + for t in cleaned: + if re.fullmatch(r"\d+[a-z]?", t): + return t + + return None + a_norm = normalise_address(a) b_norm = normalise_address(b) @@ -52,6 +83,13 @@ def levenshtein(a: str, b: str) -> float: if nums_a and nums_b and nums_a.isdisjoint(nums_b): return 0.0 + # 🔒 HARD GUARD: building number must match + bld_a = extract_building_number(a_norm) + bld_b = extract_building_number(b_norm) + + if bld_a and bld_b and bld_a != bld_b: + return 0.0 + # --- order-sensitive flat/building guard --- seq_a = extract_number_sequence(a_norm) seq_b = extract_number_sequence(b_norm) @@ -418,6 +456,10 @@ def run_all_test(): get_uprn("46 Oswald Street", "E5 0BT"), False ) # this one return "flat 1, in 1 semley gate" get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street") + get_uprn_candidates( + get_epc_data_with_postcode("Cr2 7dl"), + "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY", + ) if __name__ == "__main__": @@ -511,6 +553,11 @@ if __name__ == "__main__": ) +def handler(event, context): + print("hello world") + return {"statusCode": 200, "body": "hello world"} + + # TO do function dispatcher, # get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate) diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv index 91bc49e1..ee23813b 100644 --- a/backend/address2UPRN/tests/test_data.csv 
+++ b/backend/address2UPRN/tests/test_data.csv @@ -115,11 +115,16 @@ FLAT 43 Goodstone Court,HA1 4FL,10070269095 8 Genteel House Samara Drive,UB1 1FJ,12189842 9 Genteel House Samara Drive,UB1 1FJ,12189843 10 Genteel House Samara Drive,UB1 1FJ,12189844 -1 ASH TREE HOUSE,SE5 0TE,10009803979 -3 ASH TREE HOUSE,SE5 0TE,10009803981 -5 ASH TREE HOUSE,SE5 0TE,10009803983 -8 ASH TREE HOUSE,SE5 0TE,10009803986 -12 ASH TREE HOUSE,SE5 0TE,10009803990 +1 ASH TREE HOUSE,SE5 0TE,None +"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979 +3 ASH TREE HOUSE,SE5 0TE,None +Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981 +5 ASH TREE HOUSE,SE5 0TE,None +Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983 +Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986 +8 ASH TREE HOUSE,SE5 0TE,None +Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990 +12 ASH TREE HOUSE,SE5 0TE,None FLAT 1 599 HARROW ROAD,W10 4RA,217113930 FLAT 2 599 HARROW ROAD,W10 4RA,217113931 FLAT 3 599 HARROW ROAD,W10 4RA,None @@ -332,7 +337,7 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974 138a Victoria Square,M4 5FA,77211289 139a Victoria Square,M4 5FA,77211290 140a Victoria Square,M4 5FA,77211291 -141a Victoria Square,M4 5FA,None +141a Victoria Square,M4 5FA,77211292 142a Victoria Square,M4 5FA,77211293 143a Victoria Square,M4 5FA,77211294 144a Victoria Square,M4 5FA,77211295 @@ -357,4 +362,5 @@ FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974 163a Victoria Square,M4 5FA,77211314 164a Victoria Square,M4 5FA,77211315 165a Victoria Square,M4 5FA,77211316 -166a Victoria Square,M4 5FA,None \ No newline at end of file +166a Victoria Square,M4 5FA,None +"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None \ No newline at end of file diff --git a/infrastructure/terraform/lamdas/backend.tf b/infrastructure/terraform/lamdas/backend.tf new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/terraform/lamdas/dev.tfvars b/infrastructure/terraform/lamdas/dev.tfvars new file mode 100644 index 00000000..e69de29b diff --git 
a/infrastructure/terraform/lamdas/main.tf b/infrastructure/terraform/lamdas/main.tf new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/terraform/lamdas/variables.tf b/infrastructure/terraform/lamdas/variables.tf new file mode 100644 index 00000000..e69de29b diff --git a/infrastructure/terraform/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/modules/lambda_with_sqs/main.tf new file mode 100644 index 00000000..1b4e1847 --- /dev/null +++ b/infrastructure/terraform/modules/lambda_with_sqs/main.tf @@ -0,0 +1,25 @@ +resource "aws_sqs_queue" "this" { + name = "${var.name}-queue" + # Lambda rejects an SQS event source whose visibility timeout is shorter than the function timeout; AWS recommends at least 6x + visibility_timeout_seconds = var.timeout * 6 + tags = var.tags +} + +resource "aws_lambda_function" "this" { + function_name = var.name + role = var.lambda_role_arn + + package_type = "Image" + image_uri = var.image_uri + + timeout = var.timeout + + tags = var.tags +} + +resource "aws_lambda_event_source_mapping" "this" { + event_source_arn = aws_sqs_queue.this.arn + function_name = aws_lambda_function.this.arn + + batch_size = var.sqs_batch_size +} diff --git a/infrastructure/terraform/modules/lambda_with_sqs/outputs.tf b/infrastructure/terraform/modules/lambda_with_sqs/outputs.tf new file mode 100644 index 00000000..dc755850 --- /dev/null +++ b/infrastructure/terraform/modules/lambda_with_sqs/outputs.tf @@ -0,0 +1,15 @@ +output "lambda_name" { + value = aws_lambda_function.this.function_name +} + +output "lambda_arn" { + value = aws_lambda_function.this.arn +} + +output "sqs_queue_url" { + value = aws_sqs_queue.this.url +} + +output "sqs_queue_arn" { + value = aws_sqs_queue.this.arn +} diff --git a/infrastructure/terraform/modules/lambda_with_sqs/variables.tf b/infrastructure/terraform/modules/lambda_with_sqs/variables.tf new file mode 100644 index 00000000..8ac24942 --- /dev/null +++ b/infrastructure/terraform/modules/lambda_with_sqs/variables.tf @@ -0,0 +1,32 @@ +variable "name" { + description = "Base name for lambda and related resources" + type = string +} + +variable "image_uri" { + description = 
"ECR image URI with tag" + type = string +} + +variable "lambda_role_arn" { + description = "IAM role ARN for Lambda execution" + type = string +} + +variable "timeout" { + description = "Lambda timeout in seconds" + type = number + default = 10 +} + +variable "sqs_batch_size" { + description = "Number of SQS messages per batch" + type = number + default = 1 +} + +variable "tags" { + description = "Tags to apply to resources" + type = map(string) + default = {} +} diff --git a/infrastructure/terraform/dev.tfvars b/infrastructure/terraform/shared/dev.tfvars similarity index 100% rename from infrastructure/terraform/dev.tfvars rename to infrastructure/terraform/shared/dev.tfvars diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/shared/main.tf similarity index 100% rename from infrastructure/terraform/main.tf rename to infrastructure/terraform/shared/main.tf diff --git a/infrastructure/terraform/secrets.tf b/infrastructure/terraform/shared/secrets.tf similarity index 100% rename from infrastructure/terraform/secrets.tf rename to infrastructure/terraform/shared/secrets.tf diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/shared/variables.tf similarity index 100% rename from infrastructure/terraform/variables.tf rename to infrastructure/terraform/shared/variables.tf diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index f0fc5cd1..ae807654 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -7,20 +7,29 @@ import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials -from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial +from backend.app.db.models.recommendations import 
( + Recommendation, + Plan, + PlanRecommendations, + RecommendationMaterials, +) +from backend.app.db.models.portfolio import ( + PropertyModel, + PropertyDetailsEpcModel, + PropertyDetailsSpatial, +) from backend.app.db.functions.materials_functions import get_materials from collections import defaultdict from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 485 # Peabody +PORTFOLIO_ID = 502 # Peabody SCENARIOS = [ - 970, + 986, ] scenario_names = { - 970: "EPC C - No solid floor, EQI, IWI", + 986: "EPC C", } @@ -31,22 +40,26 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -58,13 +71,10 @@ def get_data(portfolio_id, scenario_ids): session.query( Plan.scenario_id, Plan.property_id, - func.max(Plan.created_at).label("latest_created_at") + func.max(Plan.created_at).label("latest_created_at"), ) .filter(Plan.scenario_id.in_(scenario_ids)) - .group_by( - Plan.scenario_id, - Plan.property_id - ) + 
.group_by(Plan.scenario_id, Plan.property_id) .subquery() ) @@ -76,9 +86,9 @@ def get_data(portfolio_id, scenario_ids): session.query(Plan) .join( latest_plans_subq, - (Plan.scenario_id == latest_plans_subq.c.scenario_id) & - (Plan.property_id == latest_plans_subq.c.property_id) & - (Plan.created_at == latest_plans_subq.c.latest_created_at) + (Plan.scenario_id == latest_plans_subq.c.scenario_id) + & (Plan.property_id == latest_plans_subq.c.property_id) + & (Plan.created_at == latest_plans_subq.c.latest_created_at), ) .all() ) @@ -103,28 +113,29 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id, - PlanRecommendations.plan_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True), - Recommendation.already_installed.is_(False) - ).all() + recommendations_query = ( + session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(Plan, Plan.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -134,23 +145,25 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendation materials (SEPARATE QUERY) # 
-------------------- - materials_query = session.query( - RecommendationMaterials - ).filter( - RecommendationMaterials.recommendation_id.in_(recommendation_ids) - ).all() + materials_query = ( + session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + .all() + ) # Group materials by recommendation_id materials_by_recommendation = defaultdict(list) for m in materials_query: - materials_by_recommendation[m.recommendation_id].append({ - "material_id": m.material_id, - "depth": m.depth, - "quantity": m.quantity, - "quantity_unit": m.quantity_unit, - "estimated_cost": m.estimated_cost, - }) + materials_by_recommendation[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) # Attach materials safely (no filtering side effects) for r in recommendations_data: @@ -161,7 +174,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS +) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -172,10 +187,8 @@ with db_read_session() as session: materials = pd.DataFrame(materials) -material_lookup = ( - materials - .set_index("id")[["type", "includes_battery"]] - .to_dict("index") +material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict( + "index" ) @@ -189,14 +202,14 @@ def has_solar_with_battery(materials_list): return False -recommendations_df["has_solar_with_battery"] = ( - recommendations_df["materials"].apply(has_solar_with_battery) +recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply( + has_solar_with_battery ) 
recommendations_df["measure_type"] = np.where( recommendations_df["has_solar_with_battery"] == True, recommendations_df["measure_type"] + "_with_battery", - recommendations_df["measure_type"] + recommendations_df["measure_type"], ) # Adjust material type to indicate if there is a battery included @@ -211,50 +224,67 @@ from utils.s3 import read_csv_from_s3, read_excel_from_s3 for scenario_id in SCENARIOS: # Get recs for this scenario - recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ - ["property_id", "measure_type", "estimated_cost", "default"] + recommended_measures_df = recommendations_df[ + recommendations_df["scenario_id"] == scenario_id + ][["property_id", "measure_type", "estimated_cost", "default"]] + recommended_measures_df = recommended_measures_df[ + recommended_measures_df["default"] ] - recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) - post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ - ["property_id", "default", "sap_points"]] + post_install_sap = recommendations_df[ + recommendations_df["scenario_id"] == scenario_id + ][["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id - post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index() + post_install_sap = ( + post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index() + ) # Find dupes by property id and measure type - dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False) + dupes = recommended_measures_df.duplicated( + subset=["property_id", "measure_type"], keep=False + ) dupe_df = recommended_measures_df[dupes] if dupe_df.shape: # Drop dupes - happened due to a funny bug recommended_measures_df = 
recommended_measures_df.drop_duplicates( - subset=["property_id", "measure_type"], keep='first' + subset=["property_id", "measure_type"], keep="first" ) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() # Total cost is the row sum, excluding the property_id column - recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( - columns=["property_id"] - ).sum(axis=1) + recommendations_measures_pivot["total_retrofit_cost"] = ( + recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1) + ) - df = properties_df[ - [ - "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", - "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", - "id" + df = ( + properties_df[ + [ + "landlord_property_id", + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + "id", + ] ] - ].merge( - recommendations_measures_pivot, how="left", on="property_id" - ).merge( - post_install_sap, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(post_install_sap, how="left", on="property_id") ) # df = df.drop(columns=["property_id"]) @@ -262,21 +292,25 @@ for scenario_id in SCENARIOS: df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] df["predicted_post_works_sap"] = df["predicted_post_works_sap"] - df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply( + lambda x: 
sap_to_epc(x) + ) df["uprn"] = df["uprn"].astype(str) relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id] df2 = df.merge( - relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id", - suffixes=("", "_plan") + relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], + how="left", + on="property_id", + suffixes=("", "_plan"), ) print(df2["predicted_post_works_epc"].value_counts()) print(df2["post_epc_rating"].value_counts()) z = df2[ - (df2["predicted_post_works_epc"] != "D") & - (df2["post_epc_rating"].astype(str) == "Epc.D") - ] + (df2["predicted_post_works_epc"] != "D") + & (df2["post_epc_rating"].astype(str) == "Epc.D") + ] df2["predicted_post_works_epc"].value_counts() df2["post_epc_rating"].astype(str).value_counts() @@ -291,189 +325,6 @@ for scenario_id in SCENARIOS: df[df["predicted_post_works_sap"] == ""] # Create excel to store to -<<<<<<< HEAD - filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx") + filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx" with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) -======= - filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx") - with pd.ExcelWriter(filename) as writer: - df.to_excel(writer, sheet_name="properties", index=False) - - -# asset_list = pd.DataFrame(asset_list) -# asset_list = asset_list.rename( -# columns={ -# "postcode": "domna_postcode" -# } -# ) -# if "domna_full_address": -# # For Peabody -# asset_list["domna_full_address"] = asset_list["domna_address_1"] -# -# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() -# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) -# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str) -# asset_list = asset_list.merge( -# 
df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), -# how="left", -# on="uprn" -# ) - - -# Get conservation area data from property details spatial. based on the UPRNs -def get_conservation_area_data(uprns): - session = sessionmaker(bind=db_engine)() - session.begin() - - # Query to get conservation area data - spatial_query = session.query( - PropertyDetailsSpatial - ).filter( - PropertyDetailsSpatial.uprn.in_(uprns) # Filter by UPRNs - ).all() - - # Transform spatial data to include all fields dynamically - spatial_data = [ - {col.name: getattr(spatial, col.name) for col in PropertyDetailsSpatial.__table__.columns} - for spatial in spatial_query - ] - - session.close() - return pd.DataFrame(spatial_data) - - -uprns = asset_list[ - ~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "") - ]["uprn"].astype(int).unique().tolist() -conservation_area_data = get_conservation_area_data(uprns) -conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str) -asset_list = asset_list.merge( - conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]], - how="left", - on="uprn" -) - -# For exporting -df.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - " - "with ID.xlsx", - index=False -) -# asset_list.to_excel( -# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx", -# index=False -# ) - -condition_costs = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx", - sheet_name="Prices - Khalim", - header=35 -) -# Remove unnamed columns and reset index -condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')] -condition_costs = condition_costs.reset_index(drop=True) - - -# We now estimate condition cost -def simulate_condition(asset_list, condition_costs): - """ - This function is for testing, and will simulate 
condition cost from 1-10 for each property to see what the - costing array looks like. - :param df: - :return: - """ - - condition_df = [] - for _, row in asset_list.iterrows(): - - n_bathrooms = row["bathrooms"] - - conditions = {} - for condition in reversed(range(1, 11)): - condition_cost = condition_costs[ - condition_costs["Condition"] == condition - ].drop(columns=["Condition"]).iloc[0] - - # Each cost is scaled by floor area - condition_cost = condition_cost * row["total_floor_area"] - condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms - - total_condition_cost = condition_cost.sum() - conditions["Condition " + str(condition)] = (total_condition_cost) - - condition_df.append( - { - "uprn": row["uprn"], - **conditions - } - ) - - condition_df = pd.DataFrame(condition_df) - - asset_list = asset_list.merge( - condition_df, - how="left", - on="uprn" - ) - - return asset_list - - -# asset_list = simulate_condition(asset_list, condition_costs) - -# We calculate the condition cost based on the condition -for _, row in asset_list.iterrows(): - - condition = row["condition_score"] - if condition in [None, ""]: - continue - condition = int(float(condition)) - - condition_cost = condition_costs[ - condition_costs["Condition"] == condition - ].drop(columns=["Condition"]).iloc[0] - - # Each cost is scaled by floor area - condition_cost = condition_cost * float(row["total_floor_area"]) - n_bathrooms = row["n_bathrooms"] - condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms) - - total_condition_cost = condition_cost.sum() - asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost - -# Store output -asset_list.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx", - index=False -) - -condition_cost_comparison = asset_list[ - ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"] -] - -# Testing 
-plans_df.head() - -example = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " - "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx" -) - -plans_df2 = plans_df.merge( - properties_df[["property_id", "landlord_property_id"]], - left_on="property_id", - right_on="property_id", - how="left" -) - -plans_df2 = plans_df2[plans_df2["scenario_id"] == 909] - -dupes = plans_df2[plans_df2["property_id"].duplicated()] - -# merge on plans -example = example.merge( - plans_df, how="left", -) ->>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c