From 68a95d02965ce78045118a51d6522f391c03fc39 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 5 Feb 2026 17:46:23 +0000 Subject: [PATCH 001/340] merged peters code --- .devcontainer/asset_list/requirements.txt | 2 +- .devcontainer/backend/requirements.txt | 2 +- asset_list/app.py | 53 ++++------------------- backend/address2UPRN/main.py | 13 ++++-- backend/address2UPRN/script.py | 15 ++++--- backend/app/requirements/requirements.txt | 2 +- sfr/principal_pitch/2_export_data.py | 6 +-- 7 files changed, 34 insertions(+), 59 deletions(-) diff --git a/.devcontainer/asset_list/requirements.txt b/.devcontainer/asset_list/requirements.txt index fe536a81..28730ed5 100644 --- a/.devcontainer/asset_list/requirements.txt +++ b/.devcontainer/asset_list/requirements.txt @@ -7,7 +7,7 @@ mangum==0.19.0 # AWS boto3==1.35.44 # Data -openpyxl==3.1.2 +openpyxl==3.1.5 # Basic pytz uvicorn[standard] diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 9562aa6a..9814c8d4 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -9,7 +9,7 @@ mangum==0.19.0 # AWS boto3==1.35.44 # Data -openpyxl==3.1.2 +openpyxl==3.1.5 # Basic pytz uvicorn[standard] diff --git a/asset_list/app.py b/asset_list/app.py index b46254f9..9bb0c1f4 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -69,61 +69,24 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney" - data_filename = "Domna SHF Wave 3 (3).xlsx" - sheet_name = "Domna Wave 3" - postcode_column = "Postcode" - address1_column = "Address 1" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["Address 1"] - missing_postcodes_method = None - landlord_year_built = "Construction Years" - landlord_os_uprn = "UPRN" - landlord_property_type = "Type" - landlord_built_form = "Attachment" - landlord_wall_construction = "Wall type" - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "Row ID" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Peabody data for cleaning - data_folder = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/data_validation" - ) - data_filename = "to_standardise_uprns.xlsx" + data_folder = "/workspaces/model/asset_list/" + data_filename = "assets.xlsx" sheet_name = "Sheet1" postcode_column = "Postcode" - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address" - address_cols_to_concat = None + address1_column = "junte found address" + address1_method = None + fulladdress_column = None + address_cols_to_concat = ["junte found address"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None + landlord_os_uprn = "juntes uprn" landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "LLUPRN" + landlord_property_id = "landlordid" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index ba386e0a..5f4fed74 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -12,6 +12,7 @@ import re EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", + "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=", ) if EPC_AUTH_TOKEN is None: @@ -300,7 +301,9 @@ def get_uprn_candidates( ) -def get_uprn(user_inputed_address: str, postcode: str, return_address=False): +def get_uprn( + user_inputed_address: str, postcode: str, return_address=False, return_EPC=False +): """ Return uprn (str) Return False if failed to find a sensible matching epc @@ -331,8 +334,9 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False): address = top_rank_df["address"].values[0] lexiscore = float(top_rank_df["lexiscore"].values[0]) + epc = top_rank_df["current-energy-rating"].values[0] - logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") + # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") # Safe to return the agreed UPRN found_uprn = top_rank_df.iloc[0]["uprn"] @@ -340,7 +344,10 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False): return None if return_address: - return found_uprn, address + if return_EPC is False: + return found_uprn, address + else: + return found_uprn, address, epc return found_uprn diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index a71b5827..0582450b 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -5,12 +5,15 @@ from backend.address2UPRN.main import get_uprn # Enable tqdm for pandas tqdm.pandas() -df = pd.read_excel("address2.xlsx") +file_name = "brentwood.xlsx" + +df = pd.read_excel(file_name) def extract_uprn(row): - print(row["User Input"], row["Postcode"]) - result = get_uprn(row["User Input"], row["Postcode"], return_address=True) + user_input = "Address" + postcode = "Postcode" + result = get_uprn(row[user_input], row[postcode], return_address=True) if result is None: return pd.Series([None, None]) @@ -19,6 +22,8 @@ def extract_uprn(row): return pd.Series([uprn, found_address]) -df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1) +df[["juntes uprn", "junte found address", "junte found epc"]] = df.progress_apply( + extract_uprn, axis=1 +) -df.to_excel("outputs2.xlsx", index=False) +df.to_excel(f"{file_name}_outputs.xlsx", index=False) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 3124034e..9fdbfe4c 100644 --- a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -10,7 +10,7 @@ mangum==0.19.0 # AWS boto3==1.35.44 # Data -openpyxl==3.1.2 +openpyxl==3.1.5 # Basic pytz sqlmodel \ No newline at end of file diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index a65509d5..4e8cd157 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -28,12 +28,12 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 524 +PORTFOLIO_ID = 506 SCENARIOS = [ - 1009, + 987, ] scenario_names = { - 1009: "EPC C; Most Economic", + 987: "EPC C", } From d29ccecefb20c2cf15d44efa67c9a1e5fb5cb94f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 5 Feb 2026 17:54:10 +0000 Subject: [PATCH 002/340] more logs --- .github/workflows/deploy_terraform.yml | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index f8718119..61ab586a 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -10,13 +10,23 @@ jobs: runs-on: ubuntu-latest outputs: stage: ${{ steps.set-stage.outputs.stage }} - + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} steps: - name: Determine stage from branch id: set-stage shell: bash run: | + echo $AWS_ACCESS_KEY_ID + echo $AWS_SECRET_ACCESS_KEY + echo $AWS_REGION + echo $DEV_DB_HOST + env + BRANCH="${GITHUB_REF_NAME}" if [[ "$BRANCH" == "prod" ]]; then From 09905cf68170b5c97c1d927c9ebc5c30f3e3bdec Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 5 Feb 2026 17:55:24 +0000 Subject: [PATCH 003/340] more logs --- .github/workflows/deploy_terraform.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 61ab586a..963160ae 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -24,6 +24,7 @@ jobs: echo $AWS_SECRET_ACCESS_KEY echo $AWS_REGION echo $DEV_DB_HOST + echo " dev db host${{ secrets.DEV_DB_HOST }}"" env From f986f85cfade72ea68fd23bb88fbd2621f2869ce Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 5 Feb 2026 17:56:22 +0000 Subject: [PATCH 004/340] m ore logs --- .github/workflows/deploy_terraform.yml | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 963160ae..4f941462 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -8,34 +8,30 @@ on: jobs: determine_stage: runs-on: ubuntu-latest + outputs: stage: ${{ steps.set-stage.outputs.stage }} - secrets: + + env: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + steps: - name: Determine stage from branch id: set-stage shell: bash run: | - echo $AWS_ACCESS_KEY_ID - echo $AWS_SECRET_ACCESS_KEY - echo $AWS_REGION - echo $DEV_DB_HOST - echo " dev db host${{ secrets.DEV_DB_HOST }}"" - - env + echo "AWS_ACCESS_KEY_ID is set? ${AWS_ACCESS_KEY_ID:+yes}" + echo "AWS_SECRET_ACCESS_KEY is set? ${AWS_SECRET_ACCESS_KEY:+yes}" + echo "AWS_REGION=$AWS_REGION" + echo "DEV_DB_HOST=$DEV_DB_HOST" BRANCH="${GITHUB_REF_NAME}" if [[ "$BRANCH" == "prod" ]]; then echo "stage=prod" >> "$GITHUB_OUTPUT" - - elif [[ "$BRANCH" == "dev" ]]; then - echo "stage=dev" >> "$GITHUB_OUTPUT" - else echo "stage=dev" >> "$GITHUB_OUTPUT" fi From 7c8a3858e79862d5db8fe8c1c482784d4cf9fb8d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 5 Feb 2026 18:03:35 +0000 Subject: [PATCH 005/340] DEV DB_HSOT --- .github/workflows/_build_image.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index fce856b6..8b0d74ef 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -40,6 +40,8 @@ on: jobs: build: runs-on: ubuntu-latest + env: + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} outputs: image_digest: ${{ steps.digest.outputs.image_digest }} From 18396d94944d4ec130e20af340de561aeb2baa23 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 6 Feb 2026 15:45:25 +0000 Subject: [PATCH 006/340] temporary script built --- .devcontainer/asset_list/devcontainer.json | 3 ++- .devcontainer/backend/devcontainer.json | 3 ++- asset_list/app.py | 14 ++++++------- backend/address2UPRN/main.py | 17 +++++++++++++-- backend/address2UPRN/script.py | 24 +++++++++++++++------- sfr/principal_pitch/2_export_data.py | 10 +++++---- 6 files changed, 49 insertions(+), 22 deletions(-) diff --git a/.devcontainer/asset_list/devcontainer.json b/.devcontainer/asset_list/devcontainer.json index 4834d559..7c597859 100644 --- a/.devcontainer/asset_list/devcontainer.json +++ b/.devcontainer/asset_list/devcontainer.json @@ -22,7 +22,8 @@ "jgclark.vscode-todo-highlight", "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", - "ms-python.black-formatter" + "ms-python.black-formatter", + "GrapeCity.gc-excelviewer" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index c672b1bf..377adf1e 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -22,7 +22,8 @@ "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", "ms-python.black-formatter", - "waderyan.gitblame" + "waderyan.gitblame", + "GrapeCity.gc-excelviewer" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/asset_list/app.py b/asset_list/app.py index 9bb0c1f4..da4eb6bb 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -70,23 +70,23 @@ def app(): """ data_folder = "/workspaces/model/asset_list/" - data_filename = "assets.xlsx" - sheet_name = "Sheet1" - postcode_column = "Postcode" - address1_column = "junte found address" + data_filename = "manchester.xlsx" + sheet_name = "PW0099 - Property List" + postcode_column = "post Code" + address1_column = "address" address1_method = None fulladdress_column = None - address_cols_to_concat = ["junte found address"] + address_cols_to_concat = ["address"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = "juntes uprn" + landlord_os_uprn = None landlord_property_type = None landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "landlordid" + landlord_property_id = "UHTprop Ref" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 5f4fed74..1b3a6c8a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -302,7 +302,11 @@ def get_uprn_candidates( def get_uprn( - user_inputed_address: str, postcode: str, return_address=False, return_EPC=False + user_inputed_address: str, + postcode: str, + return_address=False, + return_EPC=False, + return_score=True, ): """ Return uprn (str) @@ -335,6 +339,7 @@ def get_uprn( address = top_rank_df["address"].values[0] lexiscore = float(top_rank_df["lexiscore"].values[0]) epc = top_rank_df["current-energy-rating"].values[0] + score = float(top_rank_df["lexiscore"].values[0]) # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") # Safe to return the agreed UPRN @@ -347,7 +352,15 @@ def get_uprn( if return_EPC is False: return found_uprn, address else: - return found_uprn, address, epc + if return_score is False: + return found_uprn, address, epc + else: + return ( + found_uprn, + address, + epc, + score, + ) return found_uprn diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index 0582450b..59855dbc 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -5,7 +5,7 @@ from backend.address2UPRN.main import get_uprn # Enable tqdm for pandas tqdm.pandas() -file_name = "brentwood.xlsx" +file_name = "forhousing.xlsx" df = pd.read_excel(file_name) @@ -13,17 +13,27 @@ df = pd.read_excel(file_name) def extract_uprn(row): user_input = "Address" postcode = "Postcode" - result = get_uprn(row[user_input], row[postcode], return_address=True) + result = get_uprn( + row[user_input], + row[postcode], + return_address=True, + return_EPC=True, + return_score=True, + ) if result is None: - return pd.Series([None, None]) + return pd.Series([None, None, None, None]) - uprn, found_address = result - return pd.Series([uprn, found_address]) + uprn, found_address, epc, score = result + return pd.Series([uprn, found_address, epc, score]) -df[["juntes uprn", "junte found address", "junte found epc"]] = df.progress_apply( - extract_uprn, axis=1 +df[["juntes uprn", "junte found address", "junte found epc", "junte score"]] = ( + df.progress_apply(extract_uprn, axis=1) ) df.to_excel(f"{file_name}_outputs.xlsx", index=False) + +# TODO: add lexiscore +# TODO: run it +# TODO: give it to danny diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 4e8cd157..1841cf3f 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -28,14 +28,16 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 506 +PORTFOLIO_ID = 544 SCENARIOS = [ - 987, + 1027, ] scenario_names = { - 987: "EPC C", + 1027: "EPC C", } +project_name = "manchester" + def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() @@ -329,6 +331,6 @@ for scenario_id in SCENARIOS: df[df["predicted_post_works_sap"] == ""] # Create excel to store to - filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx" + filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx" with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) From 47fce5f3f8afce2f1b59b25b9c81b19901f72ea0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 11:35:00 +0000 Subject: [PATCH 007/340] added postcode splittelr handler code --- .devcontainer/asset_list/devcontainer.json | 3 ++- .devcontainer/backend/devcontainer.json | 3 ++- backend/postcode_splitter/handler/Dockerfile | 6 ++++++ 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.devcontainer/asset_list/devcontainer.json b/.devcontainer/asset_list/devcontainer.json index 7c597859..945dcd88 100644 --- a/.devcontainer/asset_list/devcontainer.json +++ b/.devcontainer/asset_list/devcontainer.json @@ -23,7 +23,8 @@ "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", "ms-python.black-formatter", - "GrapeCity.gc-excelviewer" + "GrapeCity.gc-excelviewer", + "jakobhoeg.vscode-pokemon" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index 377adf1e..5d728dcd 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -23,7 +23,8 @@ "ms-python.vscode-python-envs", "ms-python.black-formatter", "waderyan.gitblame", - "GrapeCity.gc-excelviewer" + "GrapeCity.gc-excelviewer", + "jakobhoeg.vscode-pokemon" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 7c1a7989..4c002f1d 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -3,6 +3,12 @@ FROM public.ecr.aws/lambda/python:3.10 # Set working directory (Lambda task root) WORKDIR /var/task +COPY backend/postcode_splitter/handler/requirements.txt + +RUN pip install --no-cache-dir -r requirements.txt + +COPY utils/ utils/ +COPY backend/postcode_splitter/main.py . # ----------------------------- # Lambda handler # ----------------------------- From 53367bcb980aaa13b18c05a0f281d51ff6499c34 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 11:43:01 +0000 Subject: [PATCH 008/340] docker build was wrong --- backend/postcode_splitter/handler/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 4c002f1d..3f77f38f 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -3,7 +3,7 @@ FROM public.ecr.aws/lambda/python:3.10 # Set working directory (Lambda task root) WORKDIR /var/task -COPY backend/postcode_splitter/handler/requirements.txt +COPY backend/postcode_splitter/handler/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt From 277588e629413e848e8d8776025ee55ac7447283 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 11:49:49 +0000 Subject: [PATCH 009/340] check out manual button --- .github/workflows/_deploy_lambda.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index bff106c5..be7ac95b 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -86,6 +86,13 @@ jobs: -var="image_digest=${{ inputs.image_digest }}" \ -out=lambdaplan + - name: Manual Approval + uses: trstringer/manual-approval@v1 + with: + secret: ${{ github.TOKEN }} + approvers: ${{ github.repository_owner }} + issue-title: "Approve Terraform Apply for ${{ inputs.lambda_name }} (${{ inputs.stage }})" + - name: Terraform Apply working-directory: ${{ inputs.lambda_path }} run: terraform apply -auto-approve lambdaplan From 00ea86500687dddb51614b51611b7315b6645802 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 11:58:20 +0000 Subject: [PATCH 010/340] check out manual button --- .github/workflows/_deploy_lambda.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index be7ac95b..24db77c5 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -86,12 +86,13 @@ jobs: -var="image_digest=${{ inputs.image_digest }}" \ -out=lambdaplan - - name: Manual Approval + - name: Wait for Approval uses: trstringer/manual-approval@v1 with: - secret: ${{ github.TOKEN }} - approvers: ${{ github.repository_owner }} - issue-title: "Approve Terraform Apply for ${{ inputs.lambda_name }} (${{ inputs.stage }})" + secret: ${{ secrets.GITHUB_TOKEN }} + approvers: ${{ github.actor }} + issue-title: "Click to approve Terraform Apply for ${{ inputs.lambda_name }} (${{ inputs.stage }})" + issue-body: "Press approve to proceed with Terraform Apply" - name: Terraform Apply working-directory: ${{ inputs.lambda_path }} From 3a2abca7472dae4f673194c38b8f44cf22bac79f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 12:05:28 +0000 Subject: [PATCH 011/340] check out manual button --- .github/workflows/_deploy_lambda.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 24db77c5..02d95525 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -1,5 +1,9 @@ name: Deploy Lambda (Terraform) +permissions: + contents: write + issues: write + on: workflow_call: inputs: From 969084c649b64097d30911b0e6b96616f9ae65de Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 12:11:27 +0000 Subject: [PATCH 012/340] check out manual button --- .github/workflows/_deploy_lambda.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 02d95525..24db77c5 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -1,9 +1,5 @@ name: Deploy Lambda (Terraform) -permissions: - contents: write - issues: write - on: workflow_call: inputs: From e6d994e0b0249a44fb512859ef1a9f63f536d0c1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 12:16:52 +0000 Subject: [PATCH 013/340] developers --- .github/workflows/_deploy_lambda.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 24db77c5..8d399cde 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -90,7 +90,7 @@ jobs: uses: trstringer/manual-approval@v1 with: secret: ${{ secrets.GITHUB_TOKEN }} - approvers: ${{ github.actor }} + approvers: developers issue-title: "Click to approve Terraform Apply for ${{ inputs.lambda_name }} (${{ inputs.stage }})" issue-body: "Press approve to proceed with Terraform Apply" From ffbb6212822662aeb352095a0026f1d927370d9a Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 12:26:59 +0000 Subject: [PATCH 014/340] made terraform apply work --- .github/workflows/_deploy_lambda.yml | 17 +++++++++-------- .github/workflows/deploy_terraform.yml | 2 ++ 2 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 8d399cde..d3a9f79a 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -23,6 +23,14 @@ on: required: true type: string + terraform_apply: + required: false + type: choice + default: 'false' + options: + - 'true' + - 'false' + secrets: AWS_ACCESS_KEY_ID: required: true @@ -86,14 +94,7 @@ jobs: -var="image_digest=${{ inputs.image_digest }}" \ -out=lambdaplan - - name: Wait for Approval - uses: trstringer/manual-approval@v1 - with: - secret: ${{ secrets.GITHUB_TOKEN }} - approvers: developers - issue-title: "Click to approve Terraform Apply for ${{ inputs.lambda_name }} (${{ inputs.stage }})" - issue-body: "Press approve to proceed with Terraform Apply" - - name: Terraform Apply + if: inputs.terraform_apply == 'true' || inputs.stage == 'dev' || inputs.stage == 'main' working-directory: ${{ inputs.lambda_path }} run: terraform apply -auto-approve lambdaplan diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 4f941462..1356b341 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -133,6 +133,8 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} + # This should not be deployed in production!!!! + terraform_apply: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} From 50018934907014d979b33773f8515bb136d57bc2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 12:27:53 +0000 Subject: [PATCH 015/340] terraform apply as a string --- .github/workflows/_deploy_lambda.yml | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index d3a9f79a..b3ca4583 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -25,11 +25,8 @@ on: terraform_apply: required: false - type: choice + type: string default: 'false' - options: - - 'true' - - 'false' secrets: AWS_ACCESS_KEY_ID: From 2881ecd2879d637ad9f5b544229a69521a5834d2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 12:35:18 +0000 Subject: [PATCH 016/340] terraform apply based on branch name --- .github/workflows/_deploy_lambda.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index b3ca4583..9bd686aa 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -27,6 +27,7 @@ on: required: false type: string default: 'false' + # can only be 'true' or 'false' secrets: AWS_ACCESS_KEY_ID: @@ -92,6 +93,6 @@ jobs: -out=lambdaplan - name: Terraform Apply - if: inputs.terraform_apply == 'true' || inputs.stage == 'dev' || inputs.stage == 'main' + if: inputs.terraform_apply == 'true' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main' working-directory: ${{ inputs.lambda_path }} run: terraform apply -auto-approve lambdaplan From 555544fc2da2e24923044bd6719f720225c53de0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 13:04:37 +0000 Subject: [PATCH 017/340] added requirements txt file --- backend/postcode_splitter/handler/requirements.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt index e69de29b..f6618d2b 100644 --- a/backend/postcode_splitter/handler/requirements.txt +++ b/backend/postcode_splitter/handler/requirements.txt @@ -0,0 +1,5 @@ +pandas>=1.3.0 +requests>=2.28.0 +tqdm>=4.64.0 +epc-api>=0.1.0 +openpyxl>=3.8.0 From 14dbc802c2644792ec8fe2b3df5c6d58bd881929 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 13:58:45 +0000 Subject: [PATCH 018/340] postcode spliter --- backend/address2UPRN/handler/Dockerfile | 4 +++- backend/address2UPRN/handler/requirements.txt | 7 +++++-- backend/postcode_splitter/handler/Dockerfile | 8 ++++---- backend/postcode_splitter/handler/requirements.txt | 11 ++++++----- 4 files changed, 18 insertions(+), 12 deletions(-) diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 3f7567d3..5ccb5590 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -1,4 +1,5 @@ -FROM public.ecr.aws/lambda/python:3.10 +# FROM public.ecr.aws/lambda/python:3.10 +# FROM python:3.11.10-bullseye # This is not going to be permenant - but until we solve for env variables in live prod ENV EPC_AUTH_TOKEN=a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzAg @@ -11,6 +12,7 @@ WORKDIR /var/task # ----------------------------- COPY backend/address2UPRN/handler/requirements.txt . + # Install dependencies into Lambda runtime RUN pip install --no-cache-dir -r requirements.txt diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt index bc753841..eba2c846 100644 --- a/backend/address2UPRN/handler/requirements.txt +++ b/backend/address2UPRN/handler/requirements.txt @@ -1,3 +1,6 @@ -epc-api-python==1.0.2 +pandas==2.2.2 +numpy<2.0 +requests tqdm -pandas \ No newline at end of file +openpyxl +epc-api-python==1.0.2 diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 3f77f38f..f8196297 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/lambda/python:3.10 +FROM public.ecr.aws/lambda/python:3.11 # Set working directory (Lambda task root) WORKDIR /var/task @@ -9,7 +9,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY utils/ utils/ COPY backend/postcode_splitter/main.py . -# ----------------------------- -# Lambda handler -# ----------------------------- +# # ----------------------------- +# # Lambda handler +# # ----------------------------- CMD ["main.handler"] diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt index f6618d2b..8adea4e7 100644 --- a/backend/postcode_splitter/handler/requirements.txt +++ b/backend/postcode_splitter/handler/requirements.txt @@ -1,5 +1,6 @@ -pandas>=1.3.0 -requests>=2.28.0 -tqdm>=4.64.0 -epc-api>=0.1.0 -openpyxl>=3.8.0 +pandas==2.2.2 +numpy<2.0 +requests +tqdm +openpyxl +epc-api-python==1.0.2 \ No newline at end of file From 9506b9f591fa107c8530a12f124adf428439c808 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 14:01:28 +0000 Subject: [PATCH 019/340] lol compeltely skipped lambda --- backend/address2UPRN/handler/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 5ccb5590..c6dc1180 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -1,4 +1,4 @@ -# FROM public.ecr.aws/lambda/python:3.10 +FROM public.ecr.aws/lambda/python:3.10 # FROM python:3.11.10-bullseye # This is not going to be permenant - but until we solve for env variables in live prod From 455a89aa1a2af649ae8bb235ea641c603bdcfc5e Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 14:27:05 +0000 Subject: [PATCH 020/340] added backend code --- backend/postcode_splitter/handler/Dockerfile | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index f8196297..ae9056ed 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -9,6 +9,12 @@ RUN pip install --no-cache-dir -r requirements.txt COPY utils/ utils/ COPY backend/postcode_splitter/main.py . + +COPY utils/ utils/ +COPY backend/ backend/ + +COPY backend/__init__.py backend/__init__.py + # # ----------------------------- # # Lambda handler # # ----------------------------- From 11510fbe836cb41197c713862935807404f7ed99 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 15:41:22 +0000 Subject: [PATCH 021/340] added backend code --- backend/postcode_splitter/handler/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index ae9056ed..72ce3094 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -19,3 +19,4 @@ COPY backend/__init__.py backend/__init__.py # # Lambda handler # # ----------------------------- CMD ["main.handler"] + From dd30d0d2a88eaefbd4aa839a03500cc2763c6585 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 16:15:14 +0000 Subject: [PATCH 022/340] exr Pull remove --- .../modules/lambda_execution_role/main.tf | 30 +++++++++---------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/infrastructure/terraform/modules/lambda_execution_role/main.tf index fa657afd..af035ebb 100644 --- a/infrastructure/terraform/modules/lambda_execution_role/main.tf +++ b/infrastructure/terraform/modules/lambda_execution_role/main.tf @@ -19,19 +19,19 @@ resource "aws_iam_role_policy_attachment" "basic_logs" { policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "aws_iam_role_policy" "ecr_pull" { - role = aws_iam_role.this.name +# resource "aws_iam_role_policy" "ecr_pull" { +# role = aws_iam_role.this.name - policy = jsonencode({ - Version = "2012-10-17" - Statement = [{ - Effect = "Allow" - Action = [ - "ecr:GetAuthorizationToken", - "ecr:BatchGetImage", - "ecr:GetDownloadUrlForLayer" - ] - Resource = "*" - }] - }) -} +# policy = jsonencode({ +# Version = "2012-10-17" +# Statement = [{ +# Effect = "Allow" +# Action = [ +# "ecr:GetAuthorizationToken", +# "ecr:BatchGetImage", +# "ecr:GetDownloadUrlForLayer" +# ] +# Resource = "*" +# }] +# }) +# } From e1ce16e3cdf00e461b24ca619002e2e6c065c09b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 16:28:33 +0000 Subject: [PATCH 023/340] polciy --- .../modules/lambda_execution_role/main.tf | 16 ---------------- .../terraform/modules/lambda_sqs_trigger/main.tf | 15 --------------- 2 files changed, 31 deletions(-) diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/infrastructure/terraform/modules/lambda_execution_role/main.tf index af035ebb..e593b17c 100644 --- a/infrastructure/terraform/modules/lambda_execution_role/main.tf +++ b/infrastructure/terraform/modules/lambda_execution_role/main.tf @@ -19,19 +19,3 @@ resource "aws_iam_role_policy_attachment" "basic_logs" { policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -# resource "aws_iam_role_policy" "ecr_pull" { -# role = aws_iam_role.this.name - -# policy = jsonencode({ -# Version = "2012-10-17" -# Statement = [{ -# Effect = "Allow" -# Action = [ -# "ecr:GetAuthorizationToken", -# "ecr:BatchGetImage", -# "ecr:GetDownloadUrlForLayer" -# ] -# Resource = "*" -# }] -# }) -# } diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf index 5919e10f..0cf9a353 100644 --- a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf @@ -5,19 +5,4 @@ resource "aws_lambda_event_source_mapping" "this" { enabled = true } -resource "aws_iam_role_policy" "allow_sqs" { - role = var.lambda_role_name - policy = jsonencode({ - Version = "2012-10-17" - Statement = [{ - Effect = "Allow" - Action = [ - "sqs:ReceiveMessage", - "sqs:DeleteMessage", - "sqs:GetQueueAttributes" - ] - Resource = var.queue_arn - }] - }) -} From 65daf388da8c1f5c877f6f43e8939bee5b7ccc77 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 16:43:46 +0000 Subject: [PATCH 024/340] sqs policy --- .../terraform/modules/lambda_sqs_trigger/main.tf | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf index 0cf9a353..5919e10f 100644 --- a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf @@ -5,4 +5,19 @@ resource "aws_lambda_event_source_mapping" "this" { enabled = true } +resource "aws_iam_role_policy" "allow_sqs" { + role = var.lambda_role_name + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes" + ] + Resource = var.queue_arn + }] + }) +} From b9d31fa6157112525f5b2f482831652ae6f49881 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 18:26:41 +0000 Subject: [PATCH 025/340] sqs policy --- .../terraform/lambda/modules/lambda_with_sqs/outputs.tf | 1 + 1 file changed, 1 insertion(+) diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf index afc9246d..b408593f 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf @@ -9,3 +9,4 @@ output "queue_arn" { output "queue_url" { value = module.queue.queue_url } + From 10c552772b4efff0a04d4ed1556b415633e225f3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 18:53:49 +0000 Subject: [PATCH 026/340] more useful logs --- backend/postcode_splitter/main.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d55f618a..dda1163a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -119,8 +119,17 @@ def main(): def handler(event, context): - print("hello Postcode splitter world") - return {"statusCode": 200, "body": "hello world"} + print(f"Function: {context.function_name}") + print(f"Function Version: {context.function_version}") + print(f"Log Group: {context.log_group_name}") + print(f"Log Stream: {context.log_stream_name}") + print(f"Request ID: {context.aws_request_id}") + print(f"Memory Limit: {context.memory_limit_in_mb} MB") + print(f"Remaining Time: {context.get_remaining_time_in_millis()} ms") + print(f"Event: {event}") + + print("Postcode splitter handler invoked") + return {"statusCode": 200, "body": "postcode splitter executed"} if __name__ == "__main__": From 79eb81fd94c474e21cd911d704d6bc73dc3f1f54 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 20:28:16 +0000 Subject: [PATCH 027/340] force it to rerun --- backend/postcode_splitter/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index dda1163a..da15a48a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -127,6 +127,7 @@ def handler(event, context): print(f"Memory Limit: {context.memory_limit_in_mb} MB") print(f"Remaining Time: {context.get_remaining_time_in_millis()} ms") print(f"Event: {event}") + print(f"Event: {event}") print("Postcode splitter handler invoked") return {"statusCode": 200, "body": "postcode splitter executed"} From 53ec9c261c807c7b84ac8d16841956a2c3c5d1d5 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 21:26:37 +0000 Subject: [PATCH 028/340] test post code splitter with csv file --- backend/postcode_splitter/main.py | 149 ++++++++++++++++++++++++++++-- 1 file changed, 140 insertions(+), 9 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index da15a48a..d5fe3b1b 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,12 +1,34 @@ +import json import pandas as pd import requests +from uuid import UUID +from urllib.parse import unquote from backend.address2UPRN.main import ( resolve_uprns_for_postcode_group, get_epc_data_with_postcode, ) +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict from tqdm import tqdm +def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: + """ + Parse AWS console S3 URL to extract bucket and key. + + Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path + """ + if "console.aws.amazon.com" in s3_uri and "?prefix=" in s3_uri: + base, query = s3_uri.split("?", 1) + path_parts = base.split("/s3/object/") + if len(path_parts) > 1: + bucket = path_parts[1] + params = dict(item.split("=") for item in query.split("&") if "=" in item) + key = unquote(params.get("prefix", "")) + return bucket, key + raise ValueError(f"Could not parse S3 URI: {s3_uri}") + + def sanitise_postcode(postcode: str) -> str | None: """ Normalise postcode for grouping. @@ -120,17 +142,126 @@ def main(): def handler(event, context): print(f"Function: {context.function_name}") - print(f"Function Version: {context.function_version}") - print(f"Log Group: {context.log_group_name}") - print(f"Log Stream: {context.log_stream_name}") print(f"Request ID: {context.aws_request_id}") - print(f"Memory Limit: {context.memory_limit_in_mb} MB") - print(f"Remaining Time: {context.get_remaining_time_in_millis()} ms") - print(f"Event: {event}") - print(f"Event: {event}") - print("Postcode splitter handler invoked") - return {"statusCode": 200, "body": "postcode splitter executed"} + # Example SQS message for testing (copy and paste into SQS): + # { + # "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + # "s3_uri": "https://337213553626-7ovirzjr.eu-west-2.console.aws.amazon.com/s3/object/retrofit-data-dev?region=eu-west-2&prefix=ara_raw_inputs/peabody/2025_11_11+-+Peabody+-+Data+Extracts+for+Domna_transformed.csv" + # } + + # Handle both single event and batch events (SQS, etc.) + records = event.get("Records", [event]) + results = [] + errors = [] + subtask_interface = SubTaskInterface() + + for record in records: + task_id = None + subtask_id = None + try: + # Parse body + if isinstance(record.get("body"), str): + body = json.loads(record["body"]) + else: + body = record.get("body", {}) + + # Validate required fields + task_id = body.get("task_id") + s3_uri = body.get("s3_uri") + + if not task_id: + errors.append({"error": "Missing required field: task_id"}) + continue + + if not s3_uri: + errors.append({"error": "Missing required field: s3_uri"}) + continue + + # Convert task_id to UUID + try: + task_id = UUID(task_id) if isinstance(task_id, str) else task_id + except ValueError as e: + errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) + continue + + # Create a new subtask for this postcode splitter invocation + subtask_id = subtask_interface.create_subtask( + task_id=task_id, inputs={"s3_uri": s3_uri} + ) + print(f"Created subtask {subtask_id} for task {task_id}") + + # Process normal flow + print(f"Processing task_id: {task_id}") + print(f"Processing s3_uri: {s3_uri}") + + # Read CSV from S3 + print("Reading CSV from S3...") + bucket, key = parse_s3_console_url(s3_uri) + print(f"Parsed S3 - Bucket: {bucket}, Key: {key}") + csv_data = read_csv_from_s3_dict(bucket, key) + df = pd.DataFrame(csv_data) + print(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") + + # Get head for demo + df_head = df.head() + print("DataFrame head:") + print(df_head) + df_head_dict = df_head.to_dict("records") + + results.append( + { + "message": "Postcode splitter processing started", + "task_id": str(task_id), + "s3_uri": s3_uri, + "subtask_id": str(subtask_id), + } + ) + + # Mark subtask as complete after successful processing + subtask_interface.update_subtask_status( + subtask_id, + "complete", + outputs={ + "status": "processing_complete", + "s3_uri": s3_uri, + "rows_processed": len(df), + }, + ) + print(f"Subtask {subtask_id} marked as complete") + + except json.JSONDecodeError as e: + errors.append({"error": "Invalid JSON in request body", "details": str(e)}) + # Mark subtask as failed if we have one + if subtask_id: + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(e)} + ) + except Exception as db_error: + print(f"Failed to update subtask status: {db_error}") + except Exception as e: + print(f"Unexpected error processing record: {e}") + errors.append({"error": "Unexpected error", "details": str(e)}) + # Mark subtask as failed if we have one + if subtask_id: + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(e)} + ) + except Exception as db_error: + print(f"Failed to update subtask status: {db_error}") + + # Return error if all records failed + if errors and not results: + return {"statusCode": 500, "body": json.dumps({"errors": errors})} + + return { + "statusCode": 200, + "body": json.dumps( + {"processed": results, "errors": errors if errors else None} + ), + } if __name__ == "__main__": From e5cf3a426e3d0b762e95af0984b883eeb6c31972 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 21:32:26 +0000 Subject: [PATCH 029/340] imports --- backend/postcode_splitter/handler/Dockerfile | 18 +++++++++++------- .../postcode_splitter/handler/requirements.txt | 6 +++++- 2 files changed, 16 insertions(+), 8 deletions(-) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 72ce3094..7ddd1e11 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -7,16 +7,20 @@ COPY backend/postcode_splitter/handler/requirements.txt . RUN pip install --no-cache-dir -r requirements.txt -COPY utils/ utils/ -COPY backend/postcode_splitter/main.py . - +# Copy necessary files for database and utility imports COPY utils/ utils/ COPY backend/ backend/ -COPY backend/__init__.py backend/__init__.py +# Copy the handler +COPY backend/postcode_splitter/main.py . -# # ----------------------------- -# # Lambda handler -# # ----------------------------- +# Ensure __init__.py files exist for proper module importing +RUN touch backend/__init__.py +RUN touch backend/app/__init__.py +RUN touch backend/db/__init__.py +RUN touch backend/postcode_splitter/__init__.py +RUN touch utils/__init__.py + +# Lambda handler CMD ["main.handler"] diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt index 8adea4e7..a718b818 100644 --- a/backend/postcode_splitter/handler/requirements.txt +++ b/backend/postcode_splitter/handler/requirements.txt @@ -3,4 +3,8 @@ numpy<2.0 requests tqdm openpyxl -epc-api-python==1.0.2 \ No newline at end of file +epc-api-python==1.0.2 +boto3==1.35.44 +sqlmodel +sqlalchemy==2.0.36 +psycopg2-binary==2.9.10 \ No newline at end of file From e3e024f70c869cc5ef73ee84eea9ba740f111468 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 21:37:02 +0000 Subject: [PATCH 030/340] imports --- backend/postcode_splitter/handler/Dockerfile | 7 ------- 1 file changed, 7 deletions(-) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 7ddd1e11..0ec53108 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -14,13 +14,6 @@ COPY backend/ backend/ # Copy the handler COPY backend/postcode_splitter/main.py . -# Ensure __init__.py files exist for proper module importing -RUN touch backend/__init__.py -RUN touch backend/app/__init__.py -RUN touch backend/db/__init__.py -RUN touch backend/postcode_splitter/__init__.py -RUN touch utils/__init__.py - # Lambda handler CMD ["main.handler"] From c673604ec4b98a1fcae55ef010c236d62a658e5f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 21:43:03 +0000 Subject: [PATCH 031/340] imports --- backend/postcode_splitter/handler/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 0ec53108..13ac309e 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -10,6 +10,7 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy necessary files for database and utility imports COPY utils/ utils/ COPY backend/ backend/ +COPY datatypes/ datatypes/ # Copy the handler COPY backend/postcode_splitter/main.py . From 45026b402fb6004bbbe4d7178f78466d4fb0bdbf Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 21:47:23 +0000 Subject: [PATCH 032/340] pydantic settings --- backend/postcode_splitter/handler/requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt index a718b818..6ef41b2d 100644 --- a/backend/postcode_splitter/handler/requirements.txt +++ b/backend/postcode_splitter/handler/requirements.txt @@ -7,4 +7,5 @@ epc-api-python==1.0.2 boto3==1.35.44 sqlmodel sqlalchemy==2.0.36 -psycopg2-binary==2.9.10 \ No newline at end of file +psycopg2-binary==2.9.10 +pydantic-settings==2.6.0 \ No newline at end of file From 5a995c8443de38b184cfff9ed82bb95fad5b7df0 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 21:57:19 +0000 Subject: [PATCH 033/340] save a random port number --- backend/.env.local | 2 +- backend/postcode_splitter/main.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/.env.local b/backend/.env.local index 22e1db35..9b478e53 100644 --- a/backend/.env.local +++ b/backend/.env.local @@ -30,7 +30,7 @@ GOOGLE_SOLAR_API_KEY="test" DB_HOST="test" DB_PASSWORD="test" DB_USERNAME="test" -DB_PORT="test" +DB_PORT="5432" DB_NAME="test" SAP_PREDICTIONS_BUCKET="test" CARBON_PREDICTIONS_BUCKET="test" diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d5fe3b1b..740d1c7d 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -146,8 +146,8 @@ def handler(event, context): # Example SQS message for testing (copy and paste into SQS): # { - # "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - # "s3_uri": "https://337213553626-7ovirzjr.eu-west-2.console.aws.amazon.com/s3/object/retrofit-data-dev?region=eu-west-2&prefix=ara_raw_inputs/peabody/2025_11_11+-+Peabody+-+Data+Extracts+for+Domna_transformed.csv" + # "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + # "s3_uri": "https://337213553626-7ovirzjr.eu-west-2.console.aws.amazon.com/s3/object/retrofit-data-dev?region=eu-west-2&prefix=ara_raw_inputs/peabody/2025_11_11+-+Peabody+-+Data+Extracts+for+Domna_transformed.csv", # } # Handle both single event and batch events (SQS, etc.) From 851432b3573bebe56a3b9d9c439710670b9c4d16 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:10:27 +0000 Subject: [PATCH 034/340] database things --- .github/workflows/_build_image.yml | 15 ++++----- .github/workflows/deploy_terraform.yml | 4 +++ backend/postcode_splitter/handler/Dockerfile | 8 +++++ .../terraform/lambda/postcodeSplitter/main.tf | 31 ++++++++++++++++--- 4 files changed, 46 insertions(+), 12 deletions(-) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 8b0d74ef..641e31f9 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -34,14 +34,19 @@ on: required: true DEV_DB_HOST: required: false - REAL_DB_HOST: + DEV_DB_PORT: + required: false + DEV_DB_NAME: required: false jobs: build: runs-on: ubuntu-latest + env: DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} outputs: image_digest: ${{ steps.digest.outputs.image_digest }} @@ -82,11 +87,7 @@ jobs: temp=$(eval echo "$line") BUILD_ARGS="$BUILD_ARGS --build-arg $temp" done <<< "${{ inputs.build_args }}" - - echo "dev db host: $DEV_DB_HOST" - echo "real db host: $REAL_DB_HOST" - echo "aws_key_id: $AWS_ACCESS_KEY_ID" - + docker build \ -f ${{ inputs.dockerfile_path }} \ $BUILD_ARGS \ @@ -103,4 +104,4 @@ jobs: --image-ids imageTag=${GITHUB_SHA} \ --query 'imageDetails[0].imageDigest' \ --output text) - echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT" + echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT" \ No newline at end of file diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 1356b341..ab42d4b9 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -116,6 +116,10 @@ jobs: ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} dockerfile_path: backend/postcode_splitter/handler/Dockerfile build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 13ac309e..74c00b9f 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -1,5 +1,13 @@ FROM public.ecr.aws/lambda/python:3.11 +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} + # Set working directory (Lambda task root) WORKDIR /var/task diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index ebbdbfdc..7ba4506c 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -1,3 +1,20 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this + region = "eu-west-2" + } +} +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -7,8 +24,12 @@ module "lambda" { image_uri = local.image_uri - environment = { - STAGE = var.stage - LOG_LEVEL = "info" - } -} + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + }, + ) +} \ No newline at end of file From 091edfdd3a9c93cbea5c55e767d7dd23a65adcec Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:12:11 +0000 Subject: [PATCH 035/340] database things --- .github/workflows/deploy_terraform.yml | 2 -- backend/condition/handler/Dockerfile | 2 -- backend/condition/handler/handler.py | 4 ---- 3 files changed, 8 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index ab42d4b9..9a9b4421 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -157,7 +157,6 @@ jobs: build_args: | JUNTE=best DEV_DB_HOST=$DEV_DB_HOST - REAL_DB_HOST=$REAL_DB_HOST AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID AWS_REGION=$AWS_REGION secrets: @@ -165,7 +164,6 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} - REAL_DB_HOST: ${{ secrets.dev_DB_HOST }} # ============================================================ # Deploy Condition ETL Lambda diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile index 5cb95532..8759dff3 100644 --- a/backend/condition/handler/Dockerfile +++ b/backend/condition/handler/Dockerfile @@ -12,8 +12,6 @@ ENV JUNTE=${JUNTE} ARG DEV_DB_HOST ENV DEV_DB_HOST=${DEV_DB_HOST} -ARG REAL_DB_HOST -ENV REAL_DB_HOST=${REAL_DB_HOST} ARG AWS_ACCESS_KEY_ID ENV AWS_ACCESS_KEY_ID=${AWS_ACCESS_KEY_ID} diff --git a/backend/condition/handler/handler.py b/backend/condition/handler/handler.py index 21fa6928..0f8dd940 100644 --- a/backend/condition/handler/handler.py +++ b/backend/condition/handler/handler.py @@ -23,10 +23,6 @@ def handler(event: Mapping[str, Any], context: Any) -> None: "hello DEV DB HOST:", os.getenv("DEV_DB_HOST", "empty db"), ) - print( - "hello REAL DB HOST:", - os.getenv("REAL_DB_HOST", "empty db"), - ) print( "hello access key", os.getenv("AWS_ACCESS_KEY_ID", "empty key"), From 72df7fbb745294f38f622f9b297c16bd9ae6b8b6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:13:10 +0000 Subject: [PATCH 036/340] database things --- .github/workflows/deploy_terraform.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 9a9b4421..b9fc533e 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -154,16 +154,10 @@ jobs: ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} dockerfile_path: backend/condition/handler/Dockerfile build_context: . - build_args: | - JUNTE=best - DEV_DB_HOST=$DEV_DB_HOST - AWS_ACCESS_KEY_ID=$AWS_ACCESS_KEY_ID - AWS_REGION=$AWS_REGION secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} # ============================================================ # Deploy Condition ETL Lambda From 68ddced1af7f9b18d6e93215cc0d128b1b9c72f4 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:21:58 +0000 Subject: [PATCH 037/340] pass in secrets --- .github/workflows/deploy_terraform.yml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index b9fc533e..c863f6f1 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -124,6 +124,9 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} # ============================================================ # 3️⃣ Deploy Postcode Splitter Lambda From c56789a5023816fdd4e7831a2494b1316cdf550b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:31:04 +0000 Subject: [PATCH 038/340] show me secrets --- backend/postcode_splitter/main.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 740d1c7d..d51866a4 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,3 +1,12 @@ +import os +import sys +print("=" * 60) +print("ENVIRONMENT AT STARTUP:") +print("=" * 60) +for k, v in sorted(os.environ.items()): + print(f"{k}={v}") +print("=" * 60) + import json import pandas as pd import requests From 477ebcef6705738f11fad88d8016db475e3a0155 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:40:08 +0000 Subject: [PATCH 039/340] add more logging --- backend/postcode_splitter/main.py | 39 +++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 12 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d51866a4..14610171 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -7,18 +7,33 @@ for k, v in sorted(os.environ.items()): print(f"{k}={v}") print("=" * 60) -import json -import pandas as pd -import requests -from uuid import UUID -from urllib.parse import unquote -from backend.address2UPRN.main import ( - resolve_uprns_for_postcode_group, - get_epc_data_with_postcode, -) -from backend.app.db.functions.tasks.Tasks import SubTaskInterface -from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict -from tqdm import tqdm +try: + import json + print("✓ json imported") + import pandas as pd + print("✓ pandas imported") + import requests + print("✓ requests imported") + from uuid import UUID + print("✓ UUID imported") + from urllib.parse import unquote + print("✓ urllib.parse imported") + from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict + print("✓ utils.s3 imported") + from tqdm import tqdm + print("✓ tqdm imported") + from backend.address2UPRN.main import ( + resolve_uprns_for_postcode_group, + get_epc_data_with_postcode, + ) + print("✓ backend.address2UPRN imported") + from backend.app.db.functions.tasks.Tasks import SubTaskInterface + print("✓ SubTaskInterface imported") +except Exception as e: + print(f"✗ IMPORT ERROR: {type(e).__name__}: {e}") + import traceback + traceback.print_exc() + raise def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: From dd8a490210252f5b2c0c8de893c9cb7ab109663e Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 22:57:23 +0000 Subject: [PATCH 040/340] lets do subtasks first --- backend/address2UPRN/main.py | 7 ++----- backend/postcode_splitter/main.py | 15 +++++++++++++-- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 1b3a6c8a..293ce3d9 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -5,10 +5,11 @@ import pandas as pd from difflib import SequenceMatcher from tqdm import tqdm from utils.logger import setup_logger +import re +from typing import Set logger = setup_logger() -import re EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", @@ -18,10 +19,6 @@ EPC_AUTH_TOKEN = os.getenv( if EPC_AUTH_TOKEN is None: raise RuntimeError("EPC_AUTH_TOKEN not defined in env") -import re -from difflib import SequenceMatcher -from typing import Set - def levenshtein(a: str, b: str) -> float: """ diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 14610171..e3a8c438 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,5 +1,6 @@ import os import sys + print("=" * 60) print("ENVIRONMENT AT STARTUP:") print("=" * 60) @@ -9,29 +10,39 @@ print("=" * 60) try: import json + print("✓ json imported") import pandas as pd + print("✓ pandas imported") import requests + print("✓ requests imported") from uuid import UUID + print("✓ UUID imported") from urllib.parse import unquote + print("✓ urllib.parse imported") from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict + print("✓ utils.s3 imported") from tqdm import tqdm + print("✓ tqdm imported") + from backend.app.db.functions.tasks.Tasks import SubTaskInterface + + print("✓ SubTaskInterface imported") from backend.address2UPRN.main import ( resolve_uprns_for_postcode_group, get_epc_data_with_postcode, ) + print("✓ backend.address2UPRN imported") - from backend.app.db.functions.tasks.Tasks import SubTaskInterface - print("✓ SubTaskInterface imported") except Exception as e: print(f"✗ IMPORT ERROR: {type(e).__name__}: {e}") import traceback + traceback.print_exc() raise From 1a0d463e2eeeb4c4d85a84a8e7cdaae74fc4d006 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 23:07:51 +0000 Subject: [PATCH 041/340] missing init.py --- backend/app/db/functions/tasks/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 backend/app/db/functions/tasks/__init__.py diff --git a/backend/app/db/functions/tasks/__init__.py b/backend/app/db/functions/tasks/__init__.py new file mode 100644 index 00000000..e69de29b From c0efa07d2a415697ae96ec41415c1d9152f7abb7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 23:15:53 +0000 Subject: [PATCH 042/340] handler remap --- backend/postcode_splitter/handler/Dockerfile | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 74c00b9f..ad0d1d69 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -20,9 +20,6 @@ COPY utils/ utils/ COPY backend/ backend/ COPY datatypes/ datatypes/ -# Copy the handler -COPY backend/postcode_splitter/main.py . - # Lambda handler -CMD ["main.handler"] +CMD ["backend.postcode_splitter.main.handler"] From f5981e91474e88d072479b82b0d1060a61e438fc Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 23:22:55 +0000 Subject: [PATCH 043/340] imports are working now? --- backend/postcode_splitter/handler/Dockerfile | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index ad0d1d69..74c00b9f 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -20,6 +20,9 @@ COPY utils/ utils/ COPY backend/ backend/ COPY datatypes/ datatypes/ -# Lambda handler -CMD ["backend.postcode_splitter.main.handler"] +# Copy the handler +COPY backend/postcode_splitter/main.py . + +# Lambda handler +CMD ["main.handler"] From 8325bb53cf188274a8a2a3c92714601b8b50b288 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 23:25:52 +0000 Subject: [PATCH 044/340] added more logs --- backend/postcode_splitter/main.py | 32 ++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index e3a8c438..282e432a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -176,8 +176,13 @@ def main(): def handler(event, context): + print("=" * 60) + print("HANDLER INVOKED") + print("=" * 60) print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") + print(f"Event received: {type(event)}") + print(f"Event keys: {event.keys() if isinstance(event, dict) else 'N/A'}") # Example SQS message for testing (copy and paste into SQS): # { @@ -186,24 +191,33 @@ def handler(event, context): # } # Handle both single event and batch events (SQS, etc.) + print("Extracting records from event...") records = event.get("Records", [event]) + print(f"Found {len(records)} record(s) to process") results = [] errors = [] + + print("Initializing SubTaskInterface...") subtask_interface = SubTaskInterface() + print("✓ SubTaskInterface initialized") for record in records: + print("Processing record...") task_id = None subtask_id = None try: # Parse body + print("Parsing body from record...") if isinstance(record.get("body"), str): body = json.loads(record["body"]) else: body = record.get("body", {}) + print(f"Body parsed: {body}") # Validate required fields task_id = body.get("task_id") s3_uri = body.get("s3_uri") + print(f"task_id: {task_id}, s3_uri: {s3_uri}") if not task_id: errors.append({"error": "Missing required field: task_id"}) @@ -214,13 +228,16 @@ def handler(event, context): continue # Convert task_id to UUID + print("Converting task_id to UUID...") try: task_id = UUID(task_id) if isinstance(task_id, str) else task_id + print(f"UUID conversion successful: {task_id}") except ValueError as e: errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) continue # Create a new subtask for this postcode splitter invocation + print(f"Creating subtask for task {task_id}...") subtask_id = subtask_interface.create_subtask( task_id=task_id, inputs={"s3_uri": s3_uri} ) @@ -231,19 +248,26 @@ def handler(event, context): print(f"Processing s3_uri: {s3_uri}") # Read CSV from S3 - print("Reading CSV from S3...") + print("Parsing S3 URI...") bucket, key = parse_s3_console_url(s3_uri) - print(f"Parsed S3 - Bucket: {bucket}, Key: {key}") + print(f"Bucket: {bucket}, Key: {key}") + + print("Fetching CSV from S3...") csv_data = read_csv_from_s3_dict(bucket, key) + print(f"CSV fetched: {len(csv_data)} rows") + + print("Creating DataFrame...") df = pd.DataFrame(csv_data) - print(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") + print(f"DataFrame created: {len(df)} rows, {len(df.columns)} columns") # Get head for demo + print("Getting DataFrame head...") df_head = df.head() print("DataFrame head:") print(df_head) df_head_dict = df_head.to_dict("records") + print("Appending result...") results.append( { "message": "Postcode splitter processing started", @@ -252,8 +276,10 @@ def handler(event, context): "subtask_id": str(subtask_id), } ) + print("Result appended") # Mark subtask as complete after successful processing + print("Updating subtask status to complete...") subtask_interface.update_subtask_status( subtask_id, "complete", From 94524379e480ca885cbbab4270578bbd977cbe00 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 9 Feb 2026 23:34:02 +0000 Subject: [PATCH 045/340] even more logs --- backend/postcode_splitter/main.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 282e432a..8210bf78 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -203,14 +203,21 @@ def handler(event, context): for record in records: print("Processing record...") + print(f"Record type: {type(record)}") + print(f"Record: {record}") task_id = None subtask_id = None try: # Parse body print("Parsing body from record...") + print(f"record.get('body'): {record.get('body')}") + print(f"isinstance(record.get('body'), str): {isinstance(record.get('body'), str)}") + if isinstance(record.get("body"), str): + print("Body is string, parsing JSON...") body = json.loads(record["body"]) else: + print("Body is not string, using directly...") body = record.get("body", {}) print(f"Body parsed: {body}") From 8121e6d5b67d87b8e60b5f28a6a03edae2d7e465 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 07:53:54 +0000 Subject: [PATCH 046/340] more logs for s3 --- backend/postcode_splitter/main.py | 146 +++++++++++------------------- 1 file changed, 53 insertions(+), 93 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 8210bf78..1d0e56a0 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,50 +1,20 @@ import os import sys +import json +import pandas as pd +import requests +from uuid import UUID +from urllib.parse import unquote +from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict +from utils.logger import setup_logger +from tqdm import tqdm +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from backend.address2UPRN.main import ( + resolve_uprns_for_postcode_group, + get_epc_data_with_postcode, +) -print("=" * 60) -print("ENVIRONMENT AT STARTUP:") -print("=" * 60) -for k, v in sorted(os.environ.items()): - print(f"{k}={v}") -print("=" * 60) - -try: - import json - - print("✓ json imported") - import pandas as pd - - print("✓ pandas imported") - import requests - - print("✓ requests imported") - from uuid import UUID - - print("✓ UUID imported") - from urllib.parse import unquote - - print("✓ urllib.parse imported") - from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict - - print("✓ utils.s3 imported") - from tqdm import tqdm - - print("✓ tqdm imported") - from backend.app.db.functions.tasks.Tasks import SubTaskInterface - - print("✓ SubTaskInterface imported") - from backend.address2UPRN.main import ( - resolve_uprns_for_postcode_group, - get_epc_data_with_postcode, - ) - - print("✓ backend.address2UPRN imported") -except Exception as e: - print(f"✗ IMPORT ERROR: {type(e).__name__}: {e}") - import traceback - - traceback.print_exc() - raise +logger = setup_logger() def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: @@ -53,15 +23,41 @@ def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path """ - if "console.aws.amazon.com" in s3_uri and "?prefix=" in s3_uri: + logger.info(f"Parsing S3 URI: {s3_uri}") + + if "console.aws.amazon.com" not in s3_uri: + logger.error("URI does not contain 'console.aws.amazon.com'") + raise ValueError(f"Could not parse S3 URI: {s3_uri}") + + if "?prefix=" not in s3_uri: + logger.error("URI does not contain '?prefix='") + raise ValueError(f"Could not parse S3 URI: {s3_uri}") + + try: base, query = s3_uri.split("?", 1) + logger.debug(f"Base: {base}") + logger.debug(f"Query: {query}") + path_parts = base.split("/s3/object/") + logger.debug(f"Path parts: {path_parts}") + if len(path_parts) > 1: bucket = path_parts[1] + logger.info(f"Extracted bucket: {bucket}") + params = dict(item.split("=") for item in query.split("&") if "=" in item) + logger.debug(f"Query params: {params}") + key = unquote(params.get("prefix", "")) + logger.info(f"Extracted key: {key}") + return bucket, key - raise ValueError(f"Could not parse S3 URI: {s3_uri}") + else: + logger.error(f"Could not find '/s3/object/' in URI") + raise ValueError(f"Could not parse S3 URI: {s3_uri}") + except Exception as e: + logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}") + raise ValueError(f"Could not parse S3 URI: {s3_uri}") from e def sanitise_postcode(postcode: str) -> str | None: @@ -176,13 +172,8 @@ def main(): def handler(event, context): - print("=" * 60) - print("HANDLER INVOKED") - print("=" * 60) print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") - print(f"Event received: {type(event)}") - print(f"Event keys: {event.keys() if isinstance(event, dict) else 'N/A'}") # Example SQS message for testing (copy and paste into SQS): # { @@ -191,40 +182,24 @@ def handler(event, context): # } # Handle both single event and batch events (SQS, etc.) - print("Extracting records from event...") records = event.get("Records", [event]) - print(f"Found {len(records)} record(s) to process") results = [] errors = [] - - print("Initializing SubTaskInterface...") subtask_interface = SubTaskInterface() - print("✓ SubTaskInterface initialized") for record in records: - print("Processing record...") - print(f"Record type: {type(record)}") - print(f"Record: {record}") task_id = None subtask_id = None try: # Parse body - print("Parsing body from record...") - print(f"record.get('body'): {record.get('body')}") - print(f"isinstance(record.get('body'), str): {isinstance(record.get('body'), str)}") - if isinstance(record.get("body"), str): - print("Body is string, parsing JSON...") body = json.loads(record["body"]) else: - print("Body is not string, using directly...") body = record.get("body", {}) - print(f"Body parsed: {body}") # Validate required fields task_id = body.get("task_id") s3_uri = body.get("s3_uri") - print(f"task_id: {task_id}, s3_uri: {s3_uri}") if not task_id: errors.append({"error": "Missing required field: task_id"}) @@ -235,46 +210,32 @@ def handler(event, context): continue # Convert task_id to UUID - print("Converting task_id to UUID...") try: task_id = UUID(task_id) if isinstance(task_id, str) else task_id - print(f"UUID conversion successful: {task_id}") except ValueError as e: errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) continue # Create a new subtask for this postcode splitter invocation - print(f"Creating subtask for task {task_id}...") subtask_id = subtask_interface.create_subtask( task_id=task_id, inputs={"s3_uri": s3_uri} ) - print(f"Created subtask {subtask_id} for task {task_id}") - - # Process normal flow - print(f"Processing task_id: {task_id}") - print(f"Processing s3_uri: {s3_uri}") + logger.info(f"Created subtask {subtask_id} for task {task_id}") # Read CSV from S3 - print("Parsing S3 URI...") + logger.info(f"Processing S3 URI: {s3_uri}") bucket, key = parse_s3_console_url(s3_uri) - print(f"Bucket: {bucket}, Key: {key}") + logger.info(f"S3 Bucket: {bucket}, Key: {key}") - print("Fetching CSV from S3...") csv_data = read_csv_from_s3_dict(bucket, key) - print(f"CSV fetched: {len(csv_data)} rows") - - print("Creating DataFrame...") df = pd.DataFrame(csv_data) - print(f"DataFrame created: {len(df)} rows, {len(df.columns)} columns") + logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") # Get head for demo - print("Getting DataFrame head...") df_head = df.head() - print("DataFrame head:") - print(df_head) - df_head_dict = df_head.to_dict("records") + logger.info("DataFrame head:") + logger.info(f"\n{df_head}") - print("Appending result...") results.append( { "message": "Postcode splitter processing started", @@ -283,10 +244,8 @@ def handler(event, context): "subtask_id": str(subtask_id), } ) - print("Result appended") # Mark subtask as complete after successful processing - print("Updating subtask status to complete...") subtask_interface.update_subtask_status( subtask_id, "complete", @@ -296,9 +255,10 @@ def handler(event, context): "rows_processed": len(df), }, ) - print(f"Subtask {subtask_id} marked as complete") + logger.info(f"Subtask {subtask_id} marked as complete") except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in request body: {e}") errors.append({"error": "Invalid JSON in request body", "details": str(e)}) # Mark subtask as failed if we have one if subtask_id: @@ -307,9 +267,9 @@ def handler(event, context): subtask_id, "failed", outputs={"error": str(e)} ) except Exception as db_error: - print(f"Failed to update subtask status: {db_error}") + logger.error(f"Failed to update subtask status: {db_error}") except Exception as e: - print(f"Unexpected error processing record: {e}") + logger.error(f"Unexpected error processing record: {e}", exc_info=True) errors.append({"error": "Unexpected error", "details": str(e)}) # Mark subtask as failed if we have one if subtask_id: @@ -318,7 +278,7 @@ def handler(event, context): subtask_id, "failed", outputs={"error": str(e)} ) except Exception as db_error: - print(f"Failed to update subtask status: {db_error}") + logger.error(f"Failed to update subtask status: {db_error}") # Return error if all records failed if errors and not results: From a94e5ca592fd1e83d320bc2d8ae0bf2c34996282 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 08:04:57 +0000 Subject: [PATCH 047/340] s3 url processing --- backend/postcode_splitter/main.py | 43 ++++++++++++------------------- 1 file changed, 17 insertions(+), 26 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 1d0e56a0..adb8e5c9 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -23,41 +23,32 @@ def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path """ - logger.info(f"Parsing S3 URI: {s3_uri}") - - if "console.aws.amazon.com" not in s3_uri: - logger.error("URI does not contain 'console.aws.amazon.com'") - raise ValueError(f"Could not parse S3 URI: {s3_uri}") - - if "?prefix=" not in s3_uri: - logger.error("URI does not contain '?prefix='") - raise ValueError(f"Could not parse S3 URI: {s3_uri}") + logger.info("Parsing S3 console URL") try: + # Split base URL and query string + if "?" not in s3_uri: + raise ValueError("No query string found") + base, query = s3_uri.split("?", 1) - logger.debug(f"Base: {base}") - logger.debug(f"Query: {query}") + + # Extract bucket from base URL + if "/s3/object/" not in base: + raise ValueError("No '/s3/object/' found in URL path") path_parts = base.split("/s3/object/") - logger.debug(f"Path parts: {path_parts}") + bucket = path_parts[1] + logger.info(f"Extracted bucket: {bucket}") - if len(path_parts) > 1: - bucket = path_parts[1] - logger.info(f"Extracted bucket: {bucket}") + # Extract prefix from query parameters + params = dict(item.split("=") for item in query.split("&") if "=" in item) + key = unquote(params.get("prefix", "")) + logger.info(f"Extracted key: {key}") - params = dict(item.split("=") for item in query.split("&") if "=" in item) - logger.debug(f"Query params: {params}") - - key = unquote(params.get("prefix", "")) - logger.info(f"Extracted key: {key}") - - return bucket, key - else: - logger.error(f"Could not find '/s3/object/' in URI") - raise ValueError(f"Could not parse S3 URI: {s3_uri}") + return bucket, key except Exception as e: logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}") - raise ValueError(f"Could not parse S3 URI: {s3_uri}") from e + raise ValueError(f"Could not parse S3 URI") from e def sanitise_postcode(postcode: str) -> str | None: From 507ecfb8a14e7af0945e6609a08d652a89b0320b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 15:49:04 +0000 Subject: [PATCH 048/340] terrform files --- .../terraform/lambda/_template/main.tf | 49 ++++++++++++++++ .../terraform/lambda/postcodeSplitter/main.tf | 6 ++ .../terraform/modules/s3_iam_policy/main.tf | 29 ++++++++++ .../modules/s3_iam_policy/outputs.tf | 14 +++++ .../modules/s3_iam_policy/variables.tf | 39 +++++++++++++ infrastructure/terraform/shared/main.tf | 57 +++++++++++-------- 6 files changed, 170 insertions(+), 24 deletions(-) create mode 100644 infrastructure/terraform/modules/s3_iam_policy/main.tf create mode 100644 infrastructure/terraform/modules/s3_iam_policy/outputs.tf create mode 100644 infrastructure/terraform/modules/s3_iam_policy/variables.tf diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf index 3010aa8a..2b767ce1 100644 --- a/infrastructure/terraform/lambda/_template/main.tf +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -1,3 +1,30 @@ +# ============================================================================== +# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy +# ============================================================================== +# Instructions: +# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name") +# 2. Add any additional environment variables as needed +# 3. To attach S3 IAM policies from shared state: +# - Uncomment the S3 policy attachment section below +# - Update the policy_arn to match the output from shared/main.tf +# - Available shared outputs (examples): +# - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn +# - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +# 4. To create a NEW S3 policy: +# - Add a new module "lambda_s3_policy" in shared/main.tf using the +# s3_iam_policy module (see examples in shared/main.tf) +# - Then reference it here using data.terraform_remote_state.shared.outputs +# ============================================================================== + +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -12,3 +39,25 @@ module "lambda" { LOG_LEVEL = "info" } } + +# ====================================================================== +# OPTIONAL: Attach S3 IAM policy to Lambda execution role +# ====================================================================== +# Uncomment and configure the resource below to attach S3 permissions +# +# Example 1: Attach existing policy from shared state +# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" { +# role = module.lambda.lambda_role_name +# policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn +# } +# +# Example 2: Attach multiple policies +# resource "aws_iam_role_policy_attachment" "lambda_read_policy" { +# role = module.lambda.lambda_role_name +# policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +# } +# +# resource "aws_iam_role_policy_attachment" "lambda_write_policy" { +# role = module.lambda.lambda_role_name +# policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn +# } diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 7ba4506c..9bbd1b26 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -32,4 +32,10 @@ module "lambda" { DB_PASSWORD = local.db_credentials.db_assessment_model_password }, ) +} + +# Attach S3 read policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" { + role = module.lambda.lambda_role_name + policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn } \ No newline at end of file diff --git a/infrastructure/terraform/modules/s3_iam_policy/main.tf b/infrastructure/terraform/modules/s3_iam_policy/main.tf new file mode 100644 index 00000000..e4e1e2f9 --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/main.tf @@ -0,0 +1,29 @@ +# Dynamically build S3 resources list from bucket ARNs and resource paths +locals { + # Generate full resource ARNs by combining bucket ARNs with resource paths + resources = flatten([ + for bucket_arn in var.bucket_arns : [ + for path in var.resource_paths : "${bucket_arn}${path}" + ] + ]) +} + +# IAM Policy with dynamic actions and resources +resource "aws_iam_policy" "s3_policy" { + name = var.policy_name + description = var.policy_description + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + { + Effect = "Allow" + Action = var.actions + Resource = local.resources + Condition = var.conditions != null ? var.conditions : null + } + ] + }) + + tags = var.tags +} diff --git a/infrastructure/terraform/modules/s3_iam_policy/outputs.tf b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf new file mode 100644 index 00000000..85defd9c --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf @@ -0,0 +1,14 @@ +output "policy_arn" { + description = "ARN of the S3 IAM policy" + value = aws_iam_policy.s3_policy.arn +} + +output "policy_name" { + description = "Name of the S3 IAM policy" + value = aws_iam_policy.s3_policy.name +} + +output "policy_id" { + description = "ID of the S3 IAM policy" + value = aws_iam_policy.s3_policy.id +} diff --git a/infrastructure/terraform/modules/s3_iam_policy/variables.tf b/infrastructure/terraform/modules/s3_iam_policy/variables.tf new file mode 100644 index 00000000..ed53ea1f --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/variables.tf @@ -0,0 +1,39 @@ +variable "policy_name" { + description = "Name of the IAM policy" + type = string +} + +variable "policy_description" { + description = "Description of the IAM policy" + type = string + default = "" +} + +variable "bucket_arns" { + description = "List of S3 bucket ARNs to grant access to" + type = list(string) +} + +variable "actions" { + description = "List of S3 actions to allow (e.g., ['s3:GetObject'], ['s3:PutObject'], ['s3:DeleteObject'])" + type = list(string) + default = ["s3:GetObject"] +} + +variable "resource_paths" { + description = "List of resource paths within buckets (e.g., ['/*'] for all objects, ['/specific-prefix/*'] for specific prefix)" + type = list(string) + default = ["/*"] +} + +variable "conditions" { + description = "Optional IAM policy conditions to apply to the statement" + type = any + default = null +} + +variable "tags" { + description = "Tags to apply to the policy" + type = map(string) + default = {} +} diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index b1474055..5e189dc9 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -321,6 +321,28 @@ module "condition_etl_registry" { } +# Condition Data S3 Bucket to store initial data +module "condition_data_bucket" { + source = "../modules/s3" + bucketname = "condition-data-${var.stage}" + allowed_origins = var.allowed_origins +} + +module "condition_etl_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "ConditionETLReadS3" + policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" + bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] + actions = ["s3:GetObject"] + resource_paths = ["/*"] +} + +output "condition_etl_s3_read_arn" { + value = module.condition_etl_s3_read.policy_arn +} + + ################################################ # Postcode Splitter – Lambda ECR ################################################ @@ -337,30 +359,17 @@ module "postcode_splitter_registry" { } -################################################ -# Conidition data – S3 bucket -################################################ -module "condition_data_bucket" { - source = "../modules/s3" - bucketname = "condition-data-${var.stage}" - allowed_origins = var.allowed_origins +# S3 policy for postcode splitter to read from retrofit data bucket +module "postcode_splitter_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "PostcodeSplitterReadS3" + policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject"] + resource_paths = ["/*"] } -resource "aws_iam_policy" "condition_etl_s3_read" { - name = "ConditionETLReadS3" - description = "Allow Lambda to read objects from condition-data-${var.stage}" - policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Effect = "Allow" - Action = ["s3:GetObject"] - Resource = "arn:aws:s3:::condition-data-${var.stage}/*" - } - ] - }) -} - -output "condition_etl_s3_read_arn" { - value = aws_iam_policy.condition_etl_s3_read.arn +output "postcode_splitter_s3_read_arn" { + value = module.postcode_splitter_s3_read.policy_arn } \ No newline at end of file From 8955082ac517f25aa23aff0205827499542240ed Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 15:54:10 +0000 Subject: [PATCH 049/340] wrong lambda --- infrastructure/terraform/lambda/_template/main.tf | 6 +++--- infrastructure/terraform/lambda/postcodeSplitter/main.tf | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf index 2b767ce1..7f60d684 100644 --- a/infrastructure/terraform/lambda/_template/main.tf +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -47,17 +47,17 @@ module "lambda" { # # Example 1: Attach existing policy from shared state # resource "aws_iam_role_policy_attachment" "lambda_s3_policy" { -# role = module.lambda.lambda_role_name +# role = module.lambda.role_name # policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn # } # # Example 2: Attach multiple policies # resource "aws_iam_role_policy_attachment" "lambda_read_policy" { -# role = module.lambda.lambda_role_name +# role = module.lambda.role_name # policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn # } # # resource "aws_iam_role_policy_attachment" "lambda_write_policy" { -# role = module.lambda.lambda_role_name +# role = module.lambda.role_name # policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn # } diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 9bbd1b26..68c433d1 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -36,6 +36,6 @@ module "lambda" { # Attach S3 read policy to the Lambda execution role resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" { - role = module.lambda.lambda_role_name + role = module.lambda.role_name policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn } \ No newline at end of file From 6a29967b1bdf29b4cb4401e2addd2d867335eae8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 15:57:31 +0000 Subject: [PATCH 050/340] only run if the file gets changed --- .github/workflows/deploy_terraform.yml | 5 +++++ .github/workflows/unit_tests.yml | 3 --- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 0d235ab1..5248383b 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -4,6 +4,11 @@ on: push: branches: - "**" + paths: + - 'infrastructure/terraform/**' + - '.github/workflows/deploy_terraform.yml' + - '.github/workflows/_build_image.yml' + - '.github/workflows/_deploy_lambda.yml' jobs: determine_stage: diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 14d5a06f..d3a92463 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -4,9 +4,6 @@ on: pull_request: branches: - "**" - push: - branches: - - "**" jobs: From 0c9dada6426d785dcefe42ca7cd2e7b89e87d6be Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 15:58:28 +0000 Subject: [PATCH 051/340] run for production --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 5248383b..88a84257 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -74,7 +74,7 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - if: env.STAGE == 'prod' + # if: env.STAGE == 'prod' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan From 12185bffa6fdebf6eb4f991ee0fc6978e22d3ab8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 16:17:28 +0000 Subject: [PATCH 052/340] destroy condition --- .github/workflows/_deploy_lambda.yml | 13 ++++++++++++- .github/workflows/deploy_terraform.yml | 1 + .../terraform/modules/s3_iam_policy/main.tf | 14 ++++++++------ 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 9bd686aa..1ab50e8d 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -29,6 +29,12 @@ on: default: 'false' # can only be 'true' or 'false' + terraform_destroy: + required: false + type: string + default: 'false' + # can only be 'true' or 'false' + secrets: AWS_ACCESS_KEY_ID: required: true @@ -93,6 +99,11 @@ jobs: -out=lambdaplan - name: Terraform Apply - if: inputs.terraform_apply == 'true' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main' + if: (inputs.terraform_apply == 'true' || github.ref == 'refs/heads/dev' || github.ref == 'refs/heads/main') && inputs.terraform_destroy != 'true' working-directory: ${{ inputs.lambda_path }} run: terraform apply -auto-approve lambdaplan + + - name: Terraform Destroy + if: inputs.terraform_destroy == 'true' + working-directory: ${{ inputs.lambda_path }} + run: terraform destroy -auto-approve diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 88a84257..4c504ba9 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -186,6 +186,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} + terraform_destroy: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} diff --git a/infrastructure/terraform/modules/s3_iam_policy/main.tf b/infrastructure/terraform/modules/s3_iam_policy/main.tf index e4e1e2f9..397bd963 100644 --- a/infrastructure/terraform/modules/s3_iam_policy/main.tf +++ b/infrastructure/terraform/modules/s3_iam_policy/main.tf @@ -16,12 +16,14 @@ resource "aws_iam_policy" "s3_policy" { policy = jsonencode({ Version = "2012-10-17" Statement = [ - { - Effect = "Allow" - Action = var.actions - Resource = local.resources - Condition = var.conditions != null ? var.conditions : null - } + merge( + { + Effect = "Allow" + Action = var.actions + Resource = local.resources + }, + var.conditions != null ? { Condition = var.conditions } : {} + ) ] }) From a9b8f09d9a217339430f8b30fa5c98273cc5c687 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 16:22:34 +0000 Subject: [PATCH 053/340] don't run apply yet must destroy first --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 4c504ba9..397eb6ee 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -74,7 +74,7 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - # if: env.STAGE == 'prod' + if: env.STAGE == 'prod' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan From 71de7e9a8639e3e548e51c0185355b2256ad523a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 10 Feb 2026 17:10:12 +0000 Subject: [PATCH 054/340] add github workflow vscode extensions to devcontainer --- .devcontainer/backend/devcontainer.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index c672b1bf..76eb0efd 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -22,7 +22,9 @@ "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", "ms-python.black-formatter", - "waderyan.gitblame" + "waderyan.gitblame", + "github.vscode-github-actions", + "me-dutour-mathieu.vscode-github-actions" ], "settings": { "files.defaultWorkspace": "/workspaces/model", From cb6f0925c1c3c3eaff5aafa1e4337d3519c6836a Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 17:31:38 +0000 Subject: [PATCH 055/340] get rid of duplicagte env --- .github/workflows/deploy_terraform.yml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 1cdaaf79..a89eb42b 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -24,12 +24,6 @@ jobs: AWS_REGION: ${{ secrets.DEV_AWS_REGION }} DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} - env: - AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} - steps: - name: Determine stage from branch id: set-stage From b2f1190066d5a523ab47410c70230d784918d82d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 10 Feb 2026 17:45:49 +0000 Subject: [PATCH 056/340] create categorisation directory --- backend/categorisation/__init__.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 backend/categorisation/__init__.py diff --git a/backend/categorisation/__init__.py b/backend/categorisation/__init__.py new file mode 100644 index 00000000..e69de29b From 3f9e8b303c70b3e4882550cd182c9b1b714307c7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 18:08:03 +0000 Subject: [PATCH 057/340] terraform destroy --- .devcontainer/backend/Dockerfile | 15 ++++++++++++++- .github/workflows/_deploy_lambda.yml | 7 ++++++- 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile index 4c5d16f5..99cd66d6 100644 --- a/.devcontainer/backend/Dockerfile +++ b/.devcontainer/backend/Dockerfile @@ -43,4 +43,17 @@ WORKDIR /workspaces/model # 6) Make Python find your package # Add project root to PYTHONPATH for all processes -ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} \ No newline at end of file +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} + + +# Install terraform +RUN apt-get update && sudo apt-get install -y gnupg software-properties-common +RUN wget -O- https://apt.releases.hashicorp.com/gpg | \ +gpg --dearmor | \ +sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null +RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \ +https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \ +tee /etc/apt/sources.list.d/hashicorp.list +RUN apt update +RUN apt-get install terraform +RUN terraform -install-autocomplete \ No newline at end of file diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index e0da2f2b..b8731446 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -106,4 +106,9 @@ jobs: - name: Terraform Destroy if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true' working-directory: ${{ inputs.lambda_path }} - run: terraform destroy -auto-approve + run: | + terraform destroy -auto-approve \ + -var="stage=${{ inputs.stage }}" \ + -var="lambda_name=${{ inputs.lambda_name }}" \ + -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \ + -var="image_digest=${{ inputs.image_digest }}" From c67e4644e4c6cfe8dc67aa6408e10c8bc4ed8b82 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 10 Feb 2026 18:11:50 +0000 Subject: [PATCH 058/340] define processor and local runner --- backend/categorisation/local_runner.py | 6 ++++++ backend/categorisation/processor.py | 2 ++ 2 files changed, 8 insertions(+) create mode 100644 backend/categorisation/local_runner.py create mode 100644 backend/categorisation/processor.py diff --git a/backend/categorisation/local_runner.py b/backend/categorisation/local_runner.py new file mode 100644 index 00000000..4693850c --- /dev/null +++ b/backend/categorisation/local_runner.py @@ -0,0 +1,6 @@ +def main() -> None: + pass + + +if __name__ == "__main__": + main() diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py new file mode 100644 index 00000000..aa519c6e --- /dev/null +++ b/backend/categorisation/processor.py @@ -0,0 +1,2 @@ +def process_portfolio() -> None: + pass From eb393eb0e88a22bca26d4151922f02983a9da53f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 18:13:56 +0000 Subject: [PATCH 059/340] terraform apply new env --- .github/workflows/deploy_terraform.yml | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index a89eb42b..3a46e9a1 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -76,7 +76,7 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - if: env.STAGE == 'prod' + # if: env.STAGE == 'prod' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan @@ -148,7 +148,8 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} - terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + terraform_apply: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -189,7 +190,8 @@ jobs: ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - terraform_destroy: 'true' + # terraform_destroy: 'true' + terraform_apply: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} From e2fa13e2cc3d0eb6020ba348a8608e508d84902e Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 18:17:58 +0000 Subject: [PATCH 060/340] delete it in a comment --- infrastructure/terraform/shared/main.tf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 5e189dc9..fc3d086a 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -328,19 +328,19 @@ module "condition_data_bucket" { allowed_origins = var.allowed_origins } -module "condition_etl_s3_read" { - source = "../modules/s3_iam_policy" +# module "condition_etl_s3_read" { +# source = "../modules/s3_iam_policy" - policy_name = "ConditionETLReadS3" - policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" - bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] - actions = ["s3:GetObject"] - resource_paths = ["/*"] -} +# policy_name = "ConditionETLReadS3" +# policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" +# bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] +# actions = ["s3:GetObject"] +# resource_paths = ["/*"] +# } -output "condition_etl_s3_read_arn" { - value = module.condition_etl_s3_read.policy_arn -} +# output "condition_etl_s3_read_arn" { +# value = module.condition_etl_s3_read.policy_arn +# } ################################################ From 0e5ea0f490f1a88d502f34eacb90b39ba134b76c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 18:19:54 +0000 Subject: [PATCH 061/340] now re deploy --- infrastructure/terraform/shared/main.tf | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index fc3d086a..5e189dc9 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -328,19 +328,19 @@ module "condition_data_bucket" { allowed_origins = var.allowed_origins } -# module "condition_etl_s3_read" { -# source = "../modules/s3_iam_policy" +module "condition_etl_s3_read" { + source = "../modules/s3_iam_policy" -# policy_name = "ConditionETLReadS3" -# policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" -# bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] -# actions = ["s3:GetObject"] -# resource_paths = ["/*"] -# } + policy_name = "ConditionETLReadS3" + policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" + bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] + actions = ["s3:GetObject"] + resource_paths = ["/*"] +} -# output "condition_etl_s3_read_arn" { -# value = module.condition_etl_s3_read.policy_arn -# } +output "condition_etl_s3_read_arn" { + value = module.condition_etl_s3_read.policy_arn +} ################################################ From 91fe9ccc4d3b79d0429e266c12b16243f54bad03 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 10 Feb 2026 18:24:04 +0000 Subject: [PATCH 062/340] fix merge conflict in vscode settings and add pylance analysis --- .vscode/settings.json | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 3d4c6b42..b294c736 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,12 +9,14 @@ "path": "/bin/bash" } }, -<<<<<<< HEAD -======= "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.testing.pytestArgs": ["-s", "-q", "--no-cov"] ->>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d + "python.testing.pytestArgs": ["-s", "-q", "--no-cov"], + + "python.languageServer": "Pylance", + "python.analysis.typeCheckingMode": "strict", + "python.analysis.autoSearchPaths": true, + "python.analysis.extraPaths": ["./src"] // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ From e549eae8202b838d1e8956d79798afd6c77481c7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 18:30:15 +0000 Subject: [PATCH 063/340] time out --- infrastructure/terraform/lambda/condition-etl/main.tf | 1 - 1 file changed, 1 deletion(-) diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/infrastructure/terraform/lambda/condition-etl/main.tf index 4219f209..0128f975 100644 --- a/infrastructure/terraform/lambda/condition-etl/main.tf +++ b/infrastructure/terraform/lambda/condition-etl/main.tf @@ -23,7 +23,6 @@ module "lambda" { stage = var.stage image_uri = local.image_uri - timeout = 180 environment = merge( From 526d1a79631c3a1aaf6e6e0de1d9aeb15589aa9f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 18:46:25 +0000 Subject: [PATCH 064/340] default variables --- .github/workflows/deploy_terraform.yml | 4 +--- .../terraform/lambda/postcodeSplitter/main.tf | 14 ++++++++++++++ 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 3a46e9a1..39132944 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -189,9 +189,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} - # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - # terraform_destroy: 'true' - terraform_apply: 'true' + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 68c433d1..2e2e91da 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -30,6 +30,20 @@ module "lambda" { LOG_LEVEL = "info" DB_USERNAME = local.db_credentials.db_assessment_model_username DB_PASSWORD = local.db_credentials.db_assessment_model_password + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + EPC_AUTH_TOKEN = "test" + ENGINE_SQS_URL = "test" + ENERGY_ASSESSMENTS_BUCKET = "test" }, ) } From a8d89dc2863e7c0e9791d3190cb8c3d64ddfe980 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 19:12:34 +0000 Subject: [PATCH 065/340] s3 policy --- infrastructure/terraform/shared/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 5e189dc9..83845185 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -366,7 +366,7 @@ module "postcode_splitter_s3_read" { policy_name = "PostcodeSplitterReadS3" policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket" bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] - actions = ["s3:GetObject"] + actions = ["s3:GetObject", "s3:ListBucket"] resource_paths = ["/*"] } From 663f3755e7fed28c9ae1561188742fc524f992de Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 19:17:02 +0000 Subject: [PATCH 066/340] apply new s3 policy --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 39132944..ef1887ee 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -76,7 +76,7 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - # if: env.STAGE == 'prod' + if: env.STAGE == 'prod' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan From 9dc5e0b98447c3f3a623fcf1eed14ef2f1a7967d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 19:26:58 +0000 Subject: [PATCH 067/340] apply new s3 policy --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index ef1887ee..39132944 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -76,7 +76,7 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - if: env.STAGE == 'prod' + # if: env.STAGE == 'prod' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan From 7911bb4db0746f94bd7f01c7e82f8ffdc47c39bc Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 21:08:39 +0000 Subject: [PATCH 068/340] parse uri --- backend/postcode_splitter/main.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index adb8e5c9..5a63d920 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -17,15 +17,30 @@ from backend.address2UPRN.main import ( logger = setup_logger() -def parse_s3_console_url(s3_uri: str) -> tuple[str, str]: +def parse_s3_uri(s3_uri: str) -> tuple[str, str]: """ - Parse AWS console S3 URL to extract bucket and key. + Parse S3 URI to extract bucket and key. - Format: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path + Supports two formats: + 1. S3 URI format: s3://bucket/key + 2. AWS console URL: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path """ - logger.info("Parsing S3 console URL") + logger.info("Parsing S3 URI") try: + # Check if it's an S3 URI format + if s3_uri.startswith("s3://"): + parts = s3_uri[5:].split("/", 1) + if len(parts) < 2: + raise ValueError("S3 URI must include both bucket and key") + bucket = parts[0] + key = parts[1] + logger.info(f"Extracted bucket: {bucket}, key: {key}") + return bucket, key + + # Otherwise, treat as AWS console URL + logger.info("Parsing as AWS console URL") + # Split base URL and query string if "?" not in s3_uri: raise ValueError("No query string found") @@ -215,7 +230,7 @@ def handler(event, context): # Read CSV from S3 logger.info(f"Processing S3 URI: {s3_uri}") - bucket, key = parse_s3_console_url(s3_uri) + bucket, key = parse_s3_uri(s3_uri) logger.info(f"S3 Bucket: {bucket}, Key: {key}") csv_data = read_csv_from_s3_dict(bucket, key) From 76e362520df88526514c0e5c9da5f93062e7b129 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 10 Feb 2026 21:15:14 +0000 Subject: [PATCH 069/340] parse uri --- infrastructure/terraform/lambda/postcodeSplitter/variables.tf | 3 +++ 1 file changed, 3 insertions(+) diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf index 9ce45fa5..0c8ba5b2 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -24,3 +24,6 @@ locals { output "resolved_image_uri" { value = local.image_uri } + + + From b7e201f3d47e088d71f66381f01d9ad05e727710 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 09:46:45 +0000 Subject: [PATCH 070/340] redploy my lambda without list and see if it works --- backend/address2UPRN/main.py | 2 +- backend/condition/condition_trigger_request.py | 2 +- backend/postcode_splitter/main.py | 1 - infrastructure/terraform/shared/main.tf | 2 +- 4 files changed, 3 insertions(+), 4 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 293ce3d9..2cc604cb 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -335,7 +335,7 @@ def get_uprn( address = top_rank_df["address"].values[0] lexiscore = float(top_rank_df["lexiscore"].values[0]) - epc = top_rank_df["current-energy-rating"].values[0] + epc = top_rank_df["current-energy-efficiency"].values[0] score = float(top_rank_df["lexiscore"].values[0]) # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") diff --git a/backend/condition/condition_trigger_request.py b/backend/condition/condition_trigger_request.py index 03bd6ad1..daa82949 100644 --- a/backend/condition/condition_trigger_request.py +++ b/backend/condition/condition_trigger_request.py @@ -29,5 +29,5 @@ class ConditionTriggerRequest(BaseModel): # { # "file_type": "LBWF", # "trigger_file_bucket": "condition-data-dev", -# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx", +# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx" # } diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 5a63d920..06a9d1a3 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -23,7 +23,6 @@ def parse_s3_uri(s3_uri: str) -> tuple[str, str]: Supports two formats: 1. S3 URI format: s3://bucket/key - 2. AWS console URL: https://account-id-hash.region.console.aws.amazon.com/s3/object/bucket?region=...&prefix=path """ logger.info("Parsing S3 URI") diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 83845185..5e189dc9 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -366,7 +366,7 @@ module "postcode_splitter_s3_read" { policy_name = "PostcodeSplitterReadS3" policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket" bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] - actions = ["s3:GetObject", "s3:ListBucket"] + actions = ["s3:GetObject"] resource_paths = ["/*"] } From d4ac6aee71df211e5c31238fc046a23991839faf Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 11:50:02 +0000 Subject: [PATCH 071/340] mount home directory to devcontainer home directory --- .devcontainer/backend/devcontainer.json | 2 +- asset_list/AssetList.py | 2 +- asset_list/app.py | 82 ++++---------- backend/address2UPRN/main.py | 23 ++++ backend/postcode_splitter/main.py | 143 ++++++------------------ 5 files changed, 76 insertions(+), 176 deletions(-) diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index 5d728dcd..6e2edc93 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -6,7 +6,7 @@ "workspaceFolder": "/workspaces/model", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ - "source=${localEnv:HOME},target=/workspaces/home,type=bind" + "source=${localEnv:HOME},target=/home/vscode,type=bind" ], "customizations": { "vscode": { diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index ea4d8b34..36b3d58e 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -34,7 +34,7 @@ from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes logger = setup_logger() # OpenAI API Key (set this in your environment variables for security) -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "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") +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") diff --git a/asset_list/app.py b/asset_list/app.py index 43c653a7..02557831 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -13,11 +13,15 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -load_dotenv(dotenv_path="backend/.env") +load_dotenv(dotenv_path="../backend/.env") EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) +OPENAI_API_KEY = os.getenv( + "OPENAI_API_KEY", +) + def extract_address1( asset_list, full_address_col, postcode_col, method="first_two_words" @@ -69,72 +73,24 @@ def app(): Property UPRN """ -<<<<<<< HEAD - data_folder = "/workspaces/model/asset_list/" - data_filename = "manchester.xlsx" - sheet_name = "PW0099 - Property List" - postcode_column = "post Code" - address1_column = "address" - address1_method = None - fulladdress_column = None - address_cols_to_concat = ["address"] -======= - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Aspire" - data_filename = "ASPIRE ASSET LIST.xlsx" - sheet_name = "Asset List" - postcode_column = "Postcode" + data_folder = "/workspaces/model/asset_list" + data_filename = "assets.xlsx" + sheet_name = "Sheet1" + postcode_column = "POSTCODE" address1_column = None address1_method = "house_number_extraction" - fulladdress_column = "Address" + fulladdress_column = "ADDRESS" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = None - landlord_property_type = "Property Type" - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None + landlord_property_type = "PROPERTY TYPE" + landlord_built_form = None # Skipped as empty + landlord_wall_construction = "wall combined" # combin F + G + landlord_roof_construction = "HEATING SYSTEM" # Combine I + J + landlord_heating_system = None # Check with Khalim landlord_existing_pv = None - landlord_property_id = "LLUPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Peabody data for cleaning - data_folder = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/data_validation" - ) - data_filename = "to_standardise_uprns.xlsx" - sheet_name = "Sheet1" - postcode_column = "Postcode" - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address" - address_cols_to_concat = None ->>>>>>> d4064da36565f87c2b72d10e9f3604cc6c37bdb6 - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "UHTprop Ref" + landlord_property_id = "UPRN" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -286,7 +242,7 @@ def app(): if skip is not None and not force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i: i + chunk_size] + chunk = asset_list.standardised_asset_list[i : i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -429,7 +385,7 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename(columns=asset_list.EPC_API_DATA_NAMES) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place @@ -446,7 +402,7 @@ def app(): find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), + ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID, ) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 2cc604cb..fb812d67 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -20,6 +20,29 @@ if EPC_AUTH_TOKEN is None: raise RuntimeError("EPC_AUTH_TOKEN not defined in env") +def is_valid_postcode(postcode_clean: str) -> bool: + """ + Validate postcode using postcodes.io. + + Expects a sanitised postcode (e.g. E84SQ). + Returns True if valid, False otherwise. + """ + POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate" + if not postcode_clean: + return False + + try: + resp = requests.get( + POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean), + timeout=5, + ) + resp.raise_for_status() + return resp.json().get("result", False) + except requests.RequestException: + # Network issues, rate limits, etc. + return False + + def levenshtein(a: str, b: str) -> float: """ Address similarity score in [0, 1]. diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 06a9d1a3..0f21a67f 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -78,112 +78,14 @@ def sanitise_postcode(postcode: str) -> str | None: return postcode.upper().replace(" ", "") -def is_valid_postcode(postcode_clean: str) -> bool: - """ - Validate postcode using postcodes.io. - - Expects a sanitised postcode (e.g. E84SQ). - Returns True if valid, False otherwise. - """ - POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate" - if not postcode_clean: - return False - - try: - resp = requests.get( - POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean), - timeout=5, - ) - resp.raise_for_status() - return resp.json().get("result", False) - except requests.RequestException: - # Network issues, rate limits, etc. - return False - - -def main(): - df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability") - df = df.head(500) - - # Sanitise postcodes - df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode) - - # --- validate AFTER grouping (save API calls) --- - - # Get unique, non-null postcodes - unique_postcodes = df["postcode_clean"].dropna().unique() - - # Validate each postcode once, TODOadd a progress bar - postcode_validity = { - pc: is_valid_postcode(pc) - for pc in tqdm(unique_postcodes, total=len(unique_postcodes)) - } - - # Map validity back onto dataframe - df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) - - results = [] - - for postcode, group_df in tqdm( - df[df["postcode_valid"]].groupby("postcode_clean"), - desc="Resolving UPRNs by postcode", - ): - try: - epc_df = get_epc_data_with_postcode(postcode) - - if epc_df.empty: - tmp = group_df.copy() - tmp["found_uprn"] = None - tmp["status"] = "no_epc_results" - results.append(tmp) - continue - - resolved = resolve_uprns_for_postcode_group( - group_df=group_df, - epc_df=epc_df, - ) - - results.append(resolved) - - except Exception as e: - tmp = group_df.copy() - tmp["found_uprn"] = None - tmp["status"] = "exception" - tmp["error"] = str(e) - results.append(tmp) - - final_df = pd.concat(results, ignore_index=True) - a = final_df[ - [ - "best_match_lexiscore", - "Address 1", - "best_match_address", - "Postcode", - "UPRN", - "best_match_uprn", - ] - ] # add levi score to viewing - b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing - b = b[ - [ - "best_match_lexiscore", - "Address 1", - "best_match_address", - "Postcode", - "UPRN", - "best_match_uprn", - ] - ] - - -def handler(event, context): +def handler(event, context, local=False): print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") # Example SQS message for testing (copy and paste into SQS): # { - # "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - # "s3_uri": "https://337213553626-7ovirzjr.eu-west-2.console.aws.amazon.com/s3/object/retrofit-data-dev?region=eu-west-2&prefix=ara_raw_inputs/peabody/2025_11_11+-+Peabody+-+Data+Extracts+for+Domna_transformed.csv", + # "task_id":"e31f2f21-175b-4a91-a3ec-a6baa325e917", + # "s3_uri":"s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv" # } # Handle both single event and batch events (SQS, etc.) @@ -196,7 +98,13 @@ def handler(event, context): task_id = None subtask_id = None try: - # Parse body + # For local development + if local is True: + record = {} + record["body"] = ( + '{"task_id":"e31f2f21-175b-4a91-a3ec-a6baa325e917","s3_uri":"s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv"}' + ) + # Parse body (inputs) if isinstance(record.get("body"), str): body = json.loads(record["body"]) else: @@ -236,17 +144,33 @@ def handler(event, context): df = pd.DataFrame(csv_data) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") - # Get head for demo - df_head = df.head() - logger.info("DataFrame head:") - logger.info(f"\n{df_head}") + # Sanitise postcodes + df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode) + + # Group by sanitised postcode (excluding null values) + grouped_data = [] + for postcode, group_df in df.dropna(subset=["postcode_clean"]).groupby( + "postcode_clean" + ): + group_info = { + "postcode": postcode, + "row_count": len(group_df), + "rows": group_df.to_dict(orient="records"), + } + grouped_data.append(group_info) + logger.info(f"Postcode: {postcode}, Rows: {len(group_df)}") + + logger.info(f"Total postcodes: {len(grouped_data)}") results.append( { - "message": "Postcode splitter processing started", + "message": "Postcode splitter processing completed", "task_id": str(task_id), "s3_uri": s3_uri, "subtask_id": str(subtask_id), + "total_rows": len(df), + "total_postcodes": len(grouped_data), + "grouped_data": grouped_data, } ) @@ -258,6 +182,7 @@ def handler(event, context): "status": "processing_complete", "s3_uri": s3_uri, "rows_processed": len(df), + "total_postcodes": len(grouped_data), }, ) logger.info(f"Subtask {subtask_id} marked as complete") @@ -295,7 +220,3 @@ def handler(event, context): {"processed": results, "errors": errors if errors else None} ), } - - -if __name__ == "__main__": - main() From 6c242188b70c217917f1b3ac84920e58e8b2fc63 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 11 Feb 2026 11:57:59 +0000 Subject: [PATCH 072/340] update devcontainer to mount to home directory --- .devcontainer/backend/devcontainer.json | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index 76eb0efd..5b805b0f 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -6,7 +6,7 @@ "workspaceFolder": "/workspaces/model", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ - "source=${localEnv:HOME},target=/workspaces/home,type=bind" + "source=${localEnv:HOME},target=/home/vscode,type=bind" ], "customizations": { "vscode": { @@ -23,8 +23,8 @@ "ms-python.vscode-python-envs", "ms-python.black-formatter", "waderyan.gitblame", - "github.vscode-github-actions", - "me-dutour-mathieu.vscode-github-actions" + "GrapeCity.gc-excelviewer", + "jakobhoeg.vscode-pokemon" ], "settings": { "files.defaultWorkspace": "/workspaces/model", @@ -40,3 +40,4 @@ "PYTHONFLAGS": "-Xfrozen_modules=off" } } + \ No newline at end of file From 2afccf944ee98cf1202e9b86bb6e7ec65c1b74cb Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 11 Feb 2026 12:30:14 +0000 Subject: [PATCH 073/340] add github actions back into devcontainer --- .devcontainer/backend/devcontainer.json | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index 5b805b0f..3727d8a3 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -24,7 +24,9 @@ "ms-python.black-formatter", "waderyan.gitblame", "GrapeCity.gc-excelviewer", - "jakobhoeg.vscode-pokemon" + "jakobhoeg.vscode-pokemon", + "github.vscode-github-actions", + "me-dutour-mathieu.vscode-github-actions" ], "settings": { "files.defaultWorkspace": "/workspaces/model", From ffb840da81e131bcdeb2d1fd784f909b72493f68 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 13:11:31 +0000 Subject: [PATCH 074/340] added address2uprn and postcodesplitter link --- .github/workflows/deploy_terraform.yml | 5 +- backend/address2UPRN/main.py | 98 +-------- backend/postcode_splitter/main.py | 186 +++++++++++++----- .../terraform/lambda/postcodeSplitter/main.tf | 33 ++++ 4 files changed, 180 insertions(+), 142 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 39132944..514fc7af 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -107,7 +107,8 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.address2uprn_image.outputs.image_digest }} - terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + terraform_apply: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -140,7 +141,7 @@ jobs: # 3️⃣ Deploy Postcode Splitter Lambda # ============================================================ postcodeSplitter_lambda: - needs: [postcodeSplitter_image, determine_stage] + needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda] uses: ./.github/workflows/_deploy_lambda.yml with: lambda_name: postcodeSplitter diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index fb812d67..33c37760 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -506,99 +506,13 @@ def run_all_test(): ) -if __name__ == "__main__": - INPUT_FILE = "hackney.xlsx" - - ADDRESS_COL = "Address 1" - POSTCODE_COL = "Postcode" - UPRN_COL = "UPRN" - - df = pd.read_excel(INPUT_FILE) - - failures = [] - - for _, row in tqdm( - df.iterrows(), - total=len(df), - desc="Auditing UPRNs", - ): - input_address = str(row[ADDRESS_COL]).strip() - postcode = str(row[POSTCODE_COL]).strip() - - expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL])) - - try: - epc_df = get_epc_data_with_postcode(postcode) - - if epc_df.empty: - failures.append( - { - **row.to_dict(), - "found_uprn": None, - "best_match_uprn": None, - "best_match_address": None, - "best_match_lexiscore": None, - "status": "no_epc_results", - } - ) - continue - - scored_df = get_uprn_candidates( - epc_df, - user_address=input_address, - ) - - best_row = scored_df.iloc[0] - - best_match_uprn = str(best_row["uprn"]) - best_match_address = best_row["address"] - best_match_lexiscore = round(float(best_row["lexiscore"]), 4) - - found_uprn = get_uprn(input_address, postcode) - - except Exception as e: - failures.append( - { - **row.to_dict(), - "found_uprn": None, - "best_match_uprn": None, - "best_match_address": None, - "best_match_lexiscore": None, - "status": "exception", - "error": str(e), - } - ) - continue - - found_uprn_norm = None if not found_uprn else str(found_uprn) - - if found_uprn_norm != expected_uprn: - failures.append( - { - **row.to_dict(), - "found_uprn": found_uprn_norm, - "best_match_uprn": best_match_uprn, - "best_match_address": best_match_address, - "best_match_lexiscore": best_match_lexiscore, - "status": ("no_match" if found_uprn_norm is None else "mismatch"), - } - ) - - failures_df = pd.DataFrame(failures) - - print("===================================") - print(f"Total rows : {len(df)}") - print(f"Failures : {len(failures_df)}") - print("===================================") - - failures_df.to_excel( - "hackney_uprn_failures.xlsx", - index=False, - ) - - def handler(event, context): - print("hello world") + print("=== Address2UPRN Lambda Handler ===") + print(f"Function: {context.function_name}") + print(f"Request ID: {context.aws_request_id}") + print(f"Event: {json.dumps(event, indent=2, default=str)}") + print(f"Context: {context}") + print("===================================") return {"statusCode": 200, "body": "hello world"} diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 0f21a67f..d515a21f 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -3,16 +3,13 @@ import sys import json import pandas as pd import requests +import boto3 from uuid import UUID from urllib.parse import unquote from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict from utils.logger import setup_logger from tqdm import tqdm from backend.app.db.functions.tasks.Tasks import SubTaskInterface -from backend.address2UPRN.main import ( - resolve_uprns_for_postcode_group, - get_epc_data_with_postcode, -) logger = setup_logger() @@ -65,17 +62,39 @@ def parse_s3_uri(s3_uri: str) -> tuple[str, str]: raise ValueError(f"Could not parse S3 URI") from e -def sanitise_postcode(postcode: str) -> str | None: +def send_to_address2uprn_queue(task_id: str, rows: list) -> str: """ - Normalise postcode for grouping. + Send a postcode group to the address2UPRN SQS queue. - - Uppercase - - Remove all whitespace + Args: + task_id: The parent task ID + rows: List of row dictionaries for this postcode group + + Returns: + Message ID from SQS """ - if pd.isna(postcode): - return None + sqs_client = boto3.client("sqs") + queue_url = os.getenv("ADDRESS2UPRN_QUEUE_URL") - return postcode.upper().replace(" ", "") + if not queue_url: + raise ValueError("ADDRESS2UPRN_QUEUE_URL environment variable not set") + + message_body = { + "task_id": task_id, + "rows": rows, + } + + response = sqs_client.send_message( + QueueUrl=queue_url, + MessageBody=json.dumps(message_body), + ) + + logger.info( + f"Sent message to address2UPRN queue. " + f"Task: {task_id}, MessageId: {response['MessageId']}" + ) + + return response["MessageId"] def handler(event, context, local=False): @@ -142,50 +161,121 @@ def handler(event, context, local=False): csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) + # just do 5 well we are testing, sqs connection + df = df.head(5) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") # Sanitise postcodes - df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode) + df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "") - # Group by sanitised postcode (excluding null values) - grouped_data = [] - for postcode, group_df in df.dropna(subset=["postcode_clean"]).groupby( - "postcode_clean" - ): - group_info = { - "postcode": postcode, - "row_count": len(group_df), - "rows": group_df.to_dict(orient="records"), - } - grouped_data.append(group_info) - logger.info(f"Postcode: {postcode}, Rows: {len(group_df)}") + clean_df = df.dropna(subset=["postcode_clean"]) - logger.info(f"Total postcodes: {len(grouped_data)}") + postcode_to_addresses = { + postcode: group.to_dict(orient="records") + for postcode, group in clean_df.groupby("postcode_clean", sort=False) + } - results.append( - { - "message": "Postcode splitter processing completed", - "task_id": str(task_id), - "s3_uri": s3_uri, - "subtask_id": str(subtask_id), - "total_rows": len(df), - "total_postcodes": len(grouped_data), - "grouped_data": grouped_data, - } - ) + logger.info(f"Total postcodes: {len(postcode_to_addresses)}") - # Mark subtask as complete after successful processing - subtask_interface.update_subtask_status( - subtask_id, - "complete", - outputs={ - "status": "processing_complete", - "s3_uri": s3_uri, - "rows_processed": len(df), - "total_postcodes": len(grouped_data), - }, - ) - logger.info(f"Subtask {subtask_id} marked as complete") + # Batch rows in groups of 500 + batch_rows = [] + batch_size = 500 + + for postcode, rows in postcode_to_addresses.items(): + # If postcode itself is larger than batch_size, send it individually + if len(rows) > batch_size: + # First, send the current batch if it has data + if batch_rows: + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=batch_rows, + ) + logger.info( + f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" + ) + batch_rows = [] + except Exception as e: + logger.error( + f"Failed to send batch to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) + + # Send the large postcode on its own + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=rows, + ) + logger.info( + f"Sent large postcode {postcode} ({len(rows)} rows) to address2UPRN queue" + ) + except Exception as e: + logger.error( + f"Failed to send large postcode to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) + continue + + # If adding this postcode's rows would exceed batch_size, send current batch + if batch_rows and len(batch_rows) + len(rows) > batch_size: + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=batch_rows, + ) + logger.info( + f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" + ) + batch_rows = [] + except Exception as e: + logger.error( + f"Failed to send batch to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) + + # Add current postcode's rows to batch + batch_rows.extend(rows) + + # Send remaining batch + if batch_rows: + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=batch_rows, + ) + logger.info( + f"Sent final batch of {len(batch_rows)} rows to address2UPRN queue" + ) + except Exception as e: + logger.error( + f"Failed to send final batch to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) except json.JSONDecodeError as e: logger.error(f"Invalid JSON in request body: {e}") diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 2e2e91da..69b80011 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -15,6 +15,16 @@ locals { db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) } +# Reference the existing address2UPRN Lambda outputs from shared state +data "terraform_remote_state" "address2uprn" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -44,6 +54,7 @@ module "lambda" { EPC_AUTH_TOKEN = "test" ENGINE_SQS_URL = "test" ENERGY_ASSESSMENTS_BUCKET = "test" + ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url }, ) } @@ -52,4 +63,26 @@ module "lambda" { resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" { role = module.lambda.role_name policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +} + +# Create SQS send policy for address2UPRN queue +module "postcode_splitter_sqs_policy" { + source = "../../modules/general_iam_policy" + + policy_name = "postcode-splitter-sqs-send-${var.stage}" + policy_description = "Allow postcode-splitter Lambda to send messages to address2UPRN queue" + + actions = [ + "sqs:SendMessage" + ] + + resources = [ + data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_arn + ] +} + +# Attach SQS policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "postcode_splitter_sqs_send" { + role = module.lambda.role_name + policy_arn = module.postcode_splitter_sqs_policy.policy_arn } \ No newline at end of file From 203843c387adafbba7eb3e1f47627343e296958d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 13:16:11 +0000 Subject: [PATCH 075/340] added new files --- .../terraform/lambda/address2UPRN/outputs.tf | 14 ++++++++ .../modules/general_iam_policy/main.tf | 21 ++++++++++++ .../modules/general_iam_policy/outputs.tf | 9 ++++++ .../modules/general_iam_policy/variables.tf | 32 +++++++++++++++++++ 4 files changed, 76 insertions(+) create mode 100644 infrastructure/terraform/lambda/address2UPRN/outputs.tf create mode 100644 infrastructure/terraform/modules/general_iam_policy/main.tf create mode 100644 infrastructure/terraform/modules/general_iam_policy/outputs.tf create mode 100644 infrastructure/terraform/modules/general_iam_policy/variables.tf diff --git a/infrastructure/terraform/lambda/address2UPRN/outputs.tf b/infrastructure/terraform/lambda/address2UPRN/outputs.tf new file mode 100644 index 00000000..e4645a0a --- /dev/null +++ b/infrastructure/terraform/lambda/address2UPRN/outputs.tf @@ -0,0 +1,14 @@ +output "address2uprn_queue_url" { + value = module.address2uprn.queue_url + description = "URL of the address2UPRN SQS queue" +} + +output "address2uprn_queue_arn" { + value = module.address2uprn.queue_arn + description = "ARN of the address2UPRN SQS queue" +} + +output "address2uprn_lambda_arn" { + value = module.address2uprn.lambda_arn + description = "ARN of the address2UPRN Lambda function" +} diff --git a/infrastructure/terraform/modules/general_iam_policy/main.tf b/infrastructure/terraform/modules/general_iam_policy/main.tf new file mode 100644 index 00000000..f7ffe4a1 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/main.tf @@ -0,0 +1,21 @@ +# IAM Policy with dynamic actions and resources +resource "aws_iam_policy" "policy" { + name = var.policy_name + description = var.policy_description + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + merge( + { + Effect = "Allow" + Action = var.actions + Resource = var.resources + }, + var.conditions != null ? { Condition = var.conditions } : {} + ) + ] + }) + + tags = var.tags +} diff --git a/infrastructure/terraform/modules/general_iam_policy/outputs.tf b/infrastructure/terraform/modules/general_iam_policy/outputs.tf new file mode 100644 index 00000000..cfceab05 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/outputs.tf @@ -0,0 +1,9 @@ +output "policy_arn" { + value = aws_iam_policy.policy.arn + description = "ARN of the created IAM policy" +} + +output "policy_name" { + value = aws_iam_policy.policy.name + description = "Name of the created IAM policy" +} diff --git a/infrastructure/terraform/modules/general_iam_policy/variables.tf b/infrastructure/terraform/modules/general_iam_policy/variables.tf new file mode 100644 index 00000000..0d824eb5 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/variables.tf @@ -0,0 +1,32 @@ +variable "policy_name" { + description = "Name of the IAM policy" + type = string +} + +variable "policy_description" { + description = "Description of the IAM policy" + type = string + default = "" +} + +variable "actions" { + description = "List of IAM actions allowed by this policy" + type = list(string) +} + +variable "resources" { + description = "List of AWS resources this policy applies to" + type = list(string) +} + +variable "conditions" { + description = "Optional IAM policy conditions" + type = any + default = null +} + +variable "tags" { + description = "Tags to apply to the policy" + type = map(string) + default = {} +} From b2f67bfa785efe8af887930168f41533ed751cd5 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 13:25:41 +0000 Subject: [PATCH 076/340] address2 uprn --- infrastructure/terraform/lambda/postcodeSplitter/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 69b80011..0350a139 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -19,7 +19,7 @@ locals { data "terraform_remote_state" "address2uprn" { backend = "s3" config = { - bucket = "assessment-model-terraform-state" + bucket = "address2uprn-terraform-state" key = "env:/${var.stage}/terraform.tfstate" region = "eu-west-2" } From ef0b0d6142c2833565bf797f70a0467e8ad0cebf Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 13:31:47 +0000 Subject: [PATCH 077/340] add json --- backend/address2UPRN/main.py | 1 + infrastructure/terraform/lambda/postcodeSplitter/main.tf | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 33c37760..30066bcb 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -7,6 +7,7 @@ from tqdm import tqdm from utils.logger import setup_logger import re from typing import Set +import json logger = setup_logger() diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 0350a139..81120772 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -15,7 +15,7 @@ locals { db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) } -# Reference the existing address2UPRN Lambda outputs from shared state +# Reference the existing address2UPRN Lambda outputs from address2uprn state data "terraform_remote_state" "address2uprn" { backend = "s3" config = { From 5a0e0c0a698f858abdfcb39554370dabd2e35c25 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 13:45:06 +0000 Subject: [PATCH 078/340] add more logic to batch and also missing libraries --- backend/address2UPRN/main.py | 1 + backend/postcode_splitter/main.py | 153 +++++++++++++++++++----------- 2 files changed, 96 insertions(+), 58 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 30066bcb..777dde0e 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -8,6 +8,7 @@ from utils.logger import setup_logger import re from typing import Set import json +import requests logger = setup_logger() diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d515a21f..eb7cf044 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -177,23 +177,103 @@ def handler(event, context, local=False): logger.info(f"Total postcodes: {len(postcode_to_addresses)}") - # Batch rows in groups of 500 - batch_rows = [] + # Calculate total rows to send + total_rows = sum(len(rows) for rows in postcode_to_addresses.values()) + logger.info(f"Total rows to send: {total_rows}") + batch_size = 500 - for postcode, rows in postcode_to_addresses.items(): - # If postcode itself is larger than batch_size, send it individually - if len(rows) > batch_size: - # First, send the current batch if it has data - if batch_rows: + # If all rows fit in one batch, just send them all at once + if total_rows <= batch_size: + all_rows = [] + for postcode, rows in postcode_to_addresses.items(): + all_rows.extend(rows) + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=all_rows, + ) + logger.info(f"Sent all {len(all_rows)} rows in single batch to address2UPRN queue") + except Exception as e: + logger.error( + f"Failed to send all rows to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) + else: + # Multi-batch processing for large datasets + batch_rows = [] + total_sent = 0 + + for postcode, rows in postcode_to_addresses.items(): + logger.info(f"Processing postcode {postcode} with {len(rows)} rows") + # If postcode itself is larger than batch_size, send it individually + if len(rows) > batch_size: + # First, send the current batch if it has data + if batch_rows: + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=batch_rows, + ) + logger.info( + f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" + ) + batch_rows = [] + except Exception as e: + logger.error( + f"Failed to send batch to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) + + # Send the large postcode on its own + try: + send_to_address2uprn_queue( + task_id=str(task_id), + rows=rows, + ) + logger.info( + f"Sent large postcode {postcode} ({len(rows)} rows) to address2UPRN queue" + ) + except Exception as e: + logger.error( + f"Failed to send large postcode to address2UPRN queue: {e}", + exc_info=True, + ) + errors.append( + { + "error": "Failed to send to address2UPRN queue", + "details": str(e), + } + ) + continue + + # If adding this postcode's rows would exceed batch_size, send current batch + current_batch_size = len(batch_rows) + len(rows) + if batch_rows and current_batch_size > batch_size: + logger.info( + f"Batch threshold reached: current {len(batch_rows)} + next postcode {len(rows)} = {current_batch_size} > {batch_size}" + ) try: send_to_address2uprn_queue( task_id=str(task_id), rows=batch_rows, ) logger.info( - f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" + f"Sent batch of {len(batch_rows)} rows to address2UPRN queue (total sent: {total_sent})" ) + total_sent += len(batch_rows) batch_rows = [] except Exception as e: logger.error( @@ -207,42 +287,24 @@ def handler(event, context, local=False): } ) - # Send the large postcode on its own - try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=rows, - ) - logger.info( - f"Sent large postcode {postcode} ({len(rows)} rows) to address2UPRN queue" - ) - except Exception as e: - logger.error( - f"Failed to send large postcode to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) - continue + # Add current postcode's rows to batch + batch_rows.extend(rows) - # If adding this postcode's rows would exceed batch_size, send current batch - if batch_rows and len(batch_rows) + len(rows) > batch_size: + # Send remaining batch + if batch_rows: try: send_to_address2uprn_queue( task_id=str(task_id), rows=batch_rows, ) + total_sent += len(batch_rows) logger.info( - f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" + f"Sent final batch of {len(batch_rows)} rows to address2UPRN queue (total sent: {total_sent})" ) batch_rows = [] except Exception as e: logger.error( - f"Failed to send batch to address2UPRN queue: {e}", + f"Failed to send final batch to address2UPRN queue: {e}", exc_info=True, ) errors.append( @@ -252,31 +314,6 @@ def handler(event, context, local=False): } ) - # Add current postcode's rows to batch - batch_rows.extend(rows) - - # Send remaining batch - if batch_rows: - try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=batch_rows, - ) - logger.info( - f"Sent final batch of {len(batch_rows)} rows to address2UPRN queue" - ) - except Exception as e: - logger.error( - f"Failed to send final batch to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) - except json.JSONDecodeError as e: logger.error(f"Invalid JSON in request body: {e}") errors.append({"error": "Invalid JSON in request body", "details": str(e)}) From 655d7dbd6ff432709e702a787a98dbd96c651d53 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 13:52:39 +0000 Subject: [PATCH 079/340] add more logic to batch and also missing libraries --- .../terraform/lambda/postcodeSplitter/variables.tf | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf index 0c8ba5b2..7bd68543 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -27,3 +27,9 @@ output "resolved_image_uri" { + + + + + + From 9b414924d06876c24f7db2663556bd07325fd275 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 16:37:55 +0000 Subject: [PATCH 080/340] run this end to end --- backend/address2UPRN/main.py | 301 +++++++++++++++++++++++++-- sfr/principal_pitch/2_export_data.py | 30 ++- 2 files changed, 309 insertions(+), 22 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 777dde0e..0f735f2a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -9,6 +9,8 @@ import re from typing import Set import json import requests +from uuid import UUID +from backend.app.db.functions.tasks.Tasks import SubTaskInterface logger = setup_logger() @@ -323,32 +325,41 @@ def get_uprn_candidates( ) -def get_uprn( +def get_uprn_with_epc_df( user_inputed_address: str, - postcode: str, + epc_df: pd.DataFrame, return_address=False, return_EPC=False, return_score=True, ): """ - Return uprn (str) - Return False if failed to find a sensible matching epc - Return Nons when epc found but no UPRN - """ - df = get_epc_data_with_postcode(postcode=postcode) + Return uprn (str) using a pre-fetched EPC dataframe. + This avoids calling the API multiple times for the same postcode. - if df.empty: + Args: + user_inputed_address: The user's address string + epc_df: Pre-fetched EPC data for the postcode + return_address: Whether to return the matched address + return_EPC: Whether to return the EPC rating + return_score: Whether to return the lexiscore + + Returns: + uprn (str), or tuple if return_address/return_EPC/return_score are True + Returns None if no match found, lexiscore < 0.7, or UPRN is empty + """ + if epc_df.empty: return None scored_df = get_uprn_candidates( - df, + epc_df, user_address=user_inputed_address, ) # Best score best_score = scored_df.iloc[0]["lexiscore"] - if best_score <= 0: + # Return None if score is below threshold + if best_score < 0.7: return None # All rank-1 rows (possible draw) @@ -386,6 +397,32 @@ def get_uprn( return found_uprn +def get_uprn( + user_inputed_address: str, + postcode: str, + return_address=False, + return_EPC=False, + return_score=True, +): + """ + Return uprn (str) + Return False if failed to find a sensible matching epc + Return None when epc found but no UPRN + + This function fetches EPC data via API for a single postcode. + For processing multiple addresses in the same postcode, use get_uprn_with_epc_df instead. + """ + df = get_epc_data_with_postcode(postcode=postcode) + + return get_uprn_with_epc_df( + user_inputed_address=user_inputed_address, + epc_df=df, + return_address=return_address, + return_EPC=return_EPC, + return_score=return_score, + ) + + def resolve_uprns_for_postcode_group( group_df: pd.DataFrame, epc_df: pd.DataFrame, @@ -508,20 +545,246 @@ def run_all_test(): ) -def handler(event, context): +def handler(event, context, local=False): print("=== Address2UPRN Lambda Handler ===") print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") + + # Handle local testing + if local is True: + event = { + "Records": [ + { + "body": json.dumps({ + "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + "rows": [ + { + "landlord_property_id": "00000002POR", + "UPRN": "766019911", + "Address 1": "9 Redland Way", + "Address 2": "Aylesbury Vale", + "postcode": "HP21 9RJ", + "landlord_property_type": "House", + "postcode_clean": "HP219RJ" + }, + { + "landlord_property_id": "00000003MTR", + "UPRN": "100120781544", + "Address 1": "16 Lime Crescent", + "Address 2": "BICESTER", + "postcode": "OX26 3XJ", + "landlord_property_type": "House", + "postcode_clean": "OX263XJ" + }, + { + "landlord_property_id": "00000004HBY", + "UPRN": "14033542", + "Address 1": "14 Dunbar Drive", + "Address 2": "Woodley", + "postcode": "RG5 4HA", + "landlord_property_type": "House", + "postcode_clean": "RG54HA" + } + ] + }) + } + ] + } + print(f"Event: {json.dumps(event, indent=2, default=str)}") - print(f"Context: {context}") print("===================================") - return {"statusCode": 200, "body": "hello world"} + # Handle both single event and batch events (SQS, etc.) + records = event.get("Records", [event]) + results = [] + errors = [] + subtask_interface = SubTaskInterface() -# TO do function dispatcher, + for record in records: + task_id = None + subtask_id = None + try: + # Parse body (inputs) + if isinstance(record.get("body"), str): + body = json.loads(record["body"]) + else: + body = record.get("body", {}) -# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate) -# fix that -# Look again at flat 1 -# pandas reader the seperate postcode_splitter -# dump into s3 + # Validate required fields + task_id = body.get("task_id") + rows = body.get("rows", []) + + if not task_id: + errors.append({"error": "Missing required field: task_id"}) + continue + + if not rows: + errors.append({"error": "Missing or empty rows data"}) + continue + + # Convert task_id to UUID + try: + task_id = UUID(task_id) if isinstance(task_id, str) else task_id + except ValueError as e: + errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) + continue + + # Create a subtask for this batch + subtask_id = subtask_interface.create_subtask( + task_id=task_id, inputs={"row_count": len(rows)} + ) + logger.info(f"Created subtask {subtask_id} for task {task_id} with {len(rows)} rows") + + # Process the rows + logger.info(f"Processing {len(rows)} rows for task {task_id}") + + # Convert rows to DataFrame + df = pd.DataFrame(rows) + + # Create user_input column by concatenating Address 1 and Address 2 + df["user_input"] = (df["Address 1"].fillna("") + " " + df["Address 2"].fillna("")).str.strip() + logger.info(f"Created user_input column from Address 1 and Address 2") + + clean_df = df.dropna(subset=["postcode_clean"]) + + postcode_to_addresses = { + postcode: group.to_dict(orient="records") + for postcode, group in clean_df.groupby("postcode_clean", sort=False) + } + + logger.info(f"Total postcodes: {len(postcode_to_addresses)}") + + # Process each postcode group + postcodes_processed = 0 + addresses_processed = 0 + uprns_found = 0 + results_data = [] + + for postcode, postcode_rows in postcode_to_addresses.items(): + logger.info(f"Processing postcode: {postcode} with {len(postcode_rows)} rows") + + # Validate postcode before processing + if not is_valid_postcode(postcode): + logger.warning(f"Postcode {postcode} is invalid, skipping") + continue + + # Fetch EPC data once per postcode + try: + epc_df = get_epc_data_with_postcode(postcode=postcode) + logger.info(f"Fetched {len(epc_df)} EPC records for postcode {postcode}") + except Exception as e: + logger.error(f"Failed to fetch EPC data for postcode {postcode}: {e}") + continue + + # Process each address in this postcode with the same EPC data + for row in postcode_rows: + try: + user_input = row.get("user_input", "") + if not user_input: + logger.warning(f"Skipping row with missing user_input for postcode {postcode}") + continue + + # Get UPRN using the pre-fetched EPC data with all return options + result = get_uprn_with_epc_df( + user_inputed_address=user_input, + epc_df=epc_df, + return_address=True, + return_EPC=True, + return_score=True + ) + + # Parse result tuple if successful + if result: + uprn, found_address, epc, score = result + uprns_found += 1 + logger.info(f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})") + + results_data.append({ + **row, # Include all original data + "found_uprn": uprn, + "found_address": found_address, + "epc_rating": epc, + "lexiscore": score + }) + else: + logger.warning(f"No UPRN found for {user_input} in {postcode}") + results_data.append({ + **row, # Include all original data + "found_uprn": None, + "found_address": None, + "epc_rating": None, + "lexiscore": None + }) + + addresses_processed += 1 + + except Exception as e: + logger.error(f"Error processing address {row.get('user_input', 'unknown')}: {e}") + # Still add the row with error markers + results_data.append({ + **row, + "found_uprn": None, + "found_address": None, + "epc_rating": None, + "score": None, + "error": str(e) + }) + continue + + postcodes_processed += 1 + + # Create results DataFrame + result_df = pd.DataFrame(results_data) + logger.info(f"Created results DataFrame with {len(result_df)} rows") + + results.append({ + "subtask_id": str(subtask_id), + "rows_processed": len(rows), + "postcodes_processed": postcodes_processed, + "addresses_processed": addresses_processed, + "uprns_found": uprns_found, + "status": "processed" + }) + + # Mark subtask as completed + try: + subtask_interface.update_subtask_status( + subtask_id, "completed", outputs={"rows_processed": len(rows)} + ) + logger.info(f"Marked subtask {subtask_id} as completed") + except Exception as db_error: + logger.error(f"Failed to mark subtask as completed: {db_error}") + + except json.JSONDecodeError as e: + logger.error(f"Invalid JSON in request body: {e}") + errors.append({"error": "Invalid JSON in request body", "details": str(e)}) + # Mark subtask as failed if we have one + if subtask_id: + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(e)} + ) + except Exception as db_error: + logger.error(f"Failed to update subtask status: {db_error}") + except Exception as e: + logger.error(f"Unexpected error processing record: {e}", exc_info=True) + errors.append({"error": "Unexpected error", "details": str(e)}) + # Mark subtask as failed if we have one + if subtask_id: + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(e)} + ) + except Exception as db_error: + logger.error(f"Failed to update subtask status: {db_error}") + + # Return error if all records failed + if errors and not results: + return {"statusCode": 500, "body": json.dumps({"errors": errors})} + + return { + "statusCode": 200, + "body": json.dumps( + {"processed": results, "errors": errors if errors else None} + ), + } diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 1841cf3f..9470710d 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -28,12 +28,12 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 544 +PORTFOLIO_ID = 476 SCENARIOS = [ - 1027, + 953, ] scenario_names = { - 1027: "EPC C", + 953: "All Properties, Most Economic", } project_name = "manchester" @@ -330,6 +330,30 @@ for scenario_id in SCENARIOS: df[df["predicted_post_works_sap"] == ""] + # Expected columns list + expected_columns = [ + "suspended_floor_insulation", + "solid_floor_insulation", + "external_wall_insulation", + "internal_wall_insulation", + "cavity_wall_insulation", + "loft_insulation", + "flat_roof_insulation", + "room_roof_insulation", + "secondary_glazing", + "double_glazing", + "solar_pv", + "high_heat_retention_storage_heaters", + "air_source_heat_pump", + "boiler_upgrade", + "roomstat_programmer_trvs", + "time_temperature_zone_control", + ] + # Add missing columns with default values + for col in expected_columns: + if col not in df.columns: + df[col] = "" + # Create excel to store to filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx" with pd.ExcelWriter(filename) as writer: From 762dccde01761b6c026dc83820a65e2279ac4d1b Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 16:44:08 +0000 Subject: [PATCH 081/340] run this end to end --- backend/address2UPRN/main.py | 179 +++++++++++------- .../modules/s3_iam_policy/variables.tf | 3 + 2 files changed, 109 insertions(+), 73 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 0f735f2a..6841d6a6 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -555,38 +555,40 @@ def handler(event, context, local=False): event = { "Records": [ { - "body": json.dumps({ - "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "rows": [ - { - "landlord_property_id": "00000002POR", - "UPRN": "766019911", - "Address 1": "9 Redland Way", - "Address 2": "Aylesbury Vale", - "postcode": "HP21 9RJ", - "landlord_property_type": "House", - "postcode_clean": "HP219RJ" - }, - { - "landlord_property_id": "00000003MTR", - "UPRN": "100120781544", - "Address 1": "16 Lime Crescent", - "Address 2": "BICESTER", - "postcode": "OX26 3XJ", - "landlord_property_type": "House", - "postcode_clean": "OX263XJ" - }, - { - "landlord_property_id": "00000004HBY", - "UPRN": "14033542", - "Address 1": "14 Dunbar Drive", - "Address 2": "Woodley", - "postcode": "RG5 4HA", - "landlord_property_type": "House", - "postcode_clean": "RG54HA" - } - ] - }) + "body": json.dumps( + { + "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + "rows": [ + { + "landlord_property_id": "00000002POR", + "UPRN": "766019911", + "Address 1": "9 Redland Way", + "Address 2": "Aylesbury Vale", + "postcode": "HP21 9RJ", + "landlord_property_type": "House", + "postcode_clean": "HP219RJ", + }, + { + "landlord_property_id": "00000003MTR", + "UPRN": "100120781544", + "Address 1": "16 Lime Crescent", + "Address 2": "BICESTER", + "postcode": "OX26 3XJ", + "landlord_property_type": "House", + "postcode_clean": "OX263XJ", + }, + { + "landlord_property_id": "00000004HBY", + "UPRN": "14033542", + "Address 1": "14 Dunbar Drive", + "Address 2": "Woodley", + "postcode": "RG5 4HA", + "landlord_property_type": "House", + "postcode_clean": "RG54HA", + }, + ], + } + ) } ] } @@ -633,7 +635,9 @@ def handler(event, context, local=False): subtask_id = subtask_interface.create_subtask( task_id=task_id, inputs={"row_count": len(rows)} ) - logger.info(f"Created subtask {subtask_id} for task {task_id} with {len(rows)} rows") + logger.info( + f"Created subtask {subtask_id} for task {task_id} with {len(rows)} rows" + ) # Process the rows logger.info(f"Processing {len(rows)} rows for task {task_id}") @@ -642,11 +646,13 @@ def handler(event, context, local=False): df = pd.DataFrame(rows) # Create user_input column by concatenating Address 1 and Address 2 - df["user_input"] = (df["Address 1"].fillna("") + " " + df["Address 2"].fillna("")).str.strip() + df["user_input"] = ( + df["Address 1"].fillna("") + " " + df["Address 2"].fillna("") + ).str.strip() logger.info(f"Created user_input column from Address 1 and Address 2") clean_df = df.dropna(subset=["postcode_clean"]) - + postcode_to_addresses = { postcode: group.to_dict(orient="records") for postcode, group in clean_df.groupby("postcode_clean", sort=False) @@ -661,7 +667,9 @@ def handler(event, context, local=False): results_data = [] for postcode, postcode_rows in postcode_to_addresses.items(): - logger.info(f"Processing postcode: {postcode} with {len(postcode_rows)} rows") + logger.info( + f"Processing postcode: {postcode} with {len(postcode_rows)} rows" + ) # Validate postcode before processing if not is_valid_postcode(postcode): @@ -671,9 +679,13 @@ def handler(event, context, local=False): # Fetch EPC data once per postcode try: epc_df = get_epc_data_with_postcode(postcode=postcode) - logger.info(f"Fetched {len(epc_df)} EPC records for postcode {postcode}") + logger.info( + f"Fetched {len(epc_df)} EPC records for postcode {postcode}" + ) except Exception as e: - logger.error(f"Failed to fetch EPC data for postcode {postcode}: {e}") + logger.error( + f"Failed to fetch EPC data for postcode {postcode}: {e}" + ) continue # Process each address in this postcode with the same EPC data @@ -681,7 +693,9 @@ def handler(event, context, local=False): try: user_input = row.get("user_input", "") if not user_input: - logger.warning(f"Skipping row with missing user_input for postcode {postcode}") + logger.warning( + f"Skipping row with missing user_input for postcode {postcode}" + ) continue # Get UPRN using the pre-fetched EPC data with all return options @@ -690,45 +704,57 @@ def handler(event, context, local=False): epc_df=epc_df, return_address=True, return_EPC=True, - return_score=True + return_score=True, ) # Parse result tuple if successful if result: uprn, found_address, epc, score = result uprns_found += 1 - logger.info(f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})") + logger.info( + f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})" + ) - results_data.append({ - **row, # Include all original data - "found_uprn": uprn, - "found_address": found_address, - "epc_rating": epc, - "lexiscore": score - }) + results_data.append( + { + **row, # Include all original data + "found_uprn": uprn, + "found_address": found_address, + "epc_rating": epc, + "lexiscore": score, + } + ) else: - logger.warning(f"No UPRN found for {user_input} in {postcode}") - results_data.append({ - **row, # Include all original data - "found_uprn": None, - "found_address": None, - "epc_rating": None, - "lexiscore": None - }) + logger.warning( + f"No UPRN found for {user_input} in {postcode}" + ) + results_data.append( + { + **row, # Include all original data + "found_uprn": None, + "found_address": None, + "epc_rating": None, + "lexiscore": None, + } + ) addresses_processed += 1 except Exception as e: - logger.error(f"Error processing address {row.get('user_input', 'unknown')}: {e}") + logger.error( + f"Error processing address {row.get('user_input', 'unknown')}: {e}" + ) # Still add the row with error markers - results_data.append({ - **row, - "found_uprn": None, - "found_address": None, - "epc_rating": None, - "score": None, - "error": str(e) - }) + results_data.append( + { + **row, + "found_uprn": None, + "found_address": None, + "epc_rating": None, + "score": None, + "error": str(e), + } + ) continue postcodes_processed += 1 @@ -737,14 +763,16 @@ def handler(event, context, local=False): result_df = pd.DataFrame(results_data) logger.info(f"Created results DataFrame with {len(result_df)} rows") - results.append({ - "subtask_id": str(subtask_id), - "rows_processed": len(rows), - "postcodes_processed": postcodes_processed, - "addresses_processed": addresses_processed, - "uprns_found": uprns_found, - "status": "processed" - }) + results.append( + { + "subtask_id": str(subtask_id), + "rows_processed": len(rows), + "postcodes_processed": postcodes_processed, + "addresses_processed": addresses_processed, + "uprns_found": uprns_found, + "status": "processed", + } + ) # Mark subtask as completed try: @@ -788,3 +816,8 @@ def handler(event, context, local=False): {"processed": results, "errors": errors if errors else None} ), } + + +# TODO: +# Don't add results to return messages as its too verbose +# capture the exepection as e, into s3, to find the logs go to s3 diff --git a/infrastructure/terraform/modules/s3_iam_policy/variables.tf b/infrastructure/terraform/modules/s3_iam_policy/variables.tf index ed53ea1f..e2b3d7a8 100644 --- a/infrastructure/terraform/modules/s3_iam_policy/variables.tf +++ b/infrastructure/terraform/modules/s3_iam_policy/variables.tf @@ -37,3 +37,6 @@ variable "tags" { type = map(string) default = {} } + + + From 538f207d2f4d5950d9a14b53bb0f28a27211ff13 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 16:57:27 +0000 Subject: [PATCH 082/340] env variables added --- .github/workflows/deploy_terraform.yml | 7 +++ backend/address2UPRN/handler/Dockerfile | 19 ++++++-- backend/address2UPRN/main.py | 1 + .../terraform/lambda/address2UPRN/main.tf | 43 ++++++++++++++++--- 4 files changed, 62 insertions(+), 8 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 514fc7af..20242ec8 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -90,10 +90,17 @@ jobs: ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} dockerfile_path: backend/address2UPRN/handler/Dockerfile build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} # ============================================================ # 3️⃣ Deploy Address 2 UPRN Lambda diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index d01550a2..419b4d66 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -1,6 +1,16 @@ FROM public.ecr.aws/lambda/python:3.10 # FROM python:3.11.10-bullseye + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} + + # Set working directory (Lambda task root) WORKDIR /var/task @@ -13,10 +23,13 @@ COPY backend/address2UPRN/handler/requirements.txt . # Install dependencies into Lambda runtime RUN pip install --no-cache-dir -r requirements.txt -# ----------------------------- -# Copy application code -# ----------------------------- + +# Copy necessary files for database and utility imports COPY utils/ utils/ +COPY backend/ backend/ +COPY datatypes/ datatypes/ + +# Copy the handler COPY backend/address2UPRN/main.py . # ----------------------------- diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 6841d6a6..d361db15 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -821,3 +821,4 @@ def handler(event, context, local=False): # TODO: # Don't add results to return messages as its too verbose # capture the exepection as e, into s3, to find the logs go to s3 +# Upload results to s3 as well as csv diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 46b193f2..4a82d634 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -1,3 +1,19 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this + region = "eu-west-2" + } +} +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + module "address2uprn" { source = "../modules/lambda_with_sqs" @@ -6,9 +22,26 @@ module "address2uprn" { image_uri = local.image_uri - - environment = { - STAGE = var.stage - LOG_LEVEL = "info" - } + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + EPC_AUTH_TOKEN = "test" + ENGINE_SQS_URL = "test" + ENERGY_ASSESSMENTS_BUCKET = "test" + }, + ) } From a7509aecdc827806d4ed092f4788912c45001eae Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 16:59:57 +0000 Subject: [PATCH 083/340] added very serious logs --- backend/address2UPRN/main.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index d361db15..2cec8a2e 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -807,6 +807,7 @@ def handler(event, context, local=False): logger.error(f"Failed to update subtask status: {db_error}") # Return error if all records failed + logger.fatal(results) if errors and not results: return {"statusCode": 500, "body": json.dumps({"errors": errors})} From 3ee12c5f0ede5b6a6b0af0fe6c825826b429b5ba Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:00:09 +0000 Subject: [PATCH 084/340] redploy --- .github/workflows/deploy_terraform.yml | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 20242ec8..ebdeb32d 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -201,4 +201,7 @@ jobs: secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ secrets.DEV_AWS_REGION }} \ No newline at end of file + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + + \ No newline at end of file From d4fcf0c6cd309b4674638128af4cf1744c2979b3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:06:41 +0000 Subject: [PATCH 085/340] add requirements --- .github/workflows/deploy_terraform.yml | 3 +++ backend/address2UPRN/handler/requirements.txt | 5 +++++ 2 files changed, 8 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index ebdeb32d..8a889833 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -204,4 +204,7 @@ jobs: AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + + \ No newline at end of file diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt index eba2c846..6ef41b2d 100644 --- a/backend/address2UPRN/handler/requirements.txt +++ b/backend/address2UPRN/handler/requirements.txt @@ -4,3 +4,8 @@ requests tqdm openpyxl epc-api-python==1.0.2 +boto3==1.35.44 +sqlmodel +sqlalchemy==2.0.36 +psycopg2-binary==2.9.10 +pydantic-settings==2.6.0 \ No newline at end of file From 47c14e798c10c67a3ecbc17e6526ff3c70f28778 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:20:32 +0000 Subject: [PATCH 086/340] add epc auth token --- .github/workflows/_build_image.yml | 3 +++ .github/workflows/deploy_terraform.yml | 3 ++- infrastructure/terraform/lambda/address2UPRN/main.tf | 1 - 3 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 641e31f9..a5e16a51 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -38,6 +38,8 @@ on: required: false DEV_DB_NAME: required: false + EPC_AUTH_TOKEN: + required: false jobs: build: @@ -47,6 +49,7 @@ jobs: DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }} outputs: image_digest: ${{ steps.digest.outputs.image_digest }} diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 8a889833..c089d0c5 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -94,6 +94,7 @@ jobs: DEV_DB_HOST=$DEV_DB_HOST DEV_DB_PORT=$DEV_DB_PORT DEV_DB_NAME=$DEV_DB_NAME + EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -101,6 +102,7 @@ jobs: DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} # ============================================================ # 3️⃣ Deploy Address 2 UPRN Lambda @@ -207,4 +209,3 @@ jobs: - \ No newline at end of file diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 4a82d634..caf06785 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -39,7 +39,6 @@ module "address2uprn" { SECRET_KEY = "test" PLAN_TRIGGER_BUCKET = "test" DATA_BUCKET = "test" - EPC_AUTH_TOKEN = "test" ENGINE_SQS_URL = "test" ENERGY_ASSESSMENTS_BUCKET = "test" }, From c3ff4c9d6b5f14eec9a8adf904875e7e5f91b250 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:21:12 +0000 Subject: [PATCH 087/340] add epc auth token --- backend/address2UPRN/handler/Dockerfile | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 419b4d66..155c37ad 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -5,10 +5,12 @@ FROM public.ecr.aws/lambda/python:3.10 ARG DEV_DB_HOST ARG DEV_DB_PORT ARG DEV_DB_NAME +ARG EPC_AUTH_TOKEN ENV DB_HOST=${DEV_DB_HOST} ENV DB_PORT=${DEV_DB_PORT} ENV DB_NAME=${DEV_DB_NAME} +ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}} # Set working directory (Lambda task root) From 9faba4af42ededb73859452342451cf8d3ae27a0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 11 Feb 2026 17:22:00 +0000 Subject: [PATCH 088/340] set up postgres class --- backend/categorisation/categorisation_postgres.py | 5 +++++ backend/categorisation/processor.py | 10 +++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 backend/categorisation/categorisation_postgres.py diff --git a/backend/categorisation/categorisation_postgres.py b/backend/categorisation/categorisation_postgres.py new file mode 100644 index 00000000..f2a44e5b --- /dev/null +++ b/backend/categorisation/categorisation_postgres.py @@ -0,0 +1,5 @@ +from backend.app.db.connection import db_session + + +class CategorisationPostgres: + pass diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index aa519c6e..f6e4f7dc 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,2 +1,10 @@ -def process_portfolio() -> None: +def process_portfolio(portfolio_id: int) -> None: + # Get all plans (including scenarios) for all properties in the portfolio + + # For each property, get all compliant plans + + # For each property, find the cheapest compliant plan + + # For each property, set is_default for cheapest compliant plan + # If no compliant plans, set it to the cheapest plan pass From 6618eafa8ccf9098992c09950127e7d68be534bb Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:22:24 +0000 Subject: [PATCH 089/340] additional bracket removed --- backend/address2UPRN/handler/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 155c37ad..07159357 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -10,7 +10,7 @@ ARG EPC_AUTH_TOKEN ENV DB_HOST=${DEV_DB_HOST} ENV DB_PORT=${DEV_DB_PORT} ENV DB_NAME=${DEV_DB_NAME} -ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN}} +ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN} # Set working directory (Lambda task root) From d4cd63d749785b003bf9da2558aaa7cd1647a40e Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:22:33 +0000 Subject: [PATCH 090/340] additional bracket removed --- .github/workflows/deploy_terraform.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index c089d0c5..c5ed7e93 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -209,3 +209,7 @@ jobs: + + + + From e7691570fdf5ae1cd5651001bc310e180473ecd3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:42:30 +0000 Subject: [PATCH 091/340] merge --- .github/workflows/deploy_terraform.yml | 3 +++ backend/address2UPRN/main.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index c5ed7e93..122fb2e1 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -213,3 +213,6 @@ jobs: + + + diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 2cec8a2e..7e001b8d 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -807,7 +807,7 @@ def handler(event, context, local=False): logger.error(f"Failed to update subtask status: {db_error}") # Return error if all records failed - logger.fatal(results) + logger.info(results) if errors and not results: return {"statusCode": 500, "body": json.dumps({"errors": errors})} From b1164ffd90b89b054e05d4755408b77da501cfb2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:50:47 +0000 Subject: [PATCH 092/340] get rid of local --- backend/address2UPRN/main.py | 7 ++++--- backend/postcode_splitter/main.py | 7 +++++-- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 7e001b8d..812b9206 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -358,9 +358,9 @@ def get_uprn_with_epc_df( # Best score best_score = scored_df.iloc[0]["lexiscore"] - # Return None if score is below threshold - if best_score < 0.7: - return None + # # Return None if score is below threshold + # if best_score < 0.7: + # return None # All rank-1 rows (possible draw) top_rank_df = scored_df[scored_df["lexirank"] == 1] @@ -807,6 +807,7 @@ def handler(event, context, local=False): logger.error(f"Failed to update subtask status: {db_error}") # Return error if all records failed + logger.info(results_data) logger.info(results) if errors and not results: return {"statusCode": 500, "body": json.dumps({"errors": errors})} diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index eb7cf044..943435b9 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -162,7 +162,8 @@ def handler(event, context, local=False): csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) # just do 5 well we are testing, sqs connection - df = df.head(5) + if local: + df = df.head(5) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") # Sanitise postcodes @@ -193,7 +194,9 @@ def handler(event, context, local=False): task_id=str(task_id), rows=all_rows, ) - logger.info(f"Sent all {len(all_rows)} rows in single batch to address2UPRN queue") + logger.info( + f"Sent all {len(all_rows)} rows in single batch to address2UPRN queue" + ) except Exception as e: logger.error( f"Failed to send all rows to address2UPRN queue: {e}", From c9ec097a438b8b8a49b5d9bfcdf23f0d5b9e138d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 11 Feb 2026 17:55:43 +0000 Subject: [PATCH 093/340] pr review --- .github/workflows/deploy_terraform.yml | 18 ++---------------- backend/address2UPRN/main.py | 1 - 2 files changed, 2 insertions(+), 17 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 122fb2e1..da98f4d9 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -116,8 +116,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.address2uprn_image.outputs.image_digest }} - # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - terraform_apply: 'true' + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -158,8 +157,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} - # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - terraform_apply: 'true' + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -204,15 +202,3 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - - - - - - - - - - - - diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 812b9206..8d1ba21d 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -17,7 +17,6 @@ logger = setup_logger() EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", - "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=", ) if EPC_AUTH_TOKEN is None: From 598a612b402bf3df2ac8dc070b9e3be3e0400f4c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 09:23:47 +0000 Subject: [PATCH 094/340] define db methods --- .../db/functions/recommendations_functions.py | 272 +++++++++++------- .../categorisation/categorisation_postgres.py | 5 - 2 files changed, 175 insertions(+), 102 deletions(-) delete mode 100644 backend/categorisation/categorisation_postgres.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 51562f55..c16adea2 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -3,15 +3,29 @@ from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( - Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario + Plan, + Recommendation, + RecommendationMaterials, + PlanRecommendations, + Scenario, ) from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session def prepare_plan_data( - p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations, - rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0, + p, + body, + scenario_id, + eco_packages, + valuations, + new_sap_points, + new_epc, + default_recommendations, + rebaselining_carbon=0, + rebaselining_heat_demand=0, + rebaselining_kwh=0, + rebaselining_bills=0, ): """ Utility function to prepare the data that goes into the production of a plan. Is a fairly rough and unstructured @@ -32,21 +46,37 @@ def prepare_plan_data( """ # Plan carbon savings co2_savings = sum( - [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["co2_equivalent_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] ) post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings # Plan bill savings energy_bill_savings = sum( - [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["energy_cost_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) + post_energy_bill = ( + sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings ) - post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings # energy consumption energy_consumption_savings = sum( - [r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["kwh_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) + post_energy_consumption = ( + p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings ) - post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings valuation_post_retrofit, valuation_increase = None, None if valuations["current_value"]: @@ -54,9 +84,19 @@ def prepare_plan_data( valuation_post_retrofit = valuations["average_increased_value"] # plan costing data - cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)]) + cost_of_works = sum( + [ + r["total"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) contingency_cost = sum( - [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)] + [ + r.get("contingency", 0) + for r in default_recommendations + if not r.get("already_installed", False) + ] ) return { @@ -86,7 +126,7 @@ def prepare_plan_data( "valuation_increase": valuation_increase, "cost_of_works": float(cost_of_works), "contingency_cost": float(contingency_cost), - "plan_type": eco_packages.get(p.id, (None, None, None))[2] + "plan_type": eco_packages.get(p.id, (None, None, None))[2], } @@ -119,11 +159,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int for p in plans_to_create ] - stmt = ( - insert(Plan) - .values(payload) - .returning(Plan.id, Plan.property_id) - ) + stmt = insert(Plan).values(payload).returning(Plan.id, Plan.property_id) result = session.execute(stmt).all() @@ -133,9 +169,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int def create_scenario(session: Session, scenario: dict) -> int: existing_scenario = ( - session.query(Scenario) - .filter_by(portfolio_id=scenario["portfolio_id"]) - .first() + session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first() ) scenario["is_default"] = not bool(existing_scenario) @@ -167,7 +201,9 @@ def create_recommendation(session: Session, recommendation): raise e -def create_recommendation_material(session: Session, recommendation_id, material_id, depth): +def create_recommendation_material( + session: Session, recommendation_id, material_id, depth +): """ This function will create a record for the recommendation_material in the database if it does not exist. :param session: The databse session @@ -177,9 +213,7 @@ def create_recommendation_material(session: Session, recommendation_id, material """ new_recommendation_material = RecommendationMaterials( - recommendation_id=recommendation_id, - material_id=material_id, - depth=depth + recommendation_id=recommendation_id, material_id=material_id, depth=depth ) session.add(new_recommendation_material) session.flush() @@ -196,13 +230,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids): """ # Prepare a list of dictionaries for bulk insert - data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids] + data = [ + {"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids + ] # Bulk insert using SQLAlchemy's core API session.execute(insert(PlanRecommendations).values(data)) -def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id): +def upload_recommendations( + session: Session, recommendations_to_upload, property_id, new_plan_id +): try: # Prepare data for bulk insert for Recommendation recommendations_data = [ @@ -213,8 +251,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "description": rec["description"], "estimated_cost": float(rec["total"]), "default": rec["default"], - "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None, - "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None, + "starting_u_value": ( + float(rec.get("starting_u_value")) + if rec.get("starting_u_value") + else None + ), + "new_u_value": ( + float(rec.get("new_u_value")) if rec.get("new_u_value") else None + ), "sap_points": float(rec["sap_points"]), "energy_savings": float(rec["heat_demand"]), "kwh_savings": float(rec["kwh_savings"]), @@ -223,13 +267,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "energy_cost_savings": float(rec["energy_cost_savings"]), "labour_days": float(rec["labour_days"]), "already_installed": rec["already_installed"], - "heat_demand": float(rec["heat_demand"]) + "heat_demand": float(rec["heat_demand"]), } for rec in recommendations_to_upload ] # Insert the recommendations, get back the IDs - stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data) + stmt = ( + insert(Recommendation) + .returning(Recommendation.id) + .values(recommendations_data) + ) result = session.execute(stmt) uploaded_recommendation_ids = [row[0] for row in result] @@ -243,11 +291,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "quantity_unit": part.get("quantity_unit", None), "estimated_cost": float(part.get("total", part.get("total_cost"))), } - for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids) + for rec, recommendation_id in zip( + recommendations_to_upload, uploaded_recommendation_ids + ) for part in rec["parts"] ] - session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data) + session.bulk_insert_mappings( + RecommendationMaterials, recommendation_materials_data + ) # flush the changes to get the newly created IDs session.flush() @@ -283,25 +335,27 @@ def bulk_upload_recommendations_and_materials( plan_ids_by_index = [] for rec in recommendation_payload: - recommendation_rows.append({ - "property_id": rec["property_id"], - "type": rec["type"], - "measure_type": rec["measure_type"], - "description": rec["description"], - "estimated_cost": rec["estimated_cost"], - "default": rec["default"], - "starting_u_value": rec["starting_u_value"], - "new_u_value": rec["new_u_value"], - "sap_points": rec["sap_points"], - "heat_demand": rec["heat_demand"], - "kwh_savings": rec["kwh_savings"], - "co2_equivalent_savings": rec["co2_equivalent_savings"], - "energy_savings": rec["energy_savings"], - "energy_cost_savings": rec["energy_cost_savings"], - "total_work_hours": rec["total_work_hours"], - "labour_days": rec["labour_days"], - "already_installed": rec["already_installed"], - }) + recommendation_rows.append( + { + "property_id": rec["property_id"], + "type": rec["type"], + "measure_type": rec["measure_type"], + "description": rec["description"], + "estimated_cost": rec["estimated_cost"], + "default": rec["default"], + "starting_u_value": rec["starting_u_value"], + "new_u_value": rec["new_u_value"], + "sap_points": rec["sap_points"], + "heat_demand": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], + "co2_equivalent_savings": rec["co2_equivalent_savings"], + "energy_savings": rec["energy_savings"], + "energy_cost_savings": rec["energy_cost_savings"], + "total_work_hours": rec["total_work_hours"], + "labour_days": rec["labour_days"], + "already_installed": rec["already_installed"], + } + ) parts_by_index.append(rec["parts"]) plan_ids_by_index.append(rec["plan_id"]) @@ -310,9 +364,7 @@ def bulk_upload_recommendations_and_materials( # 2. Insert recommendations and get IDs # --------------------------------------------------------- result = session.execute( - insert(Recommendation) - .values(recommendation_rows) - .returning(Recommendation.id) + insert(Recommendation).values(recommendation_rows).returning(Recommendation.id) ) recommendation_ids = [row[0] for row in result] @@ -324,19 +376,19 @@ def bulk_upload_recommendations_and_materials( for recommendation_id, parts in zip(recommendation_ids, parts_by_index): for part in parts: - materials_rows.append({ - "recommendation_id": recommendation_id, - "material_id": part["material_id"], - "depth": part["depth"], - "quantity": part["quantity"], - "quantity_unit": part["quantity_unit"], - "estimated_cost": part["estimated_cost"], - }) + materials_rows.append( + { + "recommendation_id": recommendation_id, + "material_id": part["material_id"], + "depth": part["depth"], + "quantity": part["quantity"], + "quantity_unit": part["quantity_unit"], + "estimated_cost": part["estimated_cost"], + } + ) if materials_rows: - session.execute( - insert(RecommendationMaterials).values(materials_rows) - ) + session.execute(insert(RecommendationMaterials).values(materials_rows)) # --------------------------------------------------------- # 4. Insert plan ↔ recommendation links @@ -346,26 +398,22 @@ def bulk_upload_recommendations_and_materials( "plan_id": plan_id, "recommendation_id": recommendation_id, } - for plan_id, recommendation_id in zip( - plan_ids_by_index, recommendation_ids - ) + for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids) ] - session.execute( - insert(PlanRecommendations).values(plan_recommendation_rows) - ) + session.execute(insert(PlanRecommendations).values(plan_recommendation_rows)) def chunked(iterable, size=100): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] def get_property_ids(portfolio_id: int) -> list[int]: with db_read_session() as session: return [ - pid for (pid,) in - session.query(PropertyModel.id) + pid + for (pid,) in session.query(PropertyModel.id) .filter(PropertyModel.portfolio_id == portfolio_id) .all() ] @@ -381,12 +429,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # recommendation_materials (via recommendation) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING recommendation r WHERE rm.recommendation_id = r.id AND r.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -394,12 +444,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # plan_recommendations (via plan) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations pr USING plan p WHERE pr.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -407,13 +459,15 @@ def delete_property_batch(session: Session, property_ids: list[int]): # funding_package_measures # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM funding_package_measures fpm USING funding_package fp, plan p WHERE fpm.funding_package_id = fp.id AND fp.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -421,10 +475,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # inspections (direct) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM inspections WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -432,12 +488,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # funding_package # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM funding_package fp USING plan p WHERE fp.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -445,10 +503,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # recommendation (direct — CRITICAL FIX) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -456,10 +516,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # plan (direct) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -467,18 +529,22 @@ def delete_property_batch(session: Session, property_ids: list[int]): # property-scoped tables # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM property_details_epc WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) session.execute( - text(""" + text( + """ DELETE FROM property_targets WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -486,10 +552,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # properties LAST # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM property WHERE id = ANY(:property_ids) - """), + """ + ), params, ) @@ -509,10 +577,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int): return with db_session() as session: - session.execute( - delete(Scenario) - .where(Scenario.portfolio_id == portfolio_id) - ) + session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) print("Deleted scenarios for empty portfolio") @@ -530,6 +595,7 @@ def clear_portfolio_in_batches( total = (len(property_ids) + property_batch_size - 1) // property_batch_size import time + for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1): print(f"Deleting batch {i}/{total} ({len(batch)} properties)") start_time = time.time() @@ -542,3 +608,15 @@ def clear_portfolio_in_batches( delete_portfolio_scenarios_if_empty(portfolio_id) print("Portfolio cleared in batches.") + + +def get_plans_by_portfolio_id(portfolio_id: int) -> list[Plan]: + raise NotImplementedError + + +def get_scenario(scenario_id: int) -> list[Scenario]: + raise NotImplementedError + + +def set_plan_default(plan_id: int, is_default: bool) -> bool: + raise NotImplementedError diff --git a/backend/categorisation/categorisation_postgres.py b/backend/categorisation/categorisation_postgres.py deleted file mode 100644 index f2a44e5b..00000000 --- a/backend/categorisation/categorisation_postgres.py +++ /dev/null @@ -1,5 +0,0 @@ -from backend.app.db.connection import db_session - - -class CategorisationPostgres: - pass From e7f941d5e4beaa640a5079a4badb678af742eb01 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 10:00:47 +0000 Subject: [PATCH 095/340] use sqlalchemy 2.0 typing in recommendations , and write processing logic --- .../db/functions/recommendations_functions.py | 5 +- backend/app/db/models/recommendations.py | 107 ++++++++++++------ .../categorisation/categorisation_logic.py | 12 ++ backend/categorisation/processor.py | 31 ++++- 4 files changed, 116 insertions(+), 39 deletions(-) create mode 100644 backend/categorisation/categorisation_logic.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index c16adea2..54754ee0 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,3 +1,4 @@ +from typing import List from sqlalchemy import text from sqlalchemy import insert, delete from sqlalchemy.orm import Session @@ -610,11 +611,11 @@ def clear_portfolio_in_batches( print("Portfolio cleared in batches.") -def get_plans_by_portfolio_id(portfolio_id: int) -> list[Plan]: +def get_plans_by_portfolio_id(portfolio_id: int) -> List[Plan]: raise NotImplementedError -def get_scenario(scenario_id: int) -> list[Scenario]: +def get_scenario(scenario_id: int) -> List[Scenario]: raise NotImplementedError diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index ed1fcefa..928c96bd 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -1,5 +1,15 @@ -from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum -from sqlalchemy.orm import declarative_base +from typing import Iterable, Optional +from sqlalchemy import ( + Column, + BigInteger, + String, + Float, + Boolean, + TIMESTAMP, + ForeignKey, + Enum, +) +from sqlalchemy.orm import declarative_base, Mapped, mapped_column from sqlalchemy.sql import func from backend.app.db.models.portfolio import Portfolio, PropertyModel from backend.app.db.models.materials import Material @@ -11,7 +21,7 @@ Base = declarative_base() class Recommendation(Base): - __tablename__ = 'recommendation' + __tablename__ = "recommendation" id = Column(BigInteger, primary_key=True, autoincrement=True) property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) @@ -37,15 +47,20 @@ class Recommendation(Base): class RecommendationMaterials(Base): - __tablename__ = 'recommendation_materials' + __tablename__ = "recommendation_materials" id = Column(BigInteger, primary_key=True, autoincrement=True) - recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + recommendation_id = Column( + BigInteger, ForeignKey("recommendation.id"), nullable=False + ) material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) depth = Column(Float, nullable=False) quantity = Column(Float, nullable=False) - quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False) + quantity_unit = Column( + Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) estimated_cost = Column(Float, nullable=False) @@ -58,19 +73,35 @@ class PlanTypeEnum(enum.Enum): class Plan(Base): - __tablename__ = 'plan' + __tablename__ = "plan" - id = Column(BigInteger, primary_key=True, autoincrement=True) - name = Column(String, nullable=True, default="") - portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) - property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) - scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - is_default = Column(Boolean, nullable=False) - valuation_increase_lower_bound = Column(Float) - valuation_increase_upper_bound = Column(Float) - valuation_increase_average = Column(Float) - plan_type = Column( + id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) + + name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="") + + portfolio_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(Portfolio.id), nullable=False + ) + + property_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(PropertyModel.id), nullable=False + ) + + scenario_id: Mapped[Optional[int]] = mapped_column( + BigInteger, ForeignKey("scenario.id") + ) + + created_at: Mapped = mapped_column( # type: ignore + TIMESTAMP, nullable=False, server_default=func.now() + ) + + is_default: Mapped[bool] = mapped_column(Boolean, nullable=False) + + valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float) + + plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column( Enum( PlanTypeEnum, name="plan_type", @@ -79,31 +110,35 @@ class Plan(Base): ), nullable=True, ) - post_sap_points = Column(Float) - post_epc_rating = Column(Enum(Epc)) - post_co2_emissions = Column(Float) - co2_savings = Column(Float) - post_energy_bill = Column(Float) - energy_bill_savings = Column(Float) - post_energy_consumption = Column(Float) # energy demand in kWh/year - energy_consumption_savings = Column(Float) - valuation_post_retrofit = Column(Float) - valuation_increase = Column(Float) + + post_sap_points: Mapped[Optional[float]] = mapped_column(Float) + post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc)) + post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float) + co2_savings: Mapped[Optional[float]] = mapped_column(Float) + post_energy_bill: Mapped[Optional[float]] = mapped_column(Float) + energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float) + post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float) + energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float) + valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase: Mapped[Optional[float]] = mapped_column(Float) + # Financial metrics, excluding funding - cost_of_works = Column(Float) - contingency_cost = Column(Float) + cost_of_works: Mapped[Optional[float]] = mapped_column(Float) + contingency_cost: Mapped[Optional[float]] = mapped_column(Float) class PlanRecommendations(Base): - __tablename__ = 'plan_recommendations' + __tablename__ = "plan_recommendations" id = Column(BigInteger, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False) - recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False) + recommendation_id = Column( + BigInteger, ForeignKey("recommendation.id"), nullable=False + ) class Scenario(Base): - __tablename__ = 'scenario' + __tablename__ = "scenario" id = Column(BigInteger, primary_key=True, autoincrement=True) name = Column(String, nullable=False) @@ -201,3 +236,7 @@ class InstalledMeasure(Base): heat_demand_savings = Column(Float) source = Column(String) is_active = Column(Boolean, nullable=False, default=True) + + +def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]: + return [m.value for m in e] diff --git a/backend/categorisation/categorisation_logic.py b/backend/categorisation/categorisation_logic.py new file mode 100644 index 00000000..503b3e54 --- /dev/null +++ b/backend/categorisation/categorisation_logic.py @@ -0,0 +1,12 @@ +from typing import List +from backend.app.db.models.recommendations import Plan + + +class CategorisationLogic: + @staticmethod + def get_compliant_plans(plans: List[Plan]) -> List[Plan]: + raise NotImplementedError + + @staticmethod + def get_cheapest_plan(plans: List[Plan]) -> Plan: + raise NotImplementedError diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index f6e4f7dc..0c867267 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,10 +1,35 @@ +from typing import List + +from backend.app.db.functions.recommendations_functions import ( + get_plans_by_portfolio_id, + get_property_ids, + set_plan_default, +) +from backend.app.db.models.recommendations import Plan +from backend.categorisation.categorisation_logic import CategorisationLogic + + def process_portfolio(portfolio_id: int) -> None: # Get all plans (including scenarios) for all properties in the portfolio + plans: List[Plan] = get_plans_by_portfolio_id(portfolio_id) # For each property, get all compliant plans + property_ids: List[int] = get_property_ids(portfolio_id) # For each property, find the cheapest compliant plan + for id in property_ids: + plans_for_property: List[Plan] = [ + plan for plan in plans if plan.property_id == id + ] - # For each property, set is_default for cheapest compliant plan - # If no compliant plans, set it to the cheapest plan - pass + compliant_plans_for_property: List[Plan] = ( + CategorisationLogic.get_compliant_plans(plans_for_property) + ) + + # Choose cheapest compliant plan, or fallback to cheapest overall plan + plans_to_consider = compliant_plans_for_property or plans_for_property + cheapest_plan = CategorisationLogic.get_cheapest_plan(plans_to_consider) + + # Update DB: set is_default = True for cheapest plan, False for others + for plan in plans_for_property: + set_plan_default(plan.id, plan.id == cheapest_plan.id) From 73607a51176ccef2a3fd61ae33a8f02ea5478234 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 11:08:35 +0000 Subject: [PATCH 096/340] sqlalchemy 2.0 typing in scenario --- backend/app/db/models/recommendations.py | 90 ++++++++++++++---------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 928c96bd..36872394 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -11,6 +11,8 @@ from sqlalchemy import ( ) from sqlalchemy.orm import declarative_base, Mapped, mapped_column from sqlalchemy.sql import func +from datetime import datetime + from backend.app.db.models.portfolio import Portfolio, PropertyModel from backend.app.db.models.materials import Material from backend.app.db.models.portfolio import Epc @@ -140,47 +142,57 @@ class PlanRecommendations(Base): class Scenario(Base): __tablename__ = "scenario" - id = Column(BigInteger, primary_key=True, autoincrement=True) - name = Column(String, nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - budget = Column(Float) - portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) - housing_type = Column(String, nullable=False) - goal = Column(String, nullable=False) - goal_value = Column(String, nullable=False) - trigger_file_path = Column(String, nullable=False) - already_installed_file_path = Column(String) - patches_file_path = Column(String) - non_invasive_recommendations_file_path = Column(String) - exclusions = Column(String) - multi_plan = Column(Boolean, default=False) - is_default = Column(Boolean, default=False, nullable=False) + id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) + name: Mapped[str] = mapped_column(String, nullable=False) + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, nullable=False, server_default=func.now() + ) + budget: Mapped[Optional[float]] = mapped_column(Float) + portfolio_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(Portfolio.id), nullable=False + ) + housing_type: Mapped[str] = mapped_column(String, nullable=False) + goal: Mapped[str] = mapped_column(String, nullable=False) + goal_value: Mapped[str] = mapped_column(String, nullable=False) + trigger_file_path: Mapped[str] = mapped_column(String, nullable=False) + already_installed_file_path: Mapped[Optional[str]] = mapped_column(String) + patches_file_path: Mapped[Optional[str]] = mapped_column(String) + non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column( + String + ) + exclusions: Mapped[Optional[str]] = mapped_column(String) + multi_plan: Mapped[bool] = mapped_column(Boolean, default=False) + is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) # Add in the fields we need, which were previously sitting at the portfolio level - cost = Column(Float) - contingency = Column(Float) - funding = Column(Float) - total_work_hours = Column(Float) - energy_savings = Column(Float) - co2_equivalent_savings = Column(Float) - energy_cost_savings = Column(Float) - epc_breakdown_pre_retrofit = Column(String) - epc_breakdown_post_retrofit = Column(String) - number_of_properties = Column(BigInteger) - n_units_to_retrofit = Column(BigInteger) - co2_per_unit_pre_retrofit = Column(String) - co2_per_unit_post_retrofit = Column(String) - energy_bill_per_unit_pre_retrofit = Column(String) - energy_bill_per_unit_post_retrofit = Column(String) - energy_consumption_per_unit_pre_retrofit = Column(String) - energy_consumption_per_unit_post_retrofit = Column(String) - valuation_improvement_per_unit = Column(String) - cost_per_unit = Column(String) - cost_per_co2_saved = Column(String) - cost_per_sap_point = Column(String) - valuation_return_on_investment = Column(String) - property_valuation_increase = Column(Float) - labour_days = Column(Float) + cost: Mapped[Optional[float]] = mapped_column(Float) + contingency: Mapped[Optional[float]] = mapped_column(Float) + funding: Mapped[Optional[float]] = mapped_column(Float) + total_work_hours: Mapped[Optional[float]] = mapped_column(Float) + energy_savings: Mapped[Optional[float]] = mapped_column(Float) + co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float) + energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float) + epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger) + n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger) + co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column( + String + ) + energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column( + String + ) + valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String) + cost_per_unit: Mapped[Optional[str]] = mapped_column(String) + cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String) + cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String) + valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String) + property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float) + labour_days: Mapped[Optional[float]] = mapped_column(Float) class MeasureType(enum.Enum): From b3fa7c3051b22e76f8c7a6d3a375d72ebe6ad0df Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:01:39 +0000 Subject: [PATCH 097/340] rename Plan and Scenario to PlanModel and ScenarioModel --- backend/Outputs.py | 241 +++--- .../app/db/functions/portfolio_functions.py | 30 +- .../db/functions/recommendations_functions.py | 24 +- backend/app/db/models/funding.py | 45 +- backend/app/db/models/recommendations.py | 4 +- .../categorisation/categorisation_logic.py | 6 +- backend/categorisation/processor.py | 8 +- etl/customers/l_and_g/ic_slides.py | 161 ++-- .../mod/pilot/2. Create Excel Model.py | 469 +++++++---- etl/customers/newhaven/slides.py | 773 +++++++++++------- .../d_restart_failed_subtasks.py | 43 +- .../f_diagnostics.py | 74 +- .../g_rebaselining_installed_measrues.py | 761 +++++++++-------- .../h_reset_estimated_epcs.py | 100 ++- .../k_deck_stats.py | 114 +-- .../m_reduced_sample_revised.py | 28 +- etl/customers/slide_utils.py | 213 +++-- sfr/principal_pitch/2_export_data.py | 28 +- 18 files changed, 1892 insertions(+), 1230 deletions(-) diff --git a/backend/Outputs.py b/backend/Outputs.py index f9538709..7111e4d3 100644 --- a/backend/Outputs.py +++ b/backend/Outputs.py @@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3 from backend.app.utils import sap_to_epc from backend.app.db.connection import db_engine from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) class Outputs: @@ -42,7 +46,7 @@ class Outputs: "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)", "room_in_roof_insulation": "RIR (POA - Prov sum only)", "ev_charging": "EV Charging", - "battery": "Battery" + "battery": "Battery", } def __init__(self, format, portfolio_id): @@ -67,28 +71,38 @@ class Outputs: # Download cleaned data self.cleaned_epc_lookup = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False) def get_properties_from_db(self): # Get properties and their details for a specific portfolio - properties_query = self.session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + self.session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter( + PropertyModel.portfolio_id + == self.portfolio_id # Filter by portfolio ID + ) + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] @@ -96,10 +110,14 @@ class Outputs: def get_plans_from_db(self): - plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all() + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.portfolio_id == self.portfolio_id) + .all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -107,28 +125,38 @@ class Outputs: def get_recommendations_from_db(self, plan_ids): # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = self.session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + self.session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ { **{ - col.name: getattr(rec.Recommendation, col.name) if - hasattr(rec, 'Recommendation') else getattr(rec, col.name) + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) for col in Recommendation.__table__.columns }, - "Scenario ID": rec.scenario_id - } for rec in recommendations_query + "Scenario ID": rec.scenario_id, + } + for rec in recommendations_query ] return recommendations_data @@ -148,7 +176,9 @@ class Outputs: measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None) # If the property_id already exists in the collected rows, update it - existing_row = next((item for item in rows if item["property_id"] == property_id), None) + existing_row = next( + (item for item in rows if item["property_id"] == property_id), None + ) if existing_row is None: # Create a new row if the property_id doesn't exist new_row = {measure: None for measure in all_measures} @@ -196,7 +226,7 @@ class Outputs: properties_data = self.get_properties_from_db() plans_data = self.get_plans_from_db() - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] recommendations_data = self.get_recommendations_from_db(plan_ids) self.session.close() @@ -209,50 +239,54 @@ class Outputs: scenario_ids = plans_df["scenario_id"].unique() # We start to create the MDS sheet - mds = properties_df[ - [ - "property_id", - "address", - "postcode", - "uprn", - "current_epc_rating", - "current_sap_points", - "primary_energy_consumption", - "property_type", - "built_form", - "total_floor_area", - "walls", - "tenure", - "mainfuel", - # The bills columns are split out - we include them and aggregate, without appliances - "heating_cost_current", - "hot_water_cost_current", - "lighting_cost_current", - "gas_standing_charge", - "electricity_standing_charge" + mds = ( + properties_df[ + [ + "property_id", + "address", + "postcode", + "uprn", + "current_epc_rating", + "current_sap_points", + "primary_energy_consumption", + "property_type", + "built_form", + "total_floor_area", + "walls", + "tenure", + "mainfuel", + # The bills columns are split out - we include them and aggregate, without appliances + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].copy().rename( - columns={ - "address": "Address", - "postcode": "Postcode", - "uprn": "UPRN", - "current_epc_rating": "Pre EPC", - "current_sap_points": "EPC Source", - "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", - "property_type": "Property Type", - "built_form": "Built Form", - "total_floor_area": "Floor area m2 (If known)", - "walls": "Wall Type (Mandatory field)", - "tenure": "Tenure", - } + .copy() + .rename( + columns={ + "address": "Address", + "postcode": "Postcode", + "uprn": "UPRN", + "current_epc_rating": "Pre EPC", + "current_sap_points": "EPC Source", + "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", + "property_type": "Property Type", + "built_form": "Built Form", + "total_floor_area": "Floor area m2 (If known)", + "walls": "Wall Type (Mandatory field)", + "tenure": "Tenure", + } + ) ) mds["Estimated bill (£ per year)"] = ( - mds["heating_cost_current"] + - mds["hot_water_cost_current"] + - mds["lighting_cost_current"] + - mds["gas_standing_charge"] + - mds["electricity_standing_charge"] + mds["heating_cost_current"] + + mds["hot_water_cost_current"] + + mds["lighting_cost_current"] + + mds["gas_standing_charge"] + + mds["electricity_standing_charge"] ) mds = mds.drop( @@ -261,65 +295,84 @@ class Outputs: "hot_water_cost_current", "lighting_cost_current", "gas_standing_charge", - "electricity_standing_charge" + "electricity_standing_charge", ] ) # Formatting - Pre EPC is an enum mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values] - mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0] + mds["Wall Type (Mandatory field)"] = ( + mds["Wall Type (Mandatory field)"].str.split(",").str[0] + ) # Remove average thermal transmittance field mds["Wall Type (Mandatory field)"] = np.where( - mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"), + mds["Wall Type (Mandatory field)"].str.contains( + "Average thermal transmittance" + ), "", - mds["Wall Type (Mandatory field)"] + mds["Wall Type (Mandatory field)"], ) mds = mds.merge( - pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]], + pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[ + ["clean_description", "fuel_type"] + ], left_on="mainfuel", right_on="clean_description", - how="left" + how="left", + ) + mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop( + columns=["clean_description", "mainfuel"] ) - mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"]) mds["Existing Fuel Type"].value_counts() mds_output_by_scenario = {} for scenario_id in scenario_ids: - scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id] + scenario_recommendations = recommendations_df[ + recommendations_df["Scenario ID"] == scenario_id + ] # For each measure, we create the measure matrix - scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations) + scenario_measure_matrix = self.make_mds_measure_matrix( + scenario_recommendations + ) # Calculate the predicted impact on: SAP, heat demand, bills, kwh - recommendation_impacts = scenario_recommendations.groupby("property_id")[ - ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] - ].sum().reset_index() + recommendation_impacts = ( + scenario_recommendations.groupby("property_id")[ + ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] + ] + .sum() + .reset_index() + ) scenario_mds = mds.merge( scenario_measure_matrix, how="left", on="property_id" - ).merge( - recommendation_impacts, how="left", on="property_id" - ) + ).merge(recommendation_impacts, how="left", on="property_id") # If we have no recommendations, sap_points, kwh_savings, head_demand will be NaN to_clean = [c for c in recommendation_impacts.columns if c != "property_id"] for col in to_clean: scenario_mds[col].fillna(0, inplace=True) scenario_mds.fillna(0, inplace=True) - scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"] + scenario_mds["Post SAP"] = ( + scenario_mds["EPC Source"] + scenario_mds["sap_points"] + ) # Round Post SAP down to the nearest integer scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x)) - scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x)) + scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply( + lambda x: sap_to_epc(x) + ) scenario_mds["Heating Demand Kwh/m2/y"] = ( - scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"] + scenario_mds["Existing Heating Demand Kwh/m2/y"] + - scenario_mds["heat_demand"] ) scenario_mds = scenario_mds.rename( columns={ "sap_points": "Predicted SAP Points", "kwh_savings": "Energy Saving (Kwh)", - "energy_cost_savings": "Bill Reduction (£ per yr)" + "energy_cost_savings": "Bill Reduction (£ per yr)", } ) @@ -330,7 +383,7 @@ class Outputs: save_excel_to_s3( df=scenario_mds, file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) def export(self): diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index fa97c206..ae48afed 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -1,5 +1,10 @@ from sqlalchemy import func -from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario +from backend.app.db.models.recommendations import ( + PlanModel, + PlanRecommendations, + Recommendation, + ScenarioModel, +) def aggregate_portfolio_recommendations( @@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations( scenario_id: int, total_valuation_increase: float, labour_days: float, - aggregated_data: dict + aggregated_data: dict, ): # Aggregate multiple fields aggregates = ( @@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations( func.sum(Recommendation.estimated_cost).label("cost"), func.sum(Recommendation.total_work_hours).label("total_work_hours"), func.sum(Recommendation.kwh_savings).label("energy_savings"), - func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"), + func.sum(Recommendation.co2_equivalent_savings).label( + "co2_equivalent_savings" + ), func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"), ) - .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join( + PlanRecommendations, + PlanRecommendations.recommendation_id == Recommendation.id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( - Plan.portfolio_id == portfolio_id, - Plan.scenario_id == scenario_id, - Recommendation.default == True + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id == scenario_id, + Recommendation.default == True, ) .one() ) @@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations( "energy_savings": aggregates.energy_savings or 0, "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0, "energy_cost_savings": aggregates.energy_cost_savings or 0, - **aggregated_data + **aggregated_data, } # Get the scenario and update the fields. This data needs to be stored against the scenario, not the portfolio - portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one() + portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one() # Update the data for key, value in aggregates_dict.items(): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 54754ee0..5ff91909 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -4,11 +4,11 @@ from sqlalchemy import insert, delete from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError from backend.app.db.models.recommendations import ( - Plan, + PlanModel, Recommendation, RecommendationMaterials, PlanRecommendations, - Scenario, + ScenarioModel, ) from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session @@ -138,7 +138,7 @@ def create_plan(session: Session, plan): :param plan: dictionary of data representing a plan to be created """ try: - new_plan = Plan(**plan) + new_plan = PlanModel(**plan) session.add(new_plan) session.flush() session.commit() @@ -160,7 +160,9 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int for p in plans_to_create ] - stmt = insert(Plan).values(payload).returning(Plan.id, Plan.property_id) + stmt = ( + insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id) + ) result = session.execute(stmt).all() @@ -170,12 +172,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int def create_scenario(session: Session, scenario: dict) -> int: existing_scenario = ( - session.query(Scenario).filter_by(portfolio_id=scenario["portfolio_id"]).first() + session.query(ScenarioModel) + .filter_by(portfolio_id=scenario["portfolio_id"]) + .first() ) scenario["is_default"] = not bool(existing_scenario) - new_scenario = Scenario(**scenario) + new_scenario = ScenarioModel(**scenario) session.add(new_scenario) session.flush() # ensures ID is populated @@ -578,7 +582,9 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int): return with db_session() as session: - session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) + session.execute( + delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) + ) print("Deleted scenarios for empty portfolio") @@ -611,11 +617,11 @@ def clear_portfolio_in_batches( print("Portfolio cleared in batches.") -def get_plans_by_portfolio_id(portfolio_id: int) -> List[Plan]: +def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: raise NotImplementedError -def get_scenario(scenario_id: int) -> List[Scenario]: +def get_scenario(scenario_id: int) -> List[ScenarioModel]: raise NotImplementedError diff --git a/backend/app/db/models/funding.py b/backend/app/db/models/funding.py index 6ea8364e..a7417e14 100644 --- a/backend/app/db/models/funding.py +++ b/backend/app/db/models/funding.py @@ -1,9 +1,18 @@ import enum -from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey +from sqlalchemy import ( + Column, + Integer, + String, + Float, + Enum, + TIMESTAMP, + BigInteger, + ForeignKey, +) from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.db.models.materials import MaterialType, Material Base = declarative_base() @@ -17,13 +26,17 @@ class SchemeEnum(enum.Enum): class FundingPackage(Base): - __tablename__ = 'funding_package' + __tablename__ = "funding_package" id = Column(Integer, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False) + plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False) scheme = Column( - Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + SchemeEnum, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, ) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) project_funding = Column(Float) @@ -34,15 +47,23 @@ class FundingPackage(Base): class FundingPackageMeasures(Base): - __tablename__ = 'funding_package_measures' + __tablename__ = "funding_package_measures" id = Column(Integer, primary_key=True, autoincrement=True) - funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False) - measure = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + funding_package_id = Column( + BigInteger, ForeignKey(FundingPackage.id), nullable=False ) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists + measure = Column( + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + material_id = Column( + BigInteger, ForeignKey(Material.id), nullable=False + ) # Assuming material table exists innovation_uplift = Column(Float) partial_project_score = Column(Float) uplift_project_score = Column(Float) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 36872394..759c088e 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -74,7 +74,7 @@ class PlanTypeEnum(enum.Enum): EXTRACTION_ECO = "extraction_eco" -class Plan(Base): +class PlanModel(Base): __tablename__ = "plan" id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) @@ -139,7 +139,7 @@ class PlanRecommendations(Base): ) -class Scenario(Base): +class ScenarioModel(Base): __tablename__ = "scenario" id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) diff --git a/backend/categorisation/categorisation_logic.py b/backend/categorisation/categorisation_logic.py index 503b3e54..f9503e50 100644 --- a/backend/categorisation/categorisation_logic.py +++ b/backend/categorisation/categorisation_logic.py @@ -1,12 +1,12 @@ from typing import List -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel class CategorisationLogic: @staticmethod - def get_compliant_plans(plans: List[Plan]) -> List[Plan]: + def get_compliant_plans(plans: List[PlanModel]) -> List[PlanModel]: raise NotImplementedError @staticmethod - def get_cheapest_plan(plans: List[Plan]) -> Plan: + def get_cheapest_plan(plans: List[PlanModel]) -> PlanModel: raise NotImplementedError diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 0c867267..53d7846c 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -5,24 +5,24 @@ from backend.app.db.functions.recommendations_functions import ( get_property_ids, set_plan_default, ) -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.categorisation.categorisation_logic import CategorisationLogic def process_portfolio(portfolio_id: int) -> None: # Get all plans (including scenarios) for all properties in the portfolio - plans: List[Plan] = get_plans_by_portfolio_id(portfolio_id) + plans: List[PlanModel] = get_plans_by_portfolio_id(portfolio_id) # For each property, get all compliant plans property_ids: List[int] = get_property_ids(portfolio_id) # For each property, find the cheapest compliant plan for id in property_ids: - plans_for_property: List[Plan] = [ + plans_for_property: List[PlanModel] = [ plan for plan in plans if plan.property_id == id ] - compliant_plans_for_property: List[Plan] = ( + compliant_plans_for_property: List[PlanModel] = ( CategorisationLogic.get_compliant_plans(plans_for_property) ) diff --git a/etl/customers/l_and_g/ic_slides.py b/etl/customers/l_and_g/ic_slides.py index a5cb3511..de6edd49 100644 --- a/etl/customers/l_and_g/ic_slides.py +++ b/etl/customers/l_and_g/ic_slides.py @@ -41,7 +41,10 @@ epc_data = pd.read_csv( # Classify floor area in <73m2, 73-98, 99-200, 200+ epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply( - lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+") + lambda x: ( + "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+" + ) +) # 73-98 185 # <73 156 @@ -65,7 +68,11 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205]) +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=124, scenario_ids=[205] +) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"]) post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id -post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +post_install_sap = ( + post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename( "double_glazing": "Cost: Double Glazing", "loft_insulation": "Cost: Loft Insulation", "mechanical_ventilation": "Cost: Ventilation", - "solar_pv": "Cost: Solar PV" + "solar_pv": "Cost: Solar PV", } ) recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = ( recommendations_measures_pivot["Cost: Solar PV"] > 0 ) -df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", +df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + ] ] -].merge( - recommendations_measures_pivot, how="left", on="property_id" -).merge( - post_install_sap, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(post_install_sap, how="left", on="property_id") ) df = df.drop(columns=["property_id"]) @@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"]) # We fill missings: for col in [ - "Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation", - "Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation", - "Recommendation: Solar PV" + "Recommendation: Air Source Heat Pump", + "Recommendation: Cavity Wall Insulation", + "Recommendation: Double Glazing", + "Recommendation: Loft Insulation", + "Recommendation: Ventilation", + "Recommendation: Solar PV", ]: df[col] = df[col].fillna(False) for col in [ - "Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation", - "Cost: Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation", - "Cost: Solar PV" + "Cost: Air Source Heat Pump", + "Cost: Cavity Wall Insulation", + "Cost: Double Glazing", + "Cost: Loft Insulation", + "Cost: Ventilation", + "Cost: Solar PV", ]: df[col] = df[col].fillna(0) # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() -df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) +df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) +) df["Recommendation: Air Source Heat Pump"].sum() df["Cost: Air Source Heat Pump"].sum() -df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False) +df.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", + index=False, +) diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py index 9a9eda86..810ab661 100644 --- a/etl/customers/mod/pilot/2. Create Excel Model.py +++ b/etl/customers/mod/pilot/2. Create Excel Model.py @@ -4,7 +4,11 @@ import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') - else getattr(rec, col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -94,16 +121,34 @@ def app(): ) property_asset_data = properties_df.merge( - mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn" + mod_property_data.drop(columns=["address", "postcode", "tenure"]), + how="left", + on="uprn", ) - property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False) + property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains( + "pitched", case=False + ) property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 - property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip() - property_asset_data["is_insulated"] = ( - property_asset_data["walls"].str.split(",").str[1].str.strip().isin( - ["filled cavity", "with external insulation", "filled cavity and external insulation"] - ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"]) + property_asset_data["wall_type"] = ( + property_asset_data["walls"].str.split(" ").str[0].str.strip() + ) + property_asset_data["is_insulated"] = property_asset_data["walls"].str.split( + "," + ).str[1].str.strip().isin( + [ + "filled cavity", + "with external insulation", + "filled cavity and external insulation", + ] + ) | property_asset_data[ + "walls" + ].str.split( + "," + ).str[ + 2 + ].str.strip().isin( + ["insulated"] ) property_asset_data["is_insulated"] = np.where( property_asset_data["is_insulated"], "Insulated", "Uninsulated" @@ -115,18 +160,26 @@ def app(): property_asset_data["pre_1970"], "Pre 1970", "Post 1970" ) - archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"] + archetype_variables = [ + "property_type", + "wall_type", + "is_insulated", + "is_pitched", + "pre_1970", + ] assigned_archetypes = ( - property_asset_data.groupby( - archetype_variables - ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False) + property_asset_data.groupby(archetype_variables) + .size() + .reset_index() + .rename(columns={0: "n_properties"}) + .sort_values("n_properties", ascending=False) ) # Make the archetype ID a concatenation of the variables - assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply( - lambda x: "_".join(x.astype(str)), axis=1 - ) + assigned_archetypes["archetype_id"] = assigned_archetypes[ + archetype_variables + ].apply(lambda x: "_".join(x.astype(str)), axis=1) # Most prominent archetypes prominent_archetypes = assigned_archetypes.head(6) @@ -136,7 +189,7 @@ def app(): property_asset_data = property_asset_data.merge( assigned_archetypes[archetype_variables + ["archetype_id"]], how="left", - on=archetype_variables + on=archetype_variables, ) # Create age bands: @@ -148,7 +201,7 @@ def app(): property_asset_data["age_band"] = pd.cut( property_asset_data["BUILD_YEAR"], bins=[1959, 1969, 1979, 1989, 1999, 2022], - labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"] + labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"], ) # Create floor area bands @@ -159,47 +212,59 @@ def app(): property_asset_data["floor_area_band"] = pd.cut( property_asset_data["total_floor_area"], bins=[0, 73, 97, 199, 10000], - labels=["0-73", "74-97", "98-199", "200+"] + labels=["0-73", "74-97", "98-199", "200+"], ) property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy() property_asset_data["archetype_group"] = np.where( - property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values), + property_asset_data["archetype_id"].isin( + other_archetypes["archetype_id"].values + ), "other", - property_asset_data["archetype_group"] + property_asset_data["archetype_group"], ) # For colour wall_types = ( - property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename( - columns={"wall_type": "Wall Type"} - ) + property_asset_data[["wall_type"]] + .value_counts() + .to_frame() + .reset_index() + .rename(columns={"wall_type": "Wall Type"}) ) # Group into age bands ages = ( - property_asset_data[["age_band"]].value_counts() + property_asset_data[["age_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("age_band", ascending=True) + .reset_index() + .sort_values("age_band", ascending=True) .rename(columns={"age_band": "Age Band"}) ) floor_area_bands = ( - property_asset_data[["floor_area_band"]].value_counts() + property_asset_data[["floor_area_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("floor_area_band", ascending=True) + .reset_index() + .sort_values("floor_area_band", ascending=True) .rename(columns={"floor_area_band": "Floor Area Band"}) ) archetype_counts = ( - property_asset_data[["archetype_group"]]. - value_counts(). - to_frame(). - reset_index() + property_asset_data[["archetype_group"]] + .value_counts() + .to_frame() + .reset_index() .rename(columns={"archetype_group": "Archetype"}) ) property_types = ( - (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]). - value_counts(). - to_frame(). - reset_index() + ( + property_asset_data["property_type"] + + ": " + + property_asset_data["built_form"] + ) + .value_counts() + .to_frame() + .reset_index() .rename(columns={"index": "Property Type", 0: "Count"}) ) @@ -217,18 +282,24 @@ def app(): totals = property_asset_data[ [ "Total_household_members", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", ] ].copy() totals["total_cost"] = ( - totals["heating_cost_current"] + - totals["hot_water_cost_current"] + - totals["lighting_cost_current"] + - totals["appliances_cost_current"] + - totals["gas_standing_charge"] + - totals["electricity_standing_charge"] + totals["heating_cost_current"] + + totals["hot_water_cost_current"] + + totals["lighting_cost_current"] + + totals["appliances_cost_current"] + + totals["gas_standing_charge"] + + totals["electricity_standing_charge"] ) print( totals[ @@ -259,38 +330,59 @@ def app(): scenario_recommendations_df = recommendations_df[ recommendations_df["Scenario ID"] == scenario - ].copy() + ].copy() - scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] = ( + contingency * scenario_recommendations_df["estimated_cost"] + ) scenario_recommendations_df["total_cost"] = ( - scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] + scenario_recommendations_df["estimated_cost"] + + scenario_recommendations_df["contingency"] ) recommended_measures_df = scenario_recommendations_df[ ["property_id", "measure_type", "estimated_cost", "default"] ] - recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df[ + recommended_measures_df["default"] + ] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) # Metrics by property ID aggregated_metrics = scenario_recommendations_df[ [ - "property_id", "type", "default", "sap_points", - "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency", - "total_cost" + "property_id", + "type", + "default", + "sap_points", + "energy_cost_savings", + "kwh_savings", + "co2_equivalent_savings", + "estimated_cost", + "contingency", + "total_cost", ] ] aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]] - aggregated_metrics = aggregated_metrics.groupby("property_id")[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].sum().reset_index() + aggregated_metrics = ( + aggregated_metrics.groupby("property_id")[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .sum() + .reset_index() + ) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -299,30 +391,58 @@ def app(): for c in recommendations_measures_pivot.columns: if c == "property_id": continue - recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0 + recommendations_measures_pivot["Recommendation: " + c] = ( + recommendations_measures_pivot[c] > 0 + ) # We now create a final output - df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].merge( - recommendations_measures_pivot, how="left", on="property_id" - ).merge( - aggregated_metrics, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(aggregated_metrics, how="left", on="property_id") ) df["bills_total_cost"] = ( - df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] + - df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"] + df["heating_cost_current"] + + df["hot_water_cost_current"] + + df["lighting_cost_current"] + + df["appliances_cost_current"] + + df["gas_standing_charge"] + + df["electricity_standing_charge"] ) df = df.drop(columns=["property_id"]) - for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]: + for c in [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + ]: df[c] = df[c].fillna(0) df = df.rename( @@ -345,16 +465,23 @@ def app(): # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() - df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) + df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) + ) # Calculate the relative savings on carbon, kwh, and bills - df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"] + df["relative_carbon_savings"] = ( + df["co2_equivalent_savings"] / df["co2_emissions"] + ) df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] # Add on the archetype df = df.merge( - property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn" + property_asset_data[["uprn", "archetype_group"]], + how="left", + left_on="UPRN", + right_on="uprn", ) # For properties that don't make it to EPC B, check why. E.g. for a property that has an oil boiler, it @@ -387,7 +514,9 @@ def app(): printing_scenario_id = scenario_ids[0] # EPC breakdown - print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts()) + print( + scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts() + ) # Cost # Total cost print(scenario_data[printing_scenario_id]["total_cost"].sum()) @@ -408,16 +537,24 @@ def app(): measure_details = {} for scenario in scenario_ids: measure_details[scenario] = {} - recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c] - measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict() + recommendation_cols = [ + c for c in scenario_data[scenario].columns if "Recommendation:" in c + ] + measure_details[scenario]["count"] = ( + scenario_data[scenario][recommendation_cols].sum().to_dict() + ) # Get average cost per measure measure_columns = [ - c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c + c.split("Recommendation: ")[1] + for c in scenario_data[scenario].columns + if "Recommendation:" in c ] # Take the mean, drop zero columns measure_costs = {} for m in measure_columns: - measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()) + measure_costs[m] = float( + scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean() + ) measure_details[scenario]["cost_per_measure"] = measure_costs pprint(measure_details[scenario_ids[0]]["count"]) @@ -452,12 +589,27 @@ def app(): for scenario in scenario_ids: df = scenario_data[scenario].copy() - avg_savings = df[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].mean().to_dict() - avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"] - avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + avg_savings = ( + df[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .mean() + .to_dict() + ) + avg_savings["cost_per_sap_point"] = ( + avg_savings["total_cost"] / avg_savings["sap_points"] + ) + avg_savings["cost_per_carbon"] = ( + avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + ) scenario_metrics[scenario] = avg_savings pprint(scenario_metrics[scenario_ids[0]]) @@ -465,11 +617,11 @@ def app(): scenario_data[scenario_ids[0]]["loft_insulation"][ scenario_data[scenario_ids[0]]["loft_insulation"] > 0 - ].mean() + ].mean() scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 - ].mean() + ].mean() # Testing checking floor risk @@ -477,11 +629,7 @@ def app(): def get_flood_risk(lat, lon, radius_km=1): url = "https://environment.data.gov.uk/flood-monitoring/id/floods" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km # search radius in km - } + params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km response = requests.get(url, params=params) response.raise_for_status() @@ -495,20 +643,19 @@ def app(): print(f"{len(flood_warnings)} warning(s) found near the location:") for warning in flood_warnings: print(f"- Area: {warning.get('description')}") - print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})") + print( + f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})" + ) print(f" Message changed at: {warning.get('timeMessageChanged')}") print() return flood_warnings from shapely.geometry import shape, Point + def get_flood_areas_near_point(lat, lon, radius_km=2): url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km - } + params = {"lat": lat, "long": lon, "dist": radius_km} response = requests.get(url, params=params) response.raise_for_status() @@ -531,7 +678,7 @@ def app(): if not features: continue - flood_polygon = shape(features[0]['geometry']) + flood_polygon = shape(features[0]["geometry"]) try: is_inside = flood_polygon.contains(point) @@ -539,12 +686,17 @@ def app(): is_inside = False if is_inside: - print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})") + print( + f"📍 Point is inside flood area: {area['label']} ({area['notation']})" + ) return area from tqdm import tqdm + floor_warnings_data = [] - for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)): + for _, property in tqdm( + property_asset_data.iterrows(), total=len(property_asset_data) + ): # warnings = floor_warnings_data.extend( # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) # ) @@ -556,7 +708,7 @@ def app(): "uprn": property["uprn"], "address": property["address"], "postcode": property["postcode"], - "area": resp + "area": resp, } ) continue @@ -570,7 +722,7 @@ def app(): "House_Cavity_Uninsulated_Pitched roof_Post 1970", "other", "House_System_Uninsulated_Pitched roof_Pre 1970", - "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970" + "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970", ] values = [62, 36, 21, 16, 16, 4, 2] @@ -582,36 +734,39 @@ def app(): "Cavity wall insulation, ventilation", "Bespoke retrofit measures", "External wall insulation, roof insulation", - "Flat roof insulation, internal wall insulation" + "Flat roof insulation, internal wall insulation", ] - fig = go.Figure(go.Treemap( - labels=labels, - parents=[""] * len(labels), # No root - values=values, - hovertext=hovertext, - hoverinfo="text", - textinfo="none", - marker=dict( - line=dict(color="white", width=4), - colors=values, - colorscale="Blues" + fig = go.Figure( + go.Treemap( + labels=labels, + parents=[""] * len(labels), # No root + values=values, + hovertext=hovertext, + hoverinfo="text", + textinfo="none", + marker=dict( + line=dict(color="white", width=4), colors=values, colorscale="Blues" + ), ) - )) + ) fig.update_layout( - margin=dict(t=10, l=10, r=10, b=10), - plot_bgcolor="white", - paper_bgcolor="white" + margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white" ) fig.show() # Get the recommended measures by scenario id - recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c] - measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[ - recommendation_cols - ].sum().reset_index() + recommendation_cols = [ + c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c + ] + measure_counts_by_scenario = ( + scenario_data[scenario_ids[1]] + .groupby("archetype_group")[recommendation_cols] + .sum() + .reset_index() + ) measure_counts_by_scenario.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" @@ -630,15 +785,13 @@ def app(): to_append = {"uprn": uprn} for _id in scenario_ids: - scenario = scenario_data[_id][ - scenario_data[_id]["uprn"] == uprn - ].squeeze() + scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze() val = PropertyValuation.estimate_valuation_improvement( current_value=x["valuation"], current_epc=scenario["Current EPC Rating"].value, target_epc=scenario["Predicted Post Works EPC"], - total_cost=None + total_cost=None, ) to_append[_id] = val["average_increase"] diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py index 45108fec..efedb844 100644 --- a/etl/customers/newhaven/slides.py +++ b/etl/customers/newhaven/slides.py @@ -3,7 +3,12 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + ScenarioModel, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from utils.s3 import read_csv_from_s3 @@ -13,56 +18,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -71,7 +99,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids): +def estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids +): # properties_starting_with_electric_heating = properties_df[ # properties_df["mainfuel"].isin( # ["Electricity not community", "Electricity electricity unspecified tariff"] @@ -85,20 +115,29 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d for scenario_id in scenario_ids: # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["ligting_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + scenario_recommendations["solar_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Kwh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) # We need to determine if any of the properties start with electric heating or end with it # property_electric_heating = [] @@ -112,51 +151,76 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d # property_electric_heating.append(pid) # continue - grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Kwh Savings': 'sum', - 'ligting_kwh': 'sum', - 'solar_kwh': 'sum', - "estimated_cost": "sum" - }).reset_index() + grouped_data = ( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Kwh Savings": "sum", + "ligting_kwh": "sum", + "solar_kwh": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates().merge( grouped_data, on=["property_id"], how="left" ) comparison["Post Retrofit Heating & Hotwater kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - \ - comparison["Estimated Kwh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Kwh Savings"] ) - avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() + avgs = comparison[ + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() # We now, for properties that have a plan, do a before and after with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])] avgs2 = with_savings[ - ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() - avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[ - "Post Retrofit Heating & Hotwater kwh"] - avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() + avgs2["difference"] = ( + avgs2["current_energy_demand_heating_hotwater"] + - avgs2["Post Retrofit Heating & Hotwater kwh"] + ) + avgs2["percentage_reduction"] = ( + 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + ) # We also calculate the cost per kwh saves total_kwh_saved = ( - with_savings["Estimated Kwh Savings"].sum() + - with_savings["ligting_kwh"].sum() + - with_savings["solar_kwh"].sum() + with_savings["Estimated Kwh Savings"].sum() + + with_savings["ligting_kwh"].sum() + + with_savings["solar_kwh"].sum() ) total_cost = with_savings["estimated_cost"].sum() cost_per_kwh_saved = total_cost / total_kwh_saved scenario_comparison_df.append({"scenario_id": scenario_id, **avgs}) scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2}) - cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved}) + cost_per_kwh_saved_table.append( + {"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved} + ) scenario_comparison_population = pd.DataFrame(scenario_comparison_df) scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2) cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table) - return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table + return ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) def slides(): @@ -167,7 +231,9 @@ def slides(): # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -177,16 +243,19 @@ def slides(): raise ValueError("The number of unique properties is not 2553") # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline? - heating_hotwater_kwh = ( - properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']] - .mean() - ) + heating_hotwater_kwh = properties_df[ + ["current_energy_demand", "current_energy_demand_heating_hotwater"] + ].mean() # Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire # popoulation (incl those without retrofit) and for just those being retrofit # We also calculat the cost per kwh saved - scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = ( - estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids) + ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) = estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids ) # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit @@ -194,42 +263,55 @@ def slides(): # By property - recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace( - { - "loft_insulation": "roof_insulation", - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + recommendations_df["type_mapped"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "loft_insulation": "roof_insulation", + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) ) recommendations_df["type_mapped"] = np.where( recommendations_df["description"].str.contains("air source heat pump"), "air_source_heat_pump", - recommendations_df["type_mapped"] + recommendations_df["type_mapped"], ) # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID' - recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby( - ['Scenario ID', 'type_mapped'] - ).agg({ - 'property_id': 'nunique' - }).reset_index() + recommendation_summary = ( + recommendations_df[recommendations_df["default"] == True] + .groupby(["Scenario ID", "type_mapped"]) + .agg({"property_id": "nunique"}) + .reset_index() + ) - recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties'] + recommendation_summary.columns = [ + "Scenario ID", + "Type Mapped", + "Number of Properties", + ] recommendation_summary["Percentage of Properties"] = 100 * ( recommendation_summary["Number of Properties"] / properties_df["id"].nunique() ) - recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])] + recommendation_summary_final_scenario = recommendation_summary[ + recommendation_summary["Scenario ID"].isin([51]) + ] # MVP implementation of funding estimation for the most basic scenario, using GBIS - project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") + project_scores_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" + ) def find_abs(sap_movement, starting_sap, floor_area): starting_band = find_band(starting_sap) @@ -238,7 +320,7 @@ def slides(): return 0 if floor_area <= 72: - floor_area_segment = '0-72' + floor_area_segment = "0-72" elif (floor_area > 72) and (floor_area <= 97): floor_area_segment = "73-97" elif (floor_area > 97) and (floor_area <= 199): @@ -247,26 +329,26 @@ def slides(): floor_area_segment = "200+" return project_scores_matrix[ - (project_scores_matrix["Floor Area Segment"] == floor_area_segment) & - (project_scores_matrix["Starting Band"] == starting_band) & - (project_scores_matrix["Finishing Band"] == finishing_band) - ].squeeze()["Cost Savings"] + (project_scores_matrix["Floor Area Segment"] == floor_area_segment) + & (project_scores_matrix["Starting Band"] == starting_band) + & (project_scores_matrix["Finishing Band"] == finishing_band) + ].squeeze()["Cost Savings"] eco4_scores_sap_table = [ - {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0}, - {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0}, - {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5}, - {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5}, - {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25}, - {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75}, - {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75}, - {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25}, - {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25}, - {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75}, - {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75}, - {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25}, - {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25}, - {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75} + {"Band": "High_A", "From": 96.0, "Up to": 100.0, "Mid-point": 98.0}, + {"Band": "Low_A", "From": 92.0, "Up to": 96.0, "Mid-point": 94.0}, + {"Band": "High_B", "From": 86.0, "Up to": 91.0, "Mid-point": 88.5}, + {"Band": "Low_B", "From": 81.0, "Up to": 86.0, "Mid-point": 83.5}, + {"Band": "High_C", "From": 74.5, "Up to": 80.0, "Mid-point": 77.25}, + {"Band": "Low_C", "From": 69.0, "Up to": 74.5, "Mid-point": 71.75}, + {"Band": "High_D", "From": 61.5, "Up to": 68.0, "Mid-point": 64.75}, + {"Band": "Low_D", "From": 55.0, "Up to": 61.5, "Mid-point": 58.25}, + {"Band": "High_E", "From": 46.5, "Up to": 54.0, "Mid-point": 50.25}, + {"Band": "Low_E", "From": 39.0, "Up to": 46.5, "Mid-point": 42.75}, + {"Band": "High_F", "From": 29.5, "Up to": 38.0, "Mid-point": 33.75}, + {"Band": "Low_F", "From": 21.0, "Up to": 29.5, "Mid-point": 25.25}, + {"Band": "High_G", "From": 10.5, "Up to": 20.0, "Mid-point": 15.25}, + {"Band": "Low_G", "From": 1.0, "Up to": 10.5, "Mid-point": 5.75}, ] eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table) @@ -274,8 +356,9 @@ def slides(): # Iterate through each row in the DataFrame to find the correct band value_floored = np.floor(value) return eco4_scores_sap_table[ - (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored) - ].squeeze()["Band"] + (eco4_scores_sap_table["From"] <= value_floored) + & (eco4_scores_sap_table["Up to"] >= value_floored) + ].squeeze()["Band"] def identify_funding_measure(p, p_recs, is_social): measures = ["cavity_wall_insulation", "loft_insulation"] @@ -287,15 +370,17 @@ def slides(): project_abs = find_abs( sap_movement=funding_measure["sap_points"], starting_sap=p["current_sap_points"], - floor_area=p["total_floor_area"] + floor_area=p["total_floor_area"], + ) + property_abs.append( + { + "property_id": p["property_id"], + "measure": funding_measure["type"], + "cost": funding_measure["estimated_cost"], + "abs": project_abs, + "is_social": is_social, + } ) - property_abs.append({ - "property_id": p["property_id"], - "measure": funding_measure["type"], - "cost": funding_measure["estimated_cost"], - "abs": project_abs, - "is_social": is_social - }) if not property_abs: return None @@ -351,7 +436,9 @@ def slides(): band_b_proportion = 0.195 band_c_proportion = 0.219 band_d_proportion = 0.156 - a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + a_to_d_proportion = ( + band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + ) benefits_proportion = 0.51 @@ -360,20 +447,26 @@ def slides(): # We scale the private funding based on these two factors private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion - n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion) + n_private_projects = np.round( + (~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion + ) # Look at the impact of EWI for scenario ewi_jobs = recommendations_df[ - (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation") - ] + (recommendations_df["Scenario ID"] == 49) + & (recommendations_df["type"] == "external_wall_insulation") + ] ewi_jobs["estimated_cost"].sum() has_cavity = recommendations_df[ - (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47) - ] + (recommendations_df["type"] == "cavity_wall_insulation") + & (recommendations_df["Scenario ID"] == 47) + ] # Take the some properties in this - cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)] + cavity_units = properties_df[ + properties_df["property_id"].isin(has_cavity["property_id"].values) + ] cavity_units[cavity_units.index == 3][["uprn", "property_id"]] @@ -381,41 +474,52 @@ def slides(): # Recommenation type by kwh savings per unit recommendations_final_scenario = recommendations_df[ - recommendations_df["Scenario ID"].isin([51]) & - (recommendations_df["default"] == True) - ].copy() + recommendations_df["Scenario ID"].isin([51]) + & (recommendations_df["default"] == True) + ].copy() # Merge on floor area recommendations_final_scenario = recommendations_final_scenario.merge( properties_df[["property_id", "total_floor_area"]], on="property_id", how="left" ) recommendations_final_scenario = recommendations_final_scenario[ - ~pd.isnull(recommendations_final_scenario["total_floor_area"])] - recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \ - recommendations_final_scenario["total_floor_area"] - - recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace( - { - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + ~pd.isnull(recommendations_final_scenario["total_floor_area"]) + ] + recommendations_final_scenario["kwh_savings_per_unit"] = ( + recommendations_final_scenario["kwh_savings"] + / recommendations_final_scenario["total_floor_area"] ) - aggs = recommendations_final_scenario.groupby("type_mapped")[ - ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values( - "kwh_savings_per_unit", ascending=False + recommendations_final_scenario["type_mapped2"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) + ) + + aggs = ( + recommendations_final_scenario.groupby("type_mapped")[ + ["kwh_savings_per_unit", "estimated_cost"] + ] + .mean() + .reset_index() + .sort_values("kwh_savings_per_unit", ascending=False) ) aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"] # Show more columns with pandas - pd.set_option('display.max_columns', None) + pd.set_option("display.max_columns", None) # Show more rows with pandas - pd.set_option('display.max_rows', None) + pd.set_option("display.max_rows", None) # Show more characters in a column - pd.set_option('display.max_colwidth', None) + pd.set_option("display.max_colwidth", None) def lewes_outputs(): @@ -427,12 +531,14 @@ def lewes_outputs(): """ # get the asset list - asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv") + asset_list = read_csv_from_s3( + bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv" + ) asset_list = pd.DataFrame(asset_list) # Get non-invasive recommendations non_intrusive_recommendations = read_csv_from_s3( bucket_name="retrofit-plan-inputs-dev", - filepath="8/90/non_invasive_recommendations.csv" + filepath="8/90/non_invasive_recommendations.csv", ) non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations) @@ -440,20 +546,21 @@ def lewes_outputs(): portfolio_id = 90 # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) recommendations_df = pd.DataFrame(recommendations_data) # Unnest this import ast + survey_recs = [] for _, row in non_intrusive_recommendations.iterrows(): recs = ast.literal_eval(row["recommendations"]) ashp_rec = next((r for r in recs if r["type"] == "air_source_heat_pump"), None) solar_rec = next((r for r in recs if r["type"] == "solar_pv"), None) - to_append = { - "uprn": row["uprn"] - } + to_append = {"uprn": row["uprn"]} if ashp_rec["suitable"]: to_append = { **to_append, @@ -479,44 +586,57 @@ def lewes_outputs(): domna_kwh = 10850 scaling_factor = vital_kwh / domna_kwh - next_gen_dataset = properties_df[[ - "uprn", "address", "postcode", - "property_type", "built_form", "current_energy_demand_heating_hotwater", - "mainfuel", "total_floor_area", "floor_height" - ]].rename( - columns={ - "mainfuel": "primary_fuel_type", - "total_floor_area": "gross_floor_area", - "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh" - } - ).merge( - asset_list[["uprn", "number_of_floors"]], - how="left", - on="uprn" - ).merge( - survey_recs, - how="left", - on="uprn" + next_gen_dataset = ( + properties_df[ + [ + "uprn", + "address", + "postcode", + "property_type", + "built_form", + "current_energy_demand_heating_hotwater", + "mainfuel", + "total_floor_area", + "floor_height", + ] + ] + .rename( + columns={ + "mainfuel": "primary_fuel_type", + "total_floor_area": "gross_floor_area", + "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh", + } + ) + .merge(asset_list[["uprn", "number_of_floors"]], how="left", on="uprn") + .merge(survey_recs, how="left", on="uprn") ) next_gen_dataset["estimated_heating_hotwater_kwh_scaled"] = ( next_gen_dataset["estimated_heating_hotwater_kwh"] * scaling_factor ) next_gen_dataset["ashp_suitable"] = next_gen_dataset["ashp_suitable"].fillna(False) - next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna(False) + next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna( + False + ) # We prepare the scenario outputs by property type grouped_data = next_gen_dataset.copy() grouped_data["property_sub_type"] = grouped_data["built_form"].copy() # If a property is a flat, re-map sub_type just to flat - grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = "Flat" + grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = ( + "Flat" + ) # Same for maisonettes - grouped_data.loc[grouped_data["property_type"] == "Maisonette", "property_sub_type"] = "Maisonette" + grouped_data.loc[ + grouped_data["property_type"] == "Maisonette", "property_sub_type" + ] = "Maisonette" # We now pull out the recommendations impact by property type and sub type # Exclude sealing open fireplaces - recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"] + recommendations_df = recommendations_df[ + recommendations_df["type"] != "sealing_open_fireplace" + ] # We update the type column so that if type == heating, and the description contains "air source heat pump", # the type is "air_source_heat_pump", else if the description contains "high heat retention storage heaters", else @@ -532,108 +652,130 @@ def lewes_outputs(): np.where( recommendations_df["description"].str.contains("condensing boiler"), "Boiler Upgrade", - recommendations_df["type"] - ) - ) + recommendations_df["type"], + ), + ), ), - recommendations_df["type"] + recommendations_df["type"], ) recommendation_types = recommendations_df["type"].unique().tolist() rename_dict = { - 'hot_water_tank_insulation': 'Hot Water Tank Insulation', - 'windows_glazing': 'Windows Glazing', - 'secondary_heating': 'Secondary Heating', - 'cavity_wall_insulation': 'Cavity Wall Insulation', - 'flat_roof_insulation': 'Flat Roof Insulation', - 'mechanical_ventilation': 'Mechanical Ventilation', - 'loft_insulation': 'Loft Insulation', - 'cylinder_thermostat': 'Cylinder Thermostat', - 'room_roof_insulation': 'Room Roof Insulation', - 'low_energy_lighting': 'Low Energy Lighting', - 'external_wall_insulation': 'External Wall Insulation', - 'solar_pv': 'Solar PV', - 'heating_control': 'Heating Control', - 'solid_floor_insulation': 'Solid Floor Insulation', - 'suspended_floor_insulation': 'Suspended Floor Insulation', - 'internal_wall_insulation': 'Internal Wall Insulation' + "hot_water_tank_insulation": "Hot Water Tank Insulation", + "windows_glazing": "Windows Glazing", + "secondary_heating": "Secondary Heating", + "cavity_wall_insulation": "Cavity Wall Insulation", + "flat_roof_insulation": "Flat Roof Insulation", + "mechanical_ventilation": "Mechanical Ventilation", + "loft_insulation": "Loft Insulation", + "cylinder_thermostat": "Cylinder Thermostat", + "room_roof_insulation": "Room Roof Insulation", + "low_energy_lighting": "Low Energy Lighting", + "external_wall_insulation": "External Wall Insulation", + "solar_pv": "Solar PV", + "heating_control": "Heating Control", + "solid_floor_insulation": "Solid Floor Insulation", + "suspended_floor_insulation": "Suspended Floor Insulation", + "internal_wall_insulation": "Internal Wall Insulation", } property_scenario_impact = [] for scenario_id in tqdm(scenario_ids): # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["Estimated Lighting kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + ) + scenario_recommendations["Estimated Solar kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Heating Demand kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) - scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Heating Demand kWh Savings': 'sum', - 'Estimated Lighting kWh Savings': 'sum', - 'Estimated Solar kWh Savings': 'sum', - "estimated_cost": "sum" - }).reset_index() + scenario_grouped_data = ( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Heating Demand kWh Savings": "sum", + "Estimated Lighting kWh Savings": "sum", + "Estimated Solar kWh Savings": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates()[ ["uprn", "property_id", "current_energy_demand_heating_hotwater"] - ].merge( - scenario_grouped_data, on=["property_id"], how="left" - ) - comparison["Estimated Heating Demand kWh Savings"] = ( - comparison["Estimated Heating Demand kWh Savings"].fillna(0) - ) - comparison["Estimated Lighting kWh Savings"] = ( - comparison["Estimated Lighting kWh Savings"].fillna(0) - ) - comparison["Estimated Solar kWh Savings"] = ( - comparison["Estimated Solar kWh Savings"].fillna(0) - ) + ].merge(scenario_grouped_data, on=["property_id"], how="left") + comparison["Estimated Heating Demand kWh Savings"] = comparison[ + "Estimated Heating Demand kWh Savings" + ].fillna(0) + comparison["Estimated Lighting kWh Savings"] = comparison[ + "Estimated Lighting kWh Savings" + ].fillna(0) + comparison["Estimated Solar kWh Savings"] = comparison[ + "Estimated Solar kWh Savings" + ].fillna(0) comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0) comparison["post_scenario_heating_hotwater_kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Heating Demand kWh Savings"] ) # For each scenario, we create a measure matrix measure_matrix = scenario_recommendations.pivot_table( - index='property_id', - columns='type', - values='id', # Using 'id' just as a placeholder for the pivot + index="property_id", + columns="type", + values="id", # Using 'id' just as a placeholder for the pivot aggfunc=lambda x: True, # If an ID exists for a given type, mark as True - fill_value=False # Fill other entries as False + fill_value=False, # Fill other entries as False ).reset_index() non_zero_heat_demand_impact = comparison[ - (comparison["Estimated Heating Demand kWh Savings"] > 0) | - (comparison["Estimated Lighting kWh Savings"] > 0) | - (comparison["Estimated Solar kWh Savings"] > 0) - ] + (comparison["Estimated Heating Demand kWh Savings"] > 0) + | (comparison["Estimated Lighting kWh Savings"] > 0) + | (comparison["Estimated Solar kWh Savings"] > 0) + ] measure_matrix = measure_matrix[ - measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values) + measure_matrix["property_id"].isin( + non_zero_heat_demand_impact["property_id"].values + ) ] measure_matrix = measure_matrix.rename(columns=rename_dict) - comparison = comparison.merge( - measure_matrix, on="property_id", how="left" - ) + comparison = comparison.merge(measure_matrix, on="property_id", how="left") comparison["scenario_id"] = scenario_id property_scenario_impact.append(comparison) property_scenario_impact = pd.concat(property_scenario_impact) # property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"]) - for v in list(rename_dict.values()) + ["Air Source Heat Pump", "High Heat Retention Storage", "Boiler Upgrade"]: + for v in list(rename_dict.values()) + [ + "Air Source Heat Pump", + "High Heat Retention Storage", + "Boiler Upgrade", + ]: # Fill NaNs with False property_scenario_impact[v] = property_scenario_impact[v].fillna(False) @@ -642,18 +784,22 @@ def lewes_outputs(): property_scenario_impact["post_scenario_heating_hotwater_kwh"] * scaling_factor ) - grouped_data = grouped_data.merge( - property_scenario_impact, how="left", on="uprn" - ) + grouped_data = grouped_data.merge(property_scenario_impact, how="left", on="uprn") # Agg the data - grouped_data = grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]).agg({ - "estimated_heating_hotwater_kwh": "mean", - "estimated_heating_hotwater_kwh_scaled": "mean", - "estimated_cost": "mean", - "post_scenario_heating_hotwater_kwh": "mean", - "post_scenario_heating_hotwater_kwh_scaled": "mean" - }).reset_index() + grouped_data = ( + grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]) + .agg( + { + "estimated_heating_hotwater_kwh": "mean", + "estimated_heating_hotwater_kwh_scaled": "mean", + "estimated_cost": "mean", + "post_scenario_heating_hotwater_kwh": "mean", + "post_scenario_heating_hotwater_kwh_scaled": "mean", + } + ) + .reset_index() + ) scenario_names = pd.DataFrame( [ @@ -665,45 +811,40 @@ def lewes_outputs(): "scenario_id": 48, "scenario": "Demand reduction – no solid wall, floors or heating/renewables", }, - { - "scenario_id": 49, - "scenario": "Demand reduction – no decant" - }, + {"scenario_id": 49, "scenario": "Demand reduction – no decant"}, { "scenario_id": 50, "scenario": "Demand reduction – no decant + heating & solar", }, - { - "scenario_id": 51, - "scenario": "Whole house retrofit" - } + {"scenario_id": 51, "scenario": "Whole house retrofit"}, ] - ) - grouped_data = grouped_data.merge( - scenario_names, how="left", on="scenario_id" - ) + grouped_data = grouped_data.merge(scenario_names, how="left", on="scenario_id") if not grouped_data[ - grouped_data["estimated_heating_hotwater_kwh"] < grouped_data["post_scenario_heating_hotwater_kwh"]].empty: + grouped_data["estimated_heating_hotwater_kwh"] + < grouped_data["post_scenario_heating_hotwater_kwh"] + ].empty: raise Exception("someting went wrong") - if not grouped_data[grouped_data["estimated_heating_hotwater_kwh_scaled"] < grouped_data[ - "post_scenario_heating_hotwater_kwh_scaled"]].empty: + if not grouped_data[ + grouped_data["estimated_heating_hotwater_kwh_scaled"] + < grouped_data["post_scenario_heating_hotwater_kwh_scaled"] + ].empty: raise Exception("someting went wrong") # Reorder the columns grouped_data = grouped_data[ [ - 'property_type', - 'property_sub_type', - 'scenario', - 'estimated_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', - 'estimated_cost', + "property_type", + "property_sub_type", + "scenario", + "estimated_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", + "estimated_cost", ] ] @@ -730,9 +871,7 @@ def lewes_outputs(): scenario_names, how="left", on="scenario_id" ) - lewes_data = next_gen_dataset.merge( - property_scenario_impact, how="left", on="uprn" - ) + lewes_data = next_gen_dataset.merge(property_scenario_impact, how="left", on="uprn") lewes_data = lewes_data.sort_values( ["postcode", "uprn", "scenario_id"], ascending=True @@ -742,31 +881,52 @@ def lewes_outputs(): # TODO - remap the heating type lewes_data = lewes_data[ [ - 'uprn', 'address', 'postcode', 'property_type', 'built_form', + "uprn", + "address", + "postcode", + "property_type", + "built_form", # 'estimated_heating_hotwater_kwh', - 'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable', - 'ashp_size_kw', - 'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', - 'scenario', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', + "primary_fuel_type", + "gross_floor_area", + "floor_height", + "number_of_floors", + "ashp_suitable", + "ashp_size_kw", + "ashp_cost", + "solar_suitable", + "solar_size_kwp", + "solar_cost", + "scenario", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", # 'property_id', - dropped # 'current_energy_demand_heating_hotwater', - 'Estimated Heating Demand kWh Savings', - 'Estimated Lighting kWh Savings', - 'Estimated Solar kWh Savings', - 'estimated_cost', - 'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat', - 'Flat Roof Insulation', - 'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation', + "Estimated Heating Demand kWh Savings", + "Estimated Lighting kWh Savings", + "Estimated Solar kWh Savings", + "estimated_cost", + "post_scenario_heating_hotwater_kwh", + "Cavity Wall Insulation", + "Cylinder Thermostat", + "Flat Roof Insulation", + "Hot Water Tank Insulation", + "Loft Insulation", + "Mechanical Ventilation", + "Room Roof Insulation", # 'scenario_id', - dropped - 'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation', - 'Heating Control', - 'Solar PV', - 'Air Source Heat Pump', 'Boiler Upgrade', 'High Heat Retention Storage', - 'Internal Wall Insulation', - 'Solid Floor Insulation', - 'Suspended Floor Insulation', + "Low Energy Lighting", + "Secondary Heating", + "Windows Glazing", + "External Wall Insulation", + "Heating Control", + "Solar PV", + "Air Source Heat Pump", + "Boiler Upgrade", + "High Heat Retention Storage", + "Internal Wall Insulation", + "Solid Floor Insulation", + "Suspended Floor Insulation", ] ].rename( columns={ @@ -783,29 +943,34 @@ def lewes_outputs(): # "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh", "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh", "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh", - "estimated_cost": "Estimated Cost of Scenario" + "estimated_cost": "Estimated Cost of Scenario", } ) # We save this dataset, which will be shared with Lewes Council lewes_data.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", + index=False, ) - df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario', - values=['post_scenario_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh_scaled']) + df_pivot = property_scenario_impact.pivot_table( + index="uprn", + columns="scenario", + values=[ + "post_scenario_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh_scaled", + ], + ) # Flattening multi-index columns - df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns] + df_pivot.columns = [f"{col[0]}_{col[1]}" for col in df_pivot.columns] # Reset the index to have a clean dataframe df_pivot.reset_index(inplace=True) - next_gen_dataset = next_gen_dataset.merge( - df_pivot, how="left", on="uprn" - ) + next_gen_dataset = next_gen_dataset.merge(df_pivot, how="left", on="uprn") next_gen_dataset.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", + index=False, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 68978b08..d86be050 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -10,6 +10,7 @@ Additionally, we wil find the problematic records and remove them Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan or recommendations in case something went wrong """ + import pandas as pd from sqlalchemy.orm import Session from backend.app.db.models.portfolio import PropertyModel @@ -19,8 +20,7 @@ from backend.app.db.connection import db_session def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: return [ uprn - for (uprn,) in - session.query(PropertyModel.uprn) + for (uprn,) in session.query(PropertyModel.uprn) .filter(PropertyModel.portfolio_id == portfolio_id) .all() if uprn is not None @@ -34,7 +34,7 @@ with db_session() as session: sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] @@ -44,7 +44,7 @@ missed_properties.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_failed_properties_to_restart_20260102.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios: @@ -52,14 +52,14 @@ scenario_id = None from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel def count_plans_for_scenario(session: Session, scenario_id: int) -> int: return session.execute( select(func.count()) - .select_from(Plan) - .where(Plan.scenario_id == scenario_id) + .select_from(PlanModel) + .where(PlanModel.scenario_id == scenario_id) ).scalar_one() @@ -69,8 +69,7 @@ with db_session() as session: def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]: result = session.execute( - select(Plan.id) - .where(Plan.scenario_id == scenario_id) + select(PlanModel.id).where(PlanModel.scenario_id == scenario_id) ) return [row.id for row in result] @@ -84,7 +83,7 @@ from sqlalchemy.orm import Session def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -116,10 +117,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index 4b946c60..509c8179 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -5,6 +5,7 @@ This includes: # EPC C, there should be a plan 2) If the plan is fabric first, make sure they are actually fabric first """ + import pandas as pd scenario_names = { @@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items(): ) # find properties that are below the scenario sap target, but have no recommended measures - df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] + df["below_scenario_target"] = ( + df["current_sap_points"] < scenario_sap_targets[scenario_id] + ) df["no_recommended_measures"] = df["sap_points"] == 0 df["zero_cost"] = df["total_retrofit_cost"] == 0 df["sap_points_above_zero"] = df["sap_points"] > 0 @@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items(): ].copy() if scenario_sap_targets[scenario_id] == 81: - problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"] + problematic_properties = problematic_properties[ + problematic_properties["property_type"] != "Flat" + ] zero_cost_above_zero_sap = df[ (df["sap_points_above_zero"] & df["zero_cost"]) @@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items(): # pd.set_option('display.width', 1000) # problematic_properties.head(len(problematic_properties)) - print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})") - print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})") + print( + f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})" + ) + print( + f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})" + ) problems.append(problematic_properties) problems.append(zero_cost_above_zero_sap) @@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"]) sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal, sal2]) @@ -114,7 +123,7 @@ retry.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_problematic_properties_to_review_20260106.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Delete associated plans @@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist() from sqlalchemy.orm import Session from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from sqlalchemy import select, delete from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import sessionmaker -def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: +def get_property_ids_for_uprns( + session: Session, portfolio_id: int, uprns: list[int] +) -> list[int]: return [ property.id for property in session.query(PropertyModel) .filter( - PropertyModel.portfolio_id == portfolio_id, - PropertyModel.uprn.in_(uprns) + PropertyModel.portfolio_id == portfolio_id, PropertyModel.uprn.in_(uprns) ) .all() ] @@ -149,15 +159,21 @@ with db_session() as session: # Get all and delete plans for these property IDs -def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: - return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() +def get_all_plans_for_property_ids( + session: Session, property_ids: list[int] +) -> list[PlanModel]: + return ( + session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all() + ) -def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: +def get_ids_of_plans_for_deletion( + session: Session, property_ids: list[int] +) -> list[int]: return [ plan.id - for plan in session.query(Plan) - .filter(Plan.property_id.in_(property_ids)) + for plan in session.query(PlanModel) + .filter(PlanModel.property_id.in_(property_ids)) .all() ] @@ -168,7 +184,7 @@ with db_session() as session: def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index 4405d113..c451938d 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -2,17 +2,22 @@ import pandas as pd from tqdm import tqdm from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session, db_session -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ - InstalledMeasure +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, + InstalledMeasure, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.utils import sap_to_epc from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc -pd.set_option('display.max_rows', 500) -pd.set_option('display.max_columns', 500) -pd.set_option('display.width', 1000) +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) def get_all_data(portfolio_id, scenario_ids): @@ -22,22 +27,26 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -45,12 +54,12 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -59,25 +68,27 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -131,7 +142,7 @@ recommendations_df = pd.read_csv( sustainability_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" + sheet_name="Sustainability", ) sustainability_data_with_sap = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " @@ -140,10 +151,16 @@ sustainability_data_with_sap = pd.read_excel( properties_df["uprn"] = properties_df["uprn"].astype(str) property_data_comparison = properties_df.merge( - sustainability_data, how="inner", left_on="uprn", right_on="UPRN", suffixes=("_prop", "_sust") + sustainability_data, + how="inner", + left_on="uprn", + right_on="UPRN", + suffixes=("_prop", "_sust"), ) -property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip() +property_data_comparison["wall_type"] = ( + property_data_comparison["walls"].str.split(",").str[0].str.strip() +) column_pairs = { "built_form": "Attachment", @@ -154,25 +171,28 @@ column_pairs = { combination_tables = {} for v1, v2 in column_pairs.items(): - df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count') + df = property_data_comparison.groupby([v1, v2]).size().reset_index(name="count") combination_tables[v1] = df # We just need all of the measure types, per property recommendation_measure_types = recommendations_df[ - ["property_id", "measure_type" - , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings" - ] + [ + "property_id", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] ].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types[ - ["property_id", "measure_type", "flag"] -].drop_duplicates().pivot( - index='property_id', - columns='measure_type', - values='flag' +recommendations_measures_pivot = ( + recommendation_measure_types[["property_id", "measure_type", "flag"]] + .drop_duplicates() + .pivot(index="property_id", columns="measure_type", values="flag") ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -180,137 +200,157 @@ properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).me recommendations_measures_pivot, how="left", on="property_id" ) -sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] -) -sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["Internal", "FilledCavityPlusInternal"] -) -sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["External", "FilledCavityPlusExternal"] -) +sustainability_data["cavity_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"]) +sustainability_data["internal_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["Internal", "FilledCavityPlusInternal"]) +sustainability_data["external_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["External", "FilledCavityPlusExternal"]) sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250", "mm350", "mm400", "mm270"] ) sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( - ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] + [ + "Double 2002 or later", + "Double but age unknown", + "Triple", + "DoubleKnownData", + "Secondary", + "TripleKnownData", + ] ) sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Secondary"] ) -sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( - ["RetroFitted"] +sustainability_data["suspended_floor_insulation"] = sustainability_data[ + "Floor Insulation" +].isin(["RetroFitted"]) + +sustainability_data["boiler_upgrade"] = sustainability_data["Heating"].isin( + ["Boilers"] +) & sustainability_data["Boiler Efficiency"].isin(["A"]) +sustainability_data["air_source_heat_pump"] = sustainability_data["Heating"].isin( + ["Heat pumps (wet)"] ) -sustainability_data["boiler_upgrade"] = ( - sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) -) -sustainability_data["air_source_heat_pump"] = (sustainability_data["Heating"].isin(["Heat pumps (wet)"])) +sustainability_data["time_temperature_zone_control"] = sustainability_data[ + "Controls Adequacy" +].isin(["Top Spec"]) -sustainability_data["time_temperature_zone_control"] = ( - sustainability_data["Controls Adequacy"].isin(["Top Spec"]) -) - -sustainability_data["roomstat_programmer_trvs"] = ( - sustainability_data["Controls Adequacy"].isin(["Optimal"]) -) +sustainability_data["roomstat_programmer_trvs"] = sustainability_data[ + "Controls Adequacy" +].isin(["Optimal"]) sustainability_data["flat_roof_insulation"] = ( - (sustainability_data["Roof Construction"] == "Flat") & - (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) -) + sustainability_data["Roof Construction"] == "Flat" +) & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str) comparison = sustainability_data.merge( properties_to_recs[ - ["uprn", "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", "loft_insulation", - "double_glazing", "secondary_glazing", "suspended_floor_insulation", "boiler_upgrade", "air_source_heat_pump", - "time_temperature_zone_control", "roomstat_programmer_trvs", "flat_roof_insulation", "room_roof_insulation" - ] + [ + "uprn", + "cavity_wall_insulation", + "external_wall_insulation", + "internal_wall_insulation", + "loft_insulation", + "double_glazing", + "secondary_glazing", + "suspended_floor_insulation", + "boiler_upgrade", + "air_source_heat_pump", + "time_temperature_zone_control", + "roomstat_programmer_trvs", + "flat_roof_insulation", + "room_roof_insulation", + ] ], left_on="UPRN", right_on="uprn", how="left", - suffixes=("", "_from_recs") + suffixes=("", "_from_recs"), ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ cwi_conflicting = comparison[ - (comparison["cavity_wall_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["cavity_wall_insulation"]) + & (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) +].copy() cwi_conflicting["conflict_cavity_wall_insulation"] = True iwi_conflicting = comparison[ - (comparison["internal_wall_insulation"]) & - (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["internal_wall_insulation"]) + & (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) +].copy() iwi_conflicting["conflict_iwi_wall_insulation"] = True ewi_conflicting = comparison[ - (comparison["external_wall_insulation"]) & - (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["external_wall_insulation"]) + & (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) +].copy() ewi_conflicting["conflict_ewi_wall_insulation"] = True # ------------ Roof ------------ loft_conflicting = comparison[ - (comparison["loft_insulation"]) & - (pd.isnull(comparison["loft_insulation_from_recs"]) == False) - ].copy() + (comparison["loft_insulation"]) + & (pd.isnull(comparison["loft_insulation_from_recs"]) == False) +].copy() loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ double_glazing_conflicting = comparison[ - (comparison["double_glazing"] | comparison["secondary_glazing"]) & - (pd.isnull(comparison["double_glazing_from_recs"]) == False) & - (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) - ].copy() + (comparison["double_glazing"] | comparison["secondary_glazing"]) + & (pd.isnull(comparison["double_glazing_from_recs"]) == False) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) +].copy() double_glazing_conflicting["conflict_double_glazing"] = True secondary_glazing_conflicting = comparison[ - (comparison["secondary_glazing"]) & - (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) - ].copy() + (comparison["secondary_glazing"]) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) +].copy() secondary_glazing_conflicting["conflict_secondary_glazing"] = True # ------------ Floors ------------ floors_conflicting = comparison[ - (comparison["suspended_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) - ].copy() + (comparison["suspended_floor_insulation"]) + & (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) +].copy() floors_conflicting["conflict_suspended_floor_insulation"] = True # ------------ Boiler Upgrade ------------ boiler_conflicting = comparison[ - (comparison["boiler_upgrade"]) & - (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) - ].copy() + (comparison["boiler_upgrade"]) + & (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) +].copy() boiler_conflicting["conflict_boiler_upgrade"] = True # ------------ ASHP ------------ ashp_conflicting = comparison[ - (comparison["air_source_heat_pump"]) & - (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) - ].copy() + (comparison["air_source_heat_pump"]) + & (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) +].copy() ashp_conflicting["conflict_air_source_heat_pump"] = True # ------------ heat controls ------------ ttzc_conflicting = comparison[ - (comparison["time_temperature_zone_control"]) & - (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) - ].copy() + (comparison["time_temperature_zone_control"]) + & (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) +].copy() ttzc_conflicting["conflict_time_temperature_zone_control"] = True rst_conflicting = comparison[ - (comparison["roomstat_programmer_trvs"]) & - (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) - ].copy() + (comparison["roomstat_programmer_trvs"]) + & (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) +].copy() rst_conflicting["conflict_roomstat_programmer_trvs"] = True # ------------ Flat Roof Insulation ----------- flat_roof_conflicting = comparison[ - (comparison["flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) - ].copy() + (comparison["flat_roof_insulation"]) + & (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) +].copy() flat_roof_conflicting["conflict_flat_roof_insulation"] = True # All properties with conflicts @@ -327,22 +367,26 @@ all_conflicts = pd.concat( ashp_conflicting, ttzc_conflicting, rst_conflicting, - flat_roof_conflicting + flat_roof_conflicting, ] ) all_conflicts = all_conflicts[ [ "uprn", - 'conflict_cavity_wall_insulation', - 'conflict_iwi_wall_insulation', - 'conflict_ewi_wall_insulation', - 'conflict_loft_insulation', - 'conflict_double_glazing', - 'conflict_secondary_glazing', - 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', - 'conflict_air_source_heat_pump', - 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] + "conflict_cavity_wall_insulation", + "conflict_iwi_wall_insulation", + "conflict_ewi_wall_insulation", + "conflict_loft_insulation", + "conflict_double_glazing", + "conflict_secondary_glazing", + "conflict_suspended_floor_insulation", + "conflict_boiler_upgrade", + "conflict_air_source_heat_pump", + "conflict_time_temperature_zone_control", + "conflict_roomstat_programmer_trvs", + "conflict_flat_roof_insulation", + ] ] all_conflicts = all_conflicts.rename( @@ -358,31 +402,29 @@ all_conflicts = all_conflicts.rename( "conflict_air_source_heat_pump": "air_source_heat_pump", "conflict_time_temperature_zone_control": "time_temperature_zone_control", "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", - "conflict_flat_roof_insulation": "flat_roof_insulation" - + "conflict_flat_roof_insulation": "flat_roof_insulation", } ) # Reshape by UPRN by melting all_conflicts = all_conflicts.melt( - id_vars=["uprn"], - var_name="measure_type", - value_name="already_installed" + id_vars=["uprn"], var_name="measure_type", value_name="already_installed" ) -recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +recommendations_df["property_id"] = ( + recommendations_df["property_id"].astype(int).astype(str) +) properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) recs_with_uprn = recommendations_df.merge( properties_df[["property_id", "uprn"]], on="property_id", how="left", - suffixes=("", "_prop") + suffixes=("", "_prop"), ) recs_with_uprn = ( - recs_with_uprn - .sort_values("sap_points", ascending=False) + recs_with_uprn.sort_values("sap_points", ascending=False) .groupby(["uprn", "measure_type"], as_index=False) .first() ) @@ -390,13 +432,24 @@ recs_with_uprn = ( recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) installed_measures_df = all_conflicts.merge( - recs_with_uprn[["uprn", "measure_type", "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings"]], + recs_with_uprn[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ], how="left", - on=["uprn", "measure_type"] + on=["uprn", "measure_type"], ) -installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] +installed_measures_df = installed_measures_df[ + installed_measures_df["already_installed"] == True +] ## --- Sense checking ---- @@ -423,27 +476,26 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) recs_with_uprn[ (recs_with_uprn["measure_type"] == "mechanical_ventilation") & (recs_with_uprn["uprn"].isin(fabric_uprns)) - ] + ] .sort_values("sap_points", ascending=False) .drop_duplicates(subset=["uprn"]) ) - mv_installed = mv_recs[[ - "uprn", - "measure_type", - "sap_points", - "heat_demand", - "kwh_savings", - "co2_equivalent_savings", - "energy_cost_savings", - ]].copy() + mv_installed = mv_recs[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ].copy() mv_installed["already_installed"] = True - return pd.concat( - [installed_measures_df, mv_installed], - ignore_index=True - ) + return pd.concat([installed_measures_df, mv_installed], ignore_index=True) # installed_measures_df = add_mechanical_ventilation_for_fabric( @@ -453,24 +505,39 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 -for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: - print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) +for col in [ + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", +]: + print( + f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", + ) # Do some calcs on SAP impact sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() -properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap = properties_df[ + ["uprn", "current_sap_points", "current_epc_rating"] +].copy() properties_sap["uprn"] = properties_sap["uprn"].astype(str) -old_sap_vs_new = properties_sap.merge( - sap_impact, how="inner", on="uprn" +old_sap_vs_new = properties_sap.merge(sap_impact, how="inner", on="uprn") +old_sap_vs_new["new_sap_points"] = ( + old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +) +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) ) -old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] -old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(lambda x: sap_to_epc(x)) # How many properties go from below C to above -old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69][ + "new_epc_rating" +].value_counts() changed = old_sap_vs_new[ - (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) - ] + (old_sap_vs_new["current_sap_points"] < 69) + & (old_sap_vs_new["new_sap_points"] >= 69) +] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -499,22 +566,38 @@ def bulk_insert_installed_measures(installed_measures_df): now = datetime.utcnow() for _, row in installed_measures_df.iterrows(): - records.append({ - "uprn": int(row["uprn"]), - "measure_type": row["measure_type"], - "installed_at": now, - "sap_points": float(row["sap_points"]) if pd.notna(row["sap_points"]) else None, - "carbon_savings": float(row["co2_equivalent_savings"]) if pd.notna(row["co2_equivalent_savings"]) else None, - "kwh_savings": float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None, - "bill_savings": float(row["energy_cost_savings"]) if pd.notna(row["energy_cost_savings"]) else None, - "heat_demand_savings": float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None, - "source": SOURCE, - "is_active": True, - }) + records.append( + { + "uprn": int(row["uprn"]), + "measure_type": row["measure_type"], + "installed_at": now, + "sap_points": ( + float(row["sap_points"]) if pd.notna(row["sap_points"]) else None + ), + "carbon_savings": ( + float(row["co2_equivalent_savings"]) + if pd.notna(row["co2_equivalent_savings"]) + else None + ), + "kwh_savings": ( + float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None + ), + "bill_savings": ( + float(row["energy_cost_savings"]) + if pd.notna(row["energy_cost_savings"]) + else None + ), + "heat_demand_savings": ( + float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None + ), + "source": SOURCE, + "is_active": True, + } + ) try: for i in range(0, len(records), BATCH_SIZE): - batch = records[i:i + BATCH_SIZE] + batch = records[i : i + BATCH_SIZE] session.bulk_insert_mappings(InstalledMeasure, batch) session.commit() print(f"✅ Inserted {i + len(batch)} / {len(records)}") @@ -580,9 +663,7 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( def exclude_ventilation(column): return case( ( - InstalledMeasure.measure_type.notin_( - REBASING_EXCLUDED_MEASURES - ), + InstalledMeasure.measure_type.notin_(REBASING_EXCLUDED_MEASURES), column, ), else_=0.0, @@ -594,33 +675,24 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.sap_points)), 0.0, ).label("sap_points"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), 0.0, ).label("co2"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), 0.0, ).label("energy_kwh"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), 0.0, ).label("energy_bill"), - func.coalesce( - func.sum( - exclude_ventilation( - InstalledMeasure.heat_demand_savings - ) - ), + func.sum(exclude_ventilation(InstalledMeasure.heat_demand_savings)), 0.0, ).label("heat_demand"), ) @@ -657,16 +729,14 @@ def get_installed_measure_types_by_uprn( ) # Convert enums → strings - return { - r[0].value if hasattr(r[0], "value") else r[0] - for r in rows - } + return {r[0].value if hasattr(r[0], "value") else r[0] for r in rows} # ------------------------------------------------------------ # PROPERTY REBASING (READ-ONLY) # ------------------------------------------------------------ + def compute_property_sap_updates( properties: List[PropertyModel], sap_adjustments: Dict[int, float], # keyed by uprn @@ -692,14 +762,16 @@ def compute_property_sap_updates( sap_delta = sap_adjustments[prop.uprn] new_sap = prop.original_sap_points + sap_delta - updates.append({ - "property_id": prop.id, - "uprn": prop.uprn, - "original_sap_points": prop.original_sap_points, - "installed_sap_delta": sap_delta, - "new_sap_points": new_sap, - "is_adjusted": True, - }) + updates.append( + { + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": True, + } + ) return updates @@ -708,6 +780,7 @@ def compute_property_sap_updates( # PLAN RECOMPUTATION HELPERS # ------------------------------------------------------------ + def get_effective_plan_recommendations( session, plan_id: int, excluded_measure_types: Set[str] ) -> List[Recommendation]: @@ -715,11 +788,10 @@ def get_effective_plan_recommendations( session.query(Recommendation) .join(PlanRecommendations) .filter(PlanRecommendations.plan_id == plan_id) - .filter(Recommendation.default.is_(True))) + .filter(Recommendation.default.is_(True)) + ) if excluded_measure_types: - q = q.filter( - ~Recommendation.measure_type.in_(excluded_measure_types) - ) + q = q.filter(~Recommendation.measure_type.in_(excluded_measure_types)) return q.all() @@ -791,7 +863,11 @@ def get_installed_measure_types_by_property_id_for_portfolio( installed_by_property[property_id].add(mt) # drag-along rules - if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + if mt in { + "cavity_wall_insulation", + "internal_wall_insulation", + "external_wall_insulation", + }: installed_by_property[property_id].add("mechanical_ventilation") return installed_by_property @@ -810,7 +886,9 @@ def get_all_default_plan_recommendations( PlanRecommendations.plan_id, Recommendation, ) - .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + .join( + Recommendation, Recommendation.id == PlanRecommendations.recommendation_id + ) .filter(PlanRecommendations.plan_id.in_(plan_ids)) .filter(Recommendation.default.is_(True)) .all() @@ -835,9 +913,14 @@ def filter_remaining_recommendations( return recommendations return [ - r for r in recommendations + r + for r in recommendations if ( - (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + ( + r.measure_type.value + if hasattr(r.measure_type, "value") + else r.measure_type + ) not in installed_types ) ] @@ -845,11 +928,11 @@ def filter_remaining_recommendations( def compute_plan_updates( session, - plans: List[Plan], + plans: List[PlanModel], properties_by_id: Dict[int, PropertyModel], epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], installed_types_by_property_id, - all_ventilation_measures + all_ventilation_measures, ) -> List[dict]: """ Computes plan metrics after marking some recommendations as already installed. @@ -921,39 +1004,34 @@ def compute_plan_updates( # ): # continue - updates.append({ - "plan_id": plan.id, - "property_id": plan.property_id, - - # SAP / EPC - "post_sap_points": post_sap, - "post_epc_rating": sap_to_epc(post_sap), - - # Carbon - "co2_savings": remaining["co2_savings"], - "post_co2_emissions": post_co2, - - # Energy bills - "energy_bill_savings": remaining["energy_bill_savings"], - "post_energy_bill": post_bill, - - # Energy consumption - "energy_consumption_savings": remaining["energy_consumption_savings"], - "post_energy_consumption": post_kwh, - - # Valuation (safe) - "valuation_increase": remaining["valuation_increase"], - "valuation_post_retrofit": ( - prop.current_valuation - + remaining["valuation_increase"] - if prop.current_valuation is not None - else None - ), - - # Costs - "cost_of_works": remaining["cost_of_works"], - "contingency_cost": remaining["contingency_cost"], - }) + updates.append( + { + "plan_id": plan.id, + "property_id": plan.property_id, + # SAP / EPC + "post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + # Carbon + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, + # Energy bills + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, + # Energy consumption + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + remaining["valuation_increase"] + if prop.current_valuation is not None + else None + ), + # Costs + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], + } + ) property_to_installed_types[prop.id] = installed_types @@ -1065,7 +1143,6 @@ def compute_epc_rebasing_updates( updates[property_id] = { "property_id": property_id, - # Originals (only set once) "original_co2_emissions": ( epc.original_co2_emissions @@ -1087,7 +1164,6 @@ def compute_epc_rebasing_updates( if epc.original_current_energy_demand_heating_hotwater is not None else epc.current_energy_demand_heating_hotwater ), - # Adjustments (always re-applied from originals) "installed_measures_co2_adjustment": adj["co2"], "installed_measures_energy_demand_adjustment": adj["energy_kwh"], @@ -1106,8 +1182,8 @@ def persist_plan_updates(plan_updates: list[dict]): with db_session() as session: plans = ( - session.query(Plan) - .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + session.query(PlanModel) + .filter(PlanModel.id.in_([u["plan_id"] for u in plan_updates])) .all() ) @@ -1168,20 +1244,17 @@ def persist_epc_rebasing_updates( # Store originals once epc.original_co2_emissions = u["original_co2_emissions"] - epc.original_primary_energy_consumption = ( - u["original_primary_energy_consumption"] - ) - epc.original_current_energy_demand = ( - u["original_current_energy_demand"] - ) - epc.original_current_energy_demand_heating_hotwater = ( - u["original_current_energy_demand_heating_hotwater"] - ) + epc.original_primary_energy_consumption = u[ + "original_primary_energy_consumption" + ] + epc.original_current_energy_demand = u["original_current_energy_demand"] + epc.original_current_energy_demand_heating_hotwater = u[ + "original_current_energy_demand_heating_hotwater" + ] # Apply rebased values epc.co2_emissions = ( - u["original_co2_emissions"] - - u["installed_measures_co2_adjustment"] + u["original_co2_emissions"] - u["installed_measures_co2_adjustment"] ) epc.primary_energy_consumption = ( @@ -1195,18 +1268,18 @@ def persist_epc_rebasing_updates( ) # Flags + audit fields - epc.installed_measures_co2_adjustment = ( - u["installed_measures_co2_adjustment"] - ) - epc.installed_measures_energy_demand_adjustment = ( - u["installed_measures_energy_demand_adjustment"] - ) - epc.installed_measures_total_energy_bill_adjustment = ( - u["installed_measures_total_energy_bill_adjustment"] - ) - epc.installed_measures_heat_demand_adjustment = ( - u["installed_measures_heat_demand_adjustment"] - ) + epc.installed_measures_co2_adjustment = u[ + "installed_measures_co2_adjustment" + ] + epc.installed_measures_energy_demand_adjustment = u[ + "installed_measures_energy_demand_adjustment" + ] + epc.installed_measures_total_energy_bill_adjustment = u[ + "installed_measures_total_energy_bill_adjustment" + ] + epc.installed_measures_heat_demand_adjustment = u[ + "installed_measures_heat_demand_adjustment" + ] epc.is_epc_adjusted_for_installed_measures = True print(f"✅ Updated {len(epcs)} EPC records") @@ -1254,9 +1327,7 @@ def initialise_original_property_and_epc_values(portfolio_id: int): updated = True if epc.original_primary_energy_consumption is None: - epc.original_primary_energy_consumption = ( - epc.primary_energy_consumption - ) + epc.original_primary_energy_consumption = epc.primary_energy_consumption updated = True if epc.original_current_energy_demand is None: @@ -1314,21 +1385,19 @@ def get_installed_ventilation_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) - .label("sap_points"), - - func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) - .label("co2"), - - func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) - .label("energy_kwh"), - - func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) - .label("energy_bill"), - - func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) - .label("heat_demand"), + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0).label( + "sap_points" + ), + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0).label("co2"), + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0).label( + "energy_kwh" + ), + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0).label( + "energy_bill" + ), + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0).label( + "heat_demand" + ), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.measure_type == "mechanical_ventilation") @@ -1370,8 +1439,9 @@ def mark_recommendations_as_installed( stmt = ( update(Recommendation) .where( - tuple_(Recommendation.property_id, Recommendation.measure_type) - .in_(property_measure_pairs) + tuple_(Recommendation.property_id, Recommendation.measure_type).in_( + property_measure_pairs + ) ) .values(already_installed=True) ) @@ -1400,13 +1470,17 @@ with db_read_session() as session: .all() ) - all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) - installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + all_ventilation_measures = ( + get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, PORTFOLIO_ID + ) + ) + installed_types_by_property_id = ( + get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + ) plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == PORTFOLIO_ID) - .all() + session.query(PlanModel).filter(PlanModel.portfolio_id == PORTFOLIO_ID).all() ) epcs = { @@ -1419,23 +1493,17 @@ with db_read_session() as session: ) } - installed_adjustments = ( - get_installed_measure_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, - ) + installed_adjustments = get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, ) property_updates = compute_property_sap_updates( - properties, - {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} + properties, {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) properties_by_id = {p.id: p for p in properties} - property_updates_by_id = { - u["property_id"]: u - for u in property_updates - } + property_updates_by_id = {u["property_id"]: u for u in property_updates} epc_updates = compute_epc_rebasing_updates( epcs, @@ -1453,9 +1521,7 @@ with db_read_session() as session: ) # Used to mark recommendations - pairs = build_installed_recommendation_pairs( - installed_types_by_property_id - ) + pairs = build_installed_recommendation_pairs(installed_types_by_property_id) from copy import deepcopy @@ -1466,36 +1532,33 @@ for u in plan_updates_comparison: if not before: continue - u.update({ - # SAP - "before_sap_points": before.post_sap_points, - "after_sap_points": u["post_sap_points"], - - # Carbon - "before_post_co2_emissions": before.post_co2_emissions, - "after_post_co2_emissions": u["post_co2_emissions"], - - # Costs - "before_cost_of_works": before.cost_of_works, - "after_cost_of_works": u["cost_of_works"], - - "before_contingency_cost": before.contingency_cost, - "after_contingency_cost": u["contingency_cost"], - }) + u.update( + { + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + # Carbon + "before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + } + ) plan_updates_df = pd.DataFrame(plan_updates_comparison) plan_updates_df["delta_sap_points"] = ( - plan_updates_df["after_sap_points"] - - plan_updates_df["before_sap_points"] + plan_updates_df["after_sap_points"] - plan_updates_df["before_sap_points"] ) plan_updates_df["delta_carbon"] = ( plan_updates_df["after_post_co2_emissions"] - plan_updates_df["before_post_co2_emissions"] ) plan_updates_df["delta_cost_of_works"] = ( - plan_updates_df["after_cost_of_works"] - - plan_updates_df["before_cost_of_works"] + plan_updates_df["after_cost_of_works"] - plan_updates_df["before_cost_of_works"] ) plan_updates_df["delta_contingency_cost"] = ( plan_updates_df["after_contingency_cost"] @@ -1503,12 +1566,14 @@ plan_updates_df["delta_contingency_cost"] = ( ) # High-level sanity checks -summary = plan_updates_df[[ - "delta_sap_points", - "delta_carbon", - "delta_cost_of_works", - "delta_contingency_cost", -]].sum() +summary = plan_updates_df[ + [ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", + ] +].sum() print(summary) @@ -1619,17 +1684,15 @@ def apply_appliance_carbon_to_plans( .all() ) - epc_by_property_id = { - e.property_id: e for e in epcs - } + epc_by_property_id = {e.property_id: e for e in epcs} # -------------------------------------------- # Load plans with post carbon # -------------------------------------------- plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == portfolio_id) - .filter(Plan.post_co2_emissions.isnot(None)) + session.query(PlanModel) + .filter(PlanModel.portfolio_id == portfolio_id) + .filter(PlanModel.post_co2_emissions.isnot(None)) .all() ) @@ -1682,13 +1745,7 @@ def apply_appliance_carbon_to_plans( # Get all uprns for entries in already installed, from the database with db_read_session() as session: - db_uprns = { - str(r[0]) - for r in ( - session.query(InstalledMeasure.uprn) - .all() - ) - } + db_uprns = {str(r[0]) for r in (session.query(InstalledMeasure.uprn).all())} # What is the overlap of these properties and the properties in portfolo 430 sal_data = pd.read_excel( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 67ff2c85..e3008f65 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -3,31 +3,41 @@ from sqlalchemy.orm import Session from sqlalchemy import text, select from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel PORTFOLIO_ID = 435 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 - estimated_epcs = session.query(PropertyDetailsEpcModel).filter( - # PropertyDetailsEpcModel.estimated == True, - PropertyDetailsEpcModel.property_id.in_( - session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + estimated_epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter( + PropertyModel.portfolio_id == PORTFOLIO_ID + ) + ) ) - ).all() + .all() + ) # Get the ids estimated_epc_ids = [epc.property_id for epc in estimated_epcs] # I want to get the UPRNS for these properties, from the property model with db_read_session() as session: - estimated_uprns = session.query(PropertyModel.uprn).filter( - PropertyModel.id.in_( - session.query(PropertyDetailsEpcModel.property_id).filter( - PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + estimated_uprns = ( + session.query(PropertyModel.uprn) + .filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) ) ) - ).all() + .all() + ) estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] @@ -35,16 +45,16 @@ with db_read_session() as session: sal_1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal_2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal_1, sal_2]) -sal = sal.drop_duplicates(subset=['epc_os_uprn']) +sal = sal.drop_duplicates(subset=["epc_os_uprn"]) estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() @@ -55,20 +65,24 @@ SCENARIOS = [ # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP # 859, # EPC C - no solid floor, ashp 3.0 # 885, # EPC B - fabric first, no solid floor, ashp 3.0 - 908, 909, 910 + 908, + 909, + 910, ] # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids with db_read_session() as session: result = session.execute( - select(Plan.id, Plan.property_id) - .where(Plan.property_id.in_(estimated_epc_ids)) + select(PlanModel.id, PlanModel.property_id).where( + PlanModel.property_id.in_(estimated_epc_ids) + ) ) plans = [ { "plan_id": row.id, "property_id": row.property_id, - } for row in result + } + for row in result ] df = pd.DataFrame(plans) @@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) # Store the SAL -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " - "sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer: b1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 1" + sheet_name="batch 1", ) b2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 2" + sheet_name="batch 2", ) b3 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 3" + sheet_name="batch 3", ) b4 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 4" + sheet_name="batch 4", ) b5 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 5" + sheet_name="batch 5", ) # Batch 6 should be the remaining total = pd.concat([b1, b2, b3, b4, b5]) remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] # Create new output -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" - "20260107 corrected batch 6 sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer: b5.to_excel(writer, sheet_name="batch 5", index=False) remaining.to_excel(writer, sheet_name="batch 6", index=False) -all_together = pd.concat( - [b1, b2, b3, b4, b5, remaining] -) +all_together = pd.concat([b1, b2, b3, b4, b5, remaining]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index 68655e80..0ec34e7c 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -110,14 +110,17 @@ import pandas as pd # Solar PV savings - we need the amount of solar PV bill savings from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from collections import defaultdict PORTFOLIO_ID = 485 # Peabody -SCENARIOS = [ - 970 -] +SCENARIOS = [970] scenario_names = { 970: "EPC C - no solid floor, ashp 3.0", } @@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True), - Recommendation.already_installed.is_(False) - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendation materials (SEPARATE QUERY) # -------------------- - materials_query = session.query( - RecommendationMaterials - ).filter( - RecommendationMaterials.recommendation_id.in_(recommendation_ids) - ).all() + materials_query = ( + session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + .all() + ) # Group materials by recommendation_id materials_by_recommendation = defaultdict(list) for m in materials_query: - materials_by_recommendation[m.recommendation_id].append({ - "material_id": m.material_id, - "depth": m.depth, - "quantity": m.quantity, - "quantity_unit": m.quantity_unit, - "estimated_cost": m.estimated_cost, - }) + materials_by_recommendation[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) # Attach materials safely (no filtering side effects) for r in recommendations_data: @@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer: recommendations_df.to_excel(writer, sheet_name="recommendations", index=False) properties_df.to_excel(writer, sheet_name="properties", index=False) - + # solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] # average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() - # # Check tenures # initial_asset_data = pd.read_excel( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py index a18dc315..b7010cf7 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -4,7 +4,7 @@ import pandas as pd full_sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------Pull in the reduced sample ------ @@ -12,7 +12,7 @@ full_sal = pd.read_excel( reduced_sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " "ownership filtered sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------ Pull in the confirmed ownership column from Peabody ------ @@ -20,18 +20,20 @@ new_asset_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " "- Peabody " "- Data Extracts for Domna v2.xlsx", - sheet_name="Properties" + sheet_name="Properties", ) correct_sample = new_asset_data[ ~new_asset_data["AH Tenure"].isin( - ["Commercial", - "Freeholder", - "HOMEBUY / EQUITY LOAN", - "Leaseholder", - "Outright Sale", - "SHARED EQUITY", - "Shared Ownership"] + [ + "Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership", + ] ) ].copy() @@ -41,9 +43,7 @@ stuff_to_add = correct_sample[ ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) ]["UPRN"].values -sal_to_add = full_sal[ - full_sal["domna_property_id"].isin(stuff_to_add) -].copy() +sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy() # ------- Stuff to remove ------- stuff_to_remove = reduced_sal[ @@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py index 9170ab17..5e027a56 100644 --- a/etl/customers/slide_utils.py +++ b/etl/customers/slide_utils.py @@ -7,7 +7,7 @@ from sqlalchemy.sql import true from backend.app.db.utils import row2dict from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Recommendation -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.utils import sap_to_epc EPC_COLOURS = { @@ -17,7 +17,7 @@ EPC_COLOURS = { "D": "#fdd401", "E": "#fdab67", "F": "#ee8023", - "G": "#e71437" + "G": "#e71437", } @@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id: its associated default recommendations if any. """ # Adjust the join to correctly filter recommendations while including all properties - query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation, - (Recommendation.property_id == PropertyModel.id) & ( - Recommendation.default == true())) \ - .filter(PropertyModel.portfolio_id == portfolio_id) \ + query = ( + session.query(PropertyModel, Recommendation) + .outerjoin( + Recommendation, + (Recommendation.property_id == PropertyModel.id) + & (Recommendation.default == true()), + ) + .filter(PropertyModel.portfolio_id == portfolio_id) .all() + ) properties = {} for property, recommendation in query: # Ensure the property is added once with an empty list of recommendations initially if property.id not in properties: properties[property.id] = row2dict(property) - properties[property.id]['recommendations'] = [] + properties[property.id]["recommendations"] = [] # Append recommendations if they exist and meet the criteria (already filtered by the query) if recommendation and recommendation.default: - properties[property.id]['recommendations'].append(row2dict(recommendation)) + properties[property.id]["recommendations"].append(row2dict(recommendation)) return list(properties.values()) @@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a property's details. Returns an empty list if no property details are found. """ - property_details = session.query(PropertyDetailsEpcModel).filter( - PropertyDetailsEpcModel.portfolio_id == portfolio_id).all() + property_details = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) # Convert the SQLAlchemy objects to dictionaries - property_details_dict = [row2dict(pd) for pd in property_details] if property_details else [] + property_details_dict = ( + [row2dict(pd) for pd in property_details] if property_details else [] + ) return property_details_dict @@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a plan. Returns an empty list if no plans are found. """ - plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all() + plans = ( + session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all() + ) # Convert the SQLAlchemy objects to dictionaries plans_dict = [row2dict(plan) for plan in plans] if plans else [] @@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): return plans_dict -def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15): +def plot_epc_distribution( + df, + customer_key, + title="Your Units", + background_color="white", + bar_height=0.4, + font_size=15, +): """ Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes. Allows setting the plot background color and dynamically adjusts text size and bar spacing. @@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color :param font_size: Base font size for text annotations (default 15) """ # Calculate dynamic figure size or adjust based on preferences - square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries + square_size = max( + 6, len(df) * 0.6 + ) # Ensure minimum size and adjust based on number of entries fig, ax = plt.subplots(figsize=(square_size, square_size)) fig.patch.set_facecolor(background_color) # Set figure background color ax.set_facecolor(background_color) # Set axes background color - df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place - df_sorted = df.sort_values('percentage', ascending=True) + df["percentage"] = df["percentage"].round( + 1 + ) # Round the percentage values to 1 decimal place + df_sorted = df.sort_values("percentage", ascending=True) # Plot bars with specified height for adjustable thickness - bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'], - color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height) + bars = ax.barh( + df_sorted["current_epc_rating"], + df_sorted["percentage"], + color=df_sorted["current_epc_rating"].map(EPC_COLOURS), + edgecolor="none", + height=bar_height, + ) - epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size - count_percentage_font_size = font_size # Count (percentage) font size as base font size + epc_rating_font_size = ( + font_size * 2 + ) # EPC rating font size larger than base font size + count_percentage_font_size = ( + font_size # Count (percentage) font size as base font size + ) # Annotate bars with EPC ratings inside and count with percentage values outside for index, bar in enumerate(bars): width = bar.get_width() - epc_rating = df_sorted.iloc[index]['current_epc_rating'] - count = df_sorted.iloc[index]['count'] - percentage = df_sorted.iloc[index]['percentage'] + epc_rating = df_sorted.iloc[index]["current_epc_rating"] + count = df_sorted.iloc[index]["count"] + percentage = df_sorted.iloc[index]["percentage"] # EPC rating inside the bar with increased font size - ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2, - f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size) + ax.text( + width - (width * 0.05), + bar.get_y() + bar.get_height() / 2, + f"{epc_rating}", + va="center", + ha="right", + color="white", + fontsize=epc_rating_font_size, + ) # Count and percentage outside the bar, original font size - ax.text(width + 1, bar.get_y() + bar.get_height() / 2, - f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size) + ax.text( + width + 1, + bar.get_y() + bar.get_height() / 2, + f"{count} ({percentage}%)", + va="center", + color="black", + fontsize=count_percentage_font_size, + ) - ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally - ax.tick_params(axis='x', which='both', bottom=False, top=False, - labelbottom=False) # Remove x-axis tick marks and values - ax.tick_params(axis='y', which='both', left=False, right=False, - labelleft=False) # Remove y-axis tick marks and labels - ax.spines['top'].set_visible(False) # Remove top spine - ax.spines['right'].set_visible(False) # Remove right spine - ax.spines['left'].set_visible(False) # Remove left spine - ax.spines['bottom'].set_visible(False) # Remove bottom spine + ax.set_title( + title, fontsize=font_size * 1.2 + ) # Adjust title font size proportionally + ax.tick_params( + axis="x", which="both", bottom=False, top=False, labelbottom=False + ) # Remove x-axis tick marks and values + ax.tick_params( + axis="y", which="both", left=False, right=False, labelleft=False + ) # Remove y-axis tick marks and labels + ax.spines["top"].set_visible(False) # Remove top spine + ax.spines["right"].set_visible(False) # Remove right spine + ax.spines["left"].set_visible(False) # Remove left spine + ax.spines["bottom"].set_visible(False) # Remove bottom spine plt.tight_layout() # Adjust layout plt.show() # Save the figure as an image - figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png' - fig.savefig(figure_path, bbox_inches='tight') + figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png" + fig.savefig(figure_path, bbox_inches="tight") plt.close(fig) # Close the figure to free memory return fig, figure_path -def save_plot_to_image(figure, path='plot.png'): +def save_plot_to_image(figure, path="plot.png"): """ Saves a matplotlib figure to an image file for insertion into PowerPoint. """ - figure.savefig(path, bbox_inches='tight') + figure.savefig(path, bbox_inches="tight") plt.close(figure) -def save_figure_as_image(figure, filename='temp_plot.png'): +def save_figure_as_image(figure, filename="temp_plot.png"): """ Saves a matplotlib figure to an image file. """ figure.savefig(filename, dpi=300) - plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments + plt.close( + figure + ) # Close the figure to prevent it from displaying in notebooks or Python environments -def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8), - height_inches=Inches(2)): +def add_commentary_with_bullets( + slide, + commentary, + top_inches, + left_inches=Inches(1), + width_inches=Inches(8), + height_inches=Inches(2), +): """ Adds commentary with bullet points to a slide. @@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche :param width_inches: The width of the commentary text box. :param height_inches: The height of the commentary text box. """ - txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches) + txBox = slide.shapes.add_textbox( + left_inches, top_inches, width_inches, height_inches + ) tf = txBox.text_frame # Configure text frame @@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche for i, section in enumerate(sections): if i > 0: - p = tf.add_paragraph() # Add a new paragraph for each section after the first + p = ( + tf.add_paragraph() + ) # Add a new paragraph for each section after the first else: p = tf.paragraphs[0] # Use the first paragraph for the first section p.text = section @@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None): # Determine the position of the commentary text box based on whether an image is included if img_path: # Add the image - slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)) + slide.shapes.add_picture( + img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5) + ) # Position for commentary when image is present commentary_top = Inches(6) else: @@ -237,16 +300,18 @@ def create_powerpoint(data, save_location): prs = Presentation() for slide, slide_data in data.items(): - slide_figure_path = data[slide].get('image_path') - text = data[slide].get('text') - title = data[slide].get('title', "") + slide_figure_path = data[slide].get("image_path") + text = data[slide].get("text") + title = data[slide].get("title", "") add_slide_with_image(prs, title, slide_figure_path, text) # Save the presentation prs.save(save_location) -def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target): +def create_recommendations_summary( + recommendations_df, properties_df, property_details_df, sap_target +): # Aggregate the impact of the recommendations # We want: # Total number of sap points @@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d # total bill savings # total cost # Total Co2 impact - recommendations_summary = recommendations_df.groupby(["property_id"]).agg( - total_sap_points=("sap_points", "sum"), - total_valuation_impact=("property_valuation_increase", "sum"), - total_bill_savings=("energy_cost_savings", "sum"), - total_cost=("estimated_cost", "sum"), - total_carbon=("co2_equivalent_savings", "sum"), - adjusted_heat_demand=("adjusted_heat_demand", "sum") - ).reset_index() + recommendations_summary = ( + recommendations_df.groupby(["property_id"]) + .agg( + total_sap_points=("sap_points", "sum"), + total_valuation_impact=("property_valuation_increase", "sum"), + total_bill_savings=("energy_cost_savings", "sum"), + total_cost=("estimated_cost", "sum"), + total_carbon=("co2_equivalent_savings", "sum"), + adjusted_heat_demand=("adjusted_heat_demand", "sum"), + ) + .reset_index() + ) # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill recommendations_summary = recommendations_summary.merge( - properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id", - how="left" + properties_df[["id", "uprn", "current_sap_points"]].rename( + columns={"id": "property_id"} + ), + on="property_id", + how="left", ) recommendations_summary["expected_sap_points"] = ( - recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"] + recommendations_summary["current_sap_points"] + + recommendations_summary["total_sap_points"] ) - recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply( - lambda x: sap_to_epc(x) + recommendations_summary["expected_epc_rating"] = recommendations_summary[ + "expected_sap_points" + ].apply(lambda x: sap_to_epc(x)) + recommendations_summary["sap_difference"] = ( + sap_target - recommendations_summary["expected_sap_points"] ) - recommendations_summary["sap_difference"] = sap_target - recommendations_summary["expected_sap_points"] if property_details_df is not None: recommendations_summary = recommendations_summary.merge( - property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename( + property_details_df[ + ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"] + ].rename( columns={ "id": "property_id", "co2_emissions": "current_co2", "adjusted_energy_consumption": "current_energy", - "energy_bill": "current_energy_bill" + "energy_bill": "current_energy_bill", } ), on="uprn", - how="left" + how="left", ) return recommendations_summary diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index a65509d5..d5a81423 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session from backend.app.db.models.recommendations import ( Recommendation, - Plan, + PlanModel, PlanRecommendations, RecommendationMaterials, ) @@ -73,12 +73,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- latest_plans_subq = ( session.query( - Plan.scenario_id, - Plan.property_id, - func.max(Plan.created_at).label("latest_created_at"), + PlanModel.scenario_id, + PlanModel.property_id, + func.max(PlanModel.created_at).label("latest_created_at"), ) - .filter(Plan.scenario_id.in_(scenario_ids)) - .group_by(Plan.scenario_id, Plan.property_id) + .filter(PlanModel.scenario_id.in_(scenario_ids)) + .group_by(PlanModel.scenario_id, PlanModel.property_id) .subquery() ) @@ -87,12 +87,12 @@ def get_data(portfolio_id, scenario_ids): # ).all() plans_query = ( - session.query(Plan) + session.query(PlanModel) .join( latest_plans_subq, - (Plan.scenario_id == latest_plans_subq.c.scenario_id) - & (Plan.property_id == latest_plans_subq.c.property_id) - & (Plan.created_at == latest_plans_subq.c.latest_created_at), + (PlanModel.scenario_id == latest_plans_subq.c.scenario_id) + & (PlanModel.property_id == latest_plans_subq.c.property_id) + & (PlanModel.created_at == latest_plans_subq.c.latest_created_at), ) .all() ) @@ -108,7 +108,7 @@ def get_data(portfolio_id, scenario_ids): # ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -118,12 +118,14 @@ def get_data(portfolio_id, scenario_ids): # Recommendations (NO materials yet) # -------------------- recommendations_query = ( - session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id) + session.query( + Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id + ) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default.is_(True), From 958ab72e0acefcca541559f8608ed3252c21d7eb Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 12:24:47 +0000 Subject: [PATCH 098/340] deploy to main with new policy --- backend/address2UPRN/main.py | 51 ++++++++++++++++++++++++- backend/postcode_splitter/main.py | 6 +++ infrastructure/terraform/shared/main.tf | 15 ++++++++ utils/s3.py | 1 - 4 files changed, 70 insertions(+), 3 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 8d1ba21d..0aedd082 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -10,11 +10,13 @@ from typing import Set import json import requests from uuid import UUID +import uuid from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from utils.s3 import save_csv_to_s3 +from datetime import datetime logger = setup_logger() - EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) @@ -502,6 +504,46 @@ def resolve_uprns_for_postcode_group( ) +def save_results_to_s3( + results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None +) -> bool: + """ + Save results DataFrame to S3 as CSV. + + :param results_df: The DataFrame containing results + :param task_id: The task ID (used for file naming) + :param bucket_name: The S3 bucket name (defaults to env variable) + :return: True if successful, False otherwise + """ + if bucket_name is None: + bucket_name = os.getenv("S3_BUCKET_NAME") + + if not bucket_name: + logger.error( + "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set" + ) + return False + + try: + # Create a filename with the task ID + file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}" + file_key = f"ara_raw_outputs/{task_id}/{sub_task_id}/{file_name}.csv" + + # Save to S3 + success = save_csv_to_s3(results_df, bucket_name, file_key) + + if success: + logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}") + return True + else: + logger.error(f"Failed to save results to S3") + return False + + except Exception as e: + logger.error(f"Error saving results to S3: {str(e)}") + return False + + def test(a, b): assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}" @@ -760,7 +802,12 @@ def handler(event, context, local=False): # Create results DataFrame result_df = pd.DataFrame(results_data) - logger.info(f"Created results DataFrame with {len(result_df)} rows") + + # Save results to S3 + try: + save_results_to_s3(result_df, str(task_id), str(subtask_id)) + except Exception as s3_error: + logger.error(f"Failed to save results to S3: {s3_error}") results.append( { diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 943435b9..73a79d2c 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -164,6 +164,12 @@ def handler(event, context, local=False): # just do 5 well we are testing, sqs connection if local: df = df.head(5) + + # TODO: DELETE ME, if you see this in the PR. + # TODO: DELETE ME, if you see this in the PR. + # TODO: DELETE ME, if you see this in the PR. + df = df.head(5) + logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") # Sanitise postcodes diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 5e189dc9..4ec57c3e 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -305,6 +305,21 @@ module "address2uprn_registry" { } +# S3 policy for postcode splitter to read from retrofit data bucket +module "address2uprn_s3_read_and_write" { + source = "../modules/s3_iam_policy" + + policy_name = "Address2UPRNReadandWriteS3" + policy_description = "Allow address2uprn Lambda to read and write from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"] + resource_paths = ["/*"] +} + +output "postcode_splitter_s3_read_arn" { + value = module.postcode_splitter_s3_read.policy_arn +} + ################################################ # Condition ETL – Lambda ECR ################################################ diff --git a/utils/s3.py b/utils/s3.py index 2e67d4f0..0e79c26b 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -8,7 +8,6 @@ from botocore.exceptions import NoCredentialsError, PartialCredentialsError logger = setup_logger() - def read_from_s3(bucket_name, s3_file_name): """ Read an object from s3. Decoding of the data is left for outside of this function From d9708fe516b276b931f45f5f4da6251ae3afab22 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 12:30:28 +0000 Subject: [PATCH 099/340] push policy --- infrastructure/terraform/lambda/address2UPRN/main.tf | 6 ++++++ infrastructure/terraform/shared/main.tf | 4 ++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index caf06785..12f0a4b3 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -44,3 +44,9 @@ module "address2uprn" { }, ) } + +# Attach S3 read policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 4ec57c3e..9733f5f9 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -316,8 +316,8 @@ module "address2uprn_s3_read_and_write" { resource_paths = ["/*"] } -output "postcode_splitter_s3_read_arn" { - value = module.postcode_splitter_s3_read.policy_arn +output "address_2_uprn_s3_read_and_write_arn" { + value = module.address2uprn_s3_read_and_write.policy_arn } ################################################ From 7c88e22424a1f4d93c6a6f9c5d56578438e45c3d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:31:37 +0000 Subject: [PATCH 100/340] Define Plan and Scenario domain classes --- backend/app/db/models/portfolio.py | 151 ++++++++++++++++------- backend/app/db/models/recommendations.py | 4 +- backend/domain/plan.py | 30 +++++ backend/domain/scenario.py | 46 +++++++ 4 files changed, 186 insertions(+), 45 deletions(-) create mode 100644 backend/domain/plan.py create mode 100644 backend/domain/scenario.py diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index d151bdc4..54de8dcc 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -1,7 +1,17 @@ import enum import pytz import datetime -from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint +from sqlalchemy import ( + Column, + Integer, + Text, + Boolean, + Float, + DateTime, + Enum, + ForeignKey, + CheckConstraint, +) from sqlalchemy.ext.declarative import declarative_base from backend.app.db.models.users import UserModel # noqa from backend.app.db.models.materials import MaterialType @@ -31,23 +41,43 @@ class PortfolioGoal(enum.Enum): class Portfolio(Base): - __tablename__ = 'portfolio' + __tablename__ = "portfolio" id = Column(Integer, primary_key=True, autoincrement=True) name = Column(Text, nullable=False) budget = Column(Float) - status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) - goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False) + status = Column( + Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) + goal = Column( + Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) cost = Column(Float) number_of_properties = Column(Integer) - co2_equivalent_savings = Column(Float) # Unit is always tonnes so we don't need to store the unit - energy_savings = Column(Float) # Unit is always kWh so we don't need to store the unit - energy_cost_savings = Column(Float) # Unit is always £ so we don't need to store the unit for the moment - property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment - rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment + co2_equivalent_savings = Column( + Float + ) # Unit is always tonnes so we don't need to store the unit + energy_savings = Column( + Float + ) # Unit is always kWh so we don't need to store the unit + energy_cost_savings = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment + property_valuation_increase = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment + rental_yield_increase = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment total_work_hours = Column(Float) labour_days = Column(Float) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) # Aggregations for summary epc_breakdown_pre_retrofit = Column(Text) epc_breakdown_post_retrofit = Column(Text) @@ -71,7 +101,7 @@ class PropertyCreationStatus(enum.Enum): ERROR = "ERROR" -class Epc(enum.Enum): +class Epc(enum.Enum): # TODO: Move to domain? A = "A" B = "B" C = "C" @@ -82,20 +112,27 @@ class Epc(enum.Enum): class PropertyModel(Base): - __tablename__ = 'property' + __tablename__ = "property" id = Column(Integer, primary_key=True, autoincrement=True) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) uprn = Column(Integer) landlord_property_id = Column(Text) building_reference_number = Column(Integer) - status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) + status = Column( + Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) address = Column(Text) postcode = Column(Text) has_pre_condition_report = Column(Boolean) has_recommendations = Column(Boolean) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) property_type = Column(Text) built_form = Column(Text) local_authority = Column(Text) @@ -127,7 +164,7 @@ rating_lookup = { "Average": FeatureRating.AVERAGE, "Poor": FeatureRating.POOR, "Very Poor": FeatureRating.VERY_POOR, - "N/A": FeatureRating.NA + "N/A": FeatureRating.NA, } @@ -136,32 +173,45 @@ def get_feature_rating_from_string(rating_str: str): class PropertyDetailsEpcModel(Base): - __tablename__ = 'property_details_epc' + __tablename__ = "property_details_epc" id = Column(Integer, primary_key=True, autoincrement=True) - property_id = Column(Integer, ForeignKey('property.id'), nullable=False) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + property_id = Column(Integer, ForeignKey("property.id"), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) full_address = Column(Text) lodgement_date = Column(DateTime) is_expired = Column(Boolean) total_floor_area = Column(Float) walls = Column(Text) - walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5')) + walls_rating = Column( + Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5") + ) roof = Column(Text) - roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5')) + roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5")) floor = Column(Text) - floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5')) + floor_rating = Column( + Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5") + ) windows = Column(Text) - windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5')) + windows_rating = Column( + Integer, CheckConstraint("windows_rating>=1 AND windows_rating<=5") + ) heating = Column(Text) - heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5')) + heating_rating = Column( + Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5") + ) heating_controls = Column(Text) heating_controls_rating = Column( - Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5') + Integer, + CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"), ) hot_water = Column(Text) - hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5')) + hot_water_rating = Column( + Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5") + ) lighting = Column(Text) - lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5')) + lighting_rating = Column( + Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5") + ) mainfuel = Column(Text) ventilation = Column(Text) solar_pv = Column(Text) @@ -219,7 +269,7 @@ class PropertyDetailsSpatial(Base): class PropertyDetailsMeter(Base): - __tablename__ = 'property_details_meter' + __tablename__ = "property_details_meter" id = Column(Integer, primary_key=True, autoincrement=True) uprn = Column(Integer, nullable=False) energy_supplier = Column(Text) @@ -230,11 +280,13 @@ class PropertyDetailsMeter(Base): class PropertyTargetsModel(Base): - __tablename__ = 'property_targets' + __tablename__ = "property_targets" id = Column(Integer, primary_key=True, autoincrement=True) - property_id = Column(Integer, ForeignKey('property.id'), nullable=False) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + property_id = Column(Integer, ForeignKey("property.id"), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) epc = Column(Enum(Epc)) heat_demand = Column(Text) @@ -242,23 +294,36 @@ class PropertyTargetsModel(Base): class PortfolioUsers(Base): __tablename__ = "portfolioUsers" id = Column(Integer, primary_key=True, autoincrement=True) - user_id = Column(Integer, ForeignKey('user.id'), nullable=False) - portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + user_id = Column(Integer, ForeignKey("user.id"), nullable=False) + portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False) role = Column(Text, nullable=False) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) class PropertyInstalledMeasures(Base): """ This model keeps a record of the installed measures for each property, at the UPRN level """ - __tablename__ = 'property_installed_measures' + + __tablename__ = "property_installed_measures" id = Column(Integer, primary_key=True, autoincrement=True) uprn = Column(Integer, nullable=False) measure_type = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + installed_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) ) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 759c088e..356c0fd7 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -66,7 +66,7 @@ class RecommendationMaterials(Base): estimated_cost = Column(Float, nullable=False) -class PlanTypeEnum(enum.Enum): +class PlanTypeEnum(enum.Enum): # TODO: move this to domain? SOLAR_ECO4 = "solar_eco4" SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4" EMPTY_CAVITY_ECO = "empty_cavity_eco" @@ -93,7 +93,7 @@ class PlanModel(Base): BigInteger, ForeignKey("scenario.id") ) - created_at: Mapped = mapped_column( # type: ignore + created_at: Mapped[datetime] = mapped_column( # type: ignore TIMESTAMP, nullable=False, server_default=func.now() ) diff --git a/backend/domain/plan.py b/backend/domain/plan.py new file mode 100644 index 00000000..b14213c1 --- /dev/null +++ b/backend/domain/plan.py @@ -0,0 +1,30 @@ +from datetime import datetime +from typing import Optional + +from backend.app.db.models.portfolio import Epc +from backend.app.db.models.recommendations import PlanTypeEnum +from backend.domain.scenario import Scenario + + +class Plan: + property_id: int + portfolio_id: int + scenario: Scenario + created_at: datetime + is_default: bool + + valuation_increase_lower_bound: Optional[float] = None + valuation_increase_upper_bound: Optional[float] = None + valuation_increase_average: Optional[float] = None + plan_type: Optional[PlanTypeEnum] = None + post_sap_points: Optional[float] = None + post_epc_rating: Optional[Epc] = None + post_co2_emissions: Optional[float] = None + co2_savings: Optional[float] = None + post_energy_bill: Optional[float] = None + post_energy_consumption: Optional[float] = None + energy_consumption_savings: Optional[float] = None + valuation_post_retrofit: Optional[float] = None + valuation_increase: Optional[float] = None + cost_of_works: Optional[float] = None + contingency_cost: Optional[float] = None diff --git a/backend/domain/scenario.py b/backend/domain/scenario.py new file mode 100644 index 00000000..4a15fc09 --- /dev/null +++ b/backend/domain/scenario.py @@ -0,0 +1,46 @@ +from datetime import datetime +from typing import Optional + + +class Scenario: + name: str + created_at: datetime + housing_type: str + goal: str # TODO: make enum + goal_value: str + trigger_file_path: str + multi_plan: bool + is_default: bool # TODO: isn't this Plan-level? + + budget: Optional[float] = None + already_installed_file_path: Optional[str] = None + patches_file_path: Optional[str] = None + non_invasive_recommendations_file_path: Optional[str] = None + exclusions: Optional[str] = None + + # Previously portfolio-level fields + # TODO: are these needed scenario-level? + cost: Optional[float] = None + contingency: Optional[float] = None + funding: Optional[float] = None + total_work_hours: Optional[float] = None + energy_savings: Optional[float] = None + co2_equivalent_savings: Optional[float] = None + energy_cost_savings: Optional[float] = None + epc_breakdown_pre_retrofit: Optional[int] = None + epc_breakdown_post_retrofit: Optional[int] = None + number_of_properties: Optional[int] = None + n_units_to_retrofit: Optional[int] = None + co2_per_unit_pre_retrofit: Optional[str] = None + co2_per_unit_post_retrofit: Optional[str] = None + energy_bill_per_unit_pre_retrofit: Optional[str] = None + energy_bill_per_unit_post_retrofit: Optional[str] = None + energy_consumption_per_unit_pre_retrofit: Optional[str] = None + energy_consumption_per_unit_post_retrofit: Optional[str] = None + valuation_improvement_per_unit: Optional[str] = None + cost_per_unit: Optional[str] = None + cost_per_co2_saved: Optional[str] = None + cost_per_sap_point: Optional[str] = None + valuation_return_on_ivestment: Optional[str] = None + property_valuation_increase: Optional[float] = None + labour_days: Optional[float] = None From 37c89fb6ef35e6db86440c025b610ddc695c24c1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 12:34:58 +0000 Subject: [PATCH 101/340] address2uprn --- infrastructure/terraform/lambda/address2UPRN/main.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 12f0a4b3..a6f56074 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -47,6 +47,6 @@ module "address2uprn" { # Attach S3 read policy to the Lambda execution role resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" { - role = module.lambda.role_name + role = module.address2uprn.role_name policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn } \ No newline at end of file From d7a76821457104071fdf1addd2f0910d0a850fa3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 12:40:39 +0000 Subject: [PATCH 102/340] terraform version --- .github/workflows/deploy_terraform.yml | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index da98f4d9..e8e82edf 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -116,7 +116,8 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.address2uprn_image.outputs.image_digest }} - terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + terraform_apply: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -157,7 +158,8 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} - terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + terraform_apply: 'true' secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} From c31ad577a6945b189484ad2172436eb3f50189d7 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:44:18 +0000 Subject: [PATCH 103/340] define class methods to construct domain classes from sqlalchemy models --- backend/domain/plan.py | 9 ++++++++- backend/domain/scenario.py | 7 +++++++ 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/backend/domain/plan.py b/backend/domain/plan.py index b14213c1..b3411b10 100644 --- a/backend/domain/plan.py +++ b/backend/domain/plan.py @@ -1,8 +1,9 @@ +from __future__ import annotations from datetime import datetime from typing import Optional from backend.app.db.models.portfolio import Epc -from backend.app.db.models.recommendations import PlanTypeEnum +from backend.app.db.models.recommendations import PlanModel, PlanTypeEnum, ScenarioModel from backend.domain.scenario import Scenario @@ -28,3 +29,9 @@ class Plan: valuation_increase: Optional[float] = None cost_of_works: Optional[float] = None contingency_cost: Optional[float] = None + + @classmethod + def from_sqlalchemy( + cls, plan_model: PlanModel, scenario_model: ScenarioModel + ) -> Plan: + raise NotImplementedError diff --git a/backend/domain/scenario.py b/backend/domain/scenario.py index 4a15fc09..f4d639cb 100644 --- a/backend/domain/scenario.py +++ b/backend/domain/scenario.py @@ -1,6 +1,9 @@ +from __future__ import annotations from datetime import datetime from typing import Optional +from backend.app.db.models.recommendations import ScenarioModel + class Scenario: name: str @@ -44,3 +47,7 @@ class Scenario: valuation_return_on_ivestment: Optional[str] = None property_valuation_increase: Optional[float] = None labour_days: Optional[float] = None + + @classmethod + def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario: + raise NotImplementedError From 80cd44c97a51e40b09642e3a6eae1d1d28e115b0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:44:42 +0000 Subject: [PATCH 104/340] move domain into app directory --- backend/{ => app}/domain/plan.py | 0 backend/{ => app}/domain/scenario.py | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename backend/{ => app}/domain/plan.py (100%) rename backend/{ => app}/domain/scenario.py (100%) diff --git a/backend/domain/plan.py b/backend/app/domain/plan.py similarity index 100% rename from backend/domain/plan.py rename to backend/app/domain/plan.py diff --git a/backend/domain/scenario.py b/backend/app/domain/scenario.py similarity index 100% rename from backend/domain/scenario.py rename to backend/app/domain/scenario.py From a0515ea3bb720b81c0f133b1a1844ea1513f159a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:45:17 +0000 Subject: [PATCH 105/340] correct import path following move of domain --- backend/app/domain/plan.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/app/domain/plan.py b/backend/app/domain/plan.py index b3411b10..3b79d89d 100644 --- a/backend/app/domain/plan.py +++ b/backend/app/domain/plan.py @@ -4,7 +4,7 @@ from typing import Optional from backend.app.db.models.portfolio import Epc from backend.app.db.models.recommendations import PlanModel, PlanTypeEnum, ScenarioModel -from backend.domain.scenario import Scenario +from backend.app.domain.scenario import Scenario class Plan: From 4ddb5592f3b18ba2e295608012922d7d1b037bb2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 12:58:44 +0000 Subject: [PATCH 106/340] give classes immutable records to protect udpating --- backend/app/domain/classes/plan.py | 46 +++++++++++++++ backend/app/domain/classes/scenario.py | 58 +++++++++++++++++++ .../{plan.py => records/plan_record.py} | 17 ++---- .../scenario_record.py} | 24 +++----- 4 files changed, 118 insertions(+), 27 deletions(-) create mode 100644 backend/app/domain/classes/plan.py create mode 100644 backend/app/domain/classes/scenario.py rename backend/app/domain/{plan.py => records/plan_record.py} (71%) rename backend/app/domain/{scenario.py => records/scenario_record.py} (71%) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py new file mode 100644 index 00000000..401204aa --- /dev/null +++ b/backend/app/domain/classes/plan.py @@ -0,0 +1,46 @@ +from __future__ import annotations +from dataclasses import replace +from typing import Optional + +from backend.app.db.models.recommendations import PlanModel +from backend.app.domain.classes.scenario import Scenario +from backend.app.domain.records.plan_record import PlanRecord + + +class Plan: + def __init__( + self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None + ): + self.id = id + self._record = record + self.scenario = scenario + + @classmethod + def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan: + record = PlanRecord( + property_id=plan_model.property_id, + portfolio_id=plan_model.portfolio_id, + scenario_id=plan_model.scenario_id, + created_at=plan_model.created_at, + is_default=plan_model.is_default, + valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound, + valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound, + valuation_increase_average=plan_model.valuation_increase_average, + plan_type=plan_model.plan_type, + post_sap_points=plan_model.post_sap_points, + post_epc_rating=plan_model.post_epc_rating, + post_co2_emissions=plan_model.post_co2_emissions, + co2_savings=plan_model.co2_savings, + post_energy_bill=plan_model.post_energy_bill, + energy_bill_savings=plan_model.energy_bill_savings, + post_energy_consumption=plan_model.post_energy_consumption, + energy_consumption_savings=plan_model.energy_consumption_savings, + valuation_post_retrofit=plan_model.valuation_post_retrofit, + valuation_increase=plan_model.valuation_increase, + cost_of_works=plan_model.cost_of_works, + contingency_cost=plan_model.contingency_cost, + ) + return cls(record=record, scenario=scenario, id=plan_model.id) + + def set_default(self, value: bool) -> None: + self._record = replace(self._record, is_default=value) diff --git a/backend/app/domain/classes/scenario.py b/backend/app/domain/classes/scenario.py new file mode 100644 index 00000000..657ca1ef --- /dev/null +++ b/backend/app/domain/classes/scenario.py @@ -0,0 +1,58 @@ +from __future__ import annotations +from dataclasses import replace +from typing import Optional + +from backend.app.db.models.recommendations import ScenarioModel +from backend.app.domain.records.scenario_record import ScenarioRecord + + +class Scenario: + def __init__(self, record: ScenarioRecord, id: Optional[int] = None): + self.id = id + self._record = record + + @classmethod + def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario: + record = ScenarioRecord( + name=scenario_model.name, + created_at=scenario_model.created_at, + housing_type=scenario_model.housing_type, + goal=scenario_model.goal, + goal_value=scenario_model.goal_value, + trigger_file_path=scenario_model.trigger_file_path, + multi_plan=scenario_model.multi_plan, + is_default=scenario_model.is_default, + budget=scenario_model.budget, + already_installed_file_path=scenario_model.already_installed_file_path, + patches_file_path=scenario_model.patches_file_path, + non_invasive_recommendations_file_path=scenario_model.non_invasive_recommendations_file_path, + exclusions=scenario_model.exclusions, + cost=scenario_model.cost, + contingency=scenario_model.contingency, + funding=scenario_model.funding, + total_work_hours=scenario_model.total_work_hours, + energy_savings=scenario_model.energy_savings, + co2_equivalent_savings=scenario_model.co2_equivalent_savings, + energy_cost_savings=scenario_model.energy_cost_savings, + epc_breakdown_pre_retrofit=scenario_model.epc_breakdown_pre_retrofit, + epc_breakdown_post_retrofit=scenario_model.epc_breakdown_post_retrofit, + number_of_properties=scenario_model.number_of_properties, + n_units_to_retrofit=scenario_model.n_units_to_retrofit, + co2_per_unit_pre_retrofit=scenario_model.co2_per_unit_pre_retrofit, + co2_per_unit_post_retrofit=scenario_model.co2_per_unit_post_retrofit, + energy_bill_per_unit_pre_retrofit=scenario_model.energy_bill_per_unit_pre_retrofit, + energy_bill_per_unit_post_retrofit=scenario_model.energy_bill_per_unit_post_retrofit, + energy_consumption_per_unit_pre_retrofit=scenario_model.energy_consumption_per_unit_pre_retrofit, + energy_consumption_per_unit_post_retrofit=scenario_model.energy_consumption_per_unit_post_retrofit, + valuation_improvement_per_unit=scenario_model.valuation_improvement_per_unit, + cost_per_unit=scenario_model.cost_per_unit, + cost_per_co2_saved=scenario_model.cost_per_co2_saved, + cost_per_sap_point=scenario_model.cost_per_sap_point, + valuation_return_on_investment=scenario_model.valuation_return_on_investment, + property_valuation_increase=scenario_model.property_valuation_increase, + labour_days=scenario_model.labour_days, + ) + return cls(record, scenario_model.id) + + def set_default(self, value: bool) -> None: + self._record = replace(self._record, is_default=value) diff --git a/backend/app/domain/plan.py b/backend/app/domain/records/plan_record.py similarity index 71% rename from backend/app/domain/plan.py rename to backend/app/domain/records/plan_record.py index 3b79d89d..dee7cb4b 100644 --- a/backend/app/domain/plan.py +++ b/backend/app/domain/records/plan_record.py @@ -1,16 +1,16 @@ -from __future__ import annotations +from dataclasses import dataclass from datetime import datetime from typing import Optional from backend.app.db.models.portfolio import Epc -from backend.app.db.models.recommendations import PlanModel, PlanTypeEnum, ScenarioModel -from backend.app.domain.scenario import Scenario +from backend.app.db.models.recommendations import PlanTypeEnum -class Plan: +@dataclass(frozen=True) +class PlanRecord: property_id: int portfolio_id: int - scenario: Scenario + scenario_id: Optional[int] created_at: datetime is_default: bool @@ -23,15 +23,10 @@ class Plan: post_co2_emissions: Optional[float] = None co2_savings: Optional[float] = None post_energy_bill: Optional[float] = None + energy_bill_savings: Optional[float] = None post_energy_consumption: Optional[float] = None energy_consumption_savings: Optional[float] = None valuation_post_retrofit: Optional[float] = None valuation_increase: Optional[float] = None cost_of_works: Optional[float] = None contingency_cost: Optional[float] = None - - @classmethod - def from_sqlalchemy( - cls, plan_model: PlanModel, scenario_model: ScenarioModel - ) -> Plan: - raise NotImplementedError diff --git a/backend/app/domain/scenario.py b/backend/app/domain/records/scenario_record.py similarity index 71% rename from backend/app/domain/scenario.py rename to backend/app/domain/records/scenario_record.py index f4d639cb..09367203 100644 --- a/backend/app/domain/scenario.py +++ b/backend/app/domain/records/scenario_record.py @@ -1,28 +1,24 @@ -from __future__ import annotations +from dataclasses import dataclass from datetime import datetime from typing import Optional -from backend.app.db.models.recommendations import ScenarioModel - -class Scenario: +@dataclass(frozen=True) +class ScenarioRecord: name: str created_at: datetime housing_type: str - goal: str # TODO: make enum + goal: str goal_value: str trigger_file_path: str multi_plan: bool - is_default: bool # TODO: isn't this Plan-level? - + is_default: bool budget: Optional[float] = None already_installed_file_path: Optional[str] = None patches_file_path: Optional[str] = None non_invasive_recommendations_file_path: Optional[str] = None exclusions: Optional[str] = None - # Previously portfolio-level fields - # TODO: are these needed scenario-level? cost: Optional[float] = None contingency: Optional[float] = None funding: Optional[float] = None @@ -30,8 +26,8 @@ class Scenario: energy_savings: Optional[float] = None co2_equivalent_savings: Optional[float] = None energy_cost_savings: Optional[float] = None - epc_breakdown_pre_retrofit: Optional[int] = None - epc_breakdown_post_retrofit: Optional[int] = None + epc_breakdown_pre_retrofit: Optional[str] = None + epc_breakdown_post_retrofit: Optional[str] = None number_of_properties: Optional[int] = None n_units_to_retrofit: Optional[int] = None co2_per_unit_pre_retrofit: Optional[str] = None @@ -44,10 +40,6 @@ class Scenario: cost_per_unit: Optional[str] = None cost_per_co2_saved: Optional[str] = None cost_per_sap_point: Optional[str] = None - valuation_return_on_ivestment: Optional[str] = None + valuation_return_on_investment: Optional[str] = None property_valuation_increase: Optional[float] = None labour_days: Optional[float] = None - - @classmethod - def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario: - raise NotImplementedError From f296a865ff9416d315759ea7416d29e35ad30600 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 13:04:40 +0000 Subject: [PATCH 107/340] added s3 bucket name --- infrastructure/terraform/lambda/address2UPRN/main.tf | 1 + infrastructure/terraform/lambda/postcodeSplitter/main.tf | 1 + 2 files changed, 2 insertions(+) diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index a6f56074..79e2bb2f 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -41,6 +41,7 @@ module "address2uprn" { DATA_BUCKET = "test" ENGINE_SQS_URL = "test" ENERGY_ASSESSMENTS_BUCKET = "test" + S3_BUCKET_NAME = data.terraform_remote_state.retrofit_sap_data.outputs.bucket_name }, ) } diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 81120772..78d927d3 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -55,6 +55,7 @@ module "lambda" { ENGINE_SQS_URL = "test" ENERGY_ASSESSMENTS_BUCKET = "test" ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url + S3_BUCKET_NAME = "retrofit-data-dev" # Hardcoded as deployed via serverless i believe }, ) } From 1bf322005c0599067fa2f41aa3707230f3167d7f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 13:55:03 +0000 Subject: [PATCH 108/340] added outputs --- infrastructure/terraform/lambda/address2UPRN/main.tf | 2 +- infrastructure/terraform/shared/main.tf | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 79e2bb2f..5f0c4a11 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -41,7 +41,7 @@ module "address2uprn" { DATA_BUCKET = "test" ENGINE_SQS_URL = "test" ENERGY_ASSESSMENTS_BUCKET = "test" - S3_BUCKET_NAME = data.terraform_remote_state.retrofit_sap_data.outputs.bucket_name + S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name }, ) } diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 9733f5f9..eb2a679d 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -133,6 +133,11 @@ module "retrofit_sap_data" { allowed_origins = var.allowed_origins } +output "retrofit_sap_data_bucket_name" { + value = module.retrofit_sap_data.bucket_name + description = "Name of the retrofit SAP data bucket" +} + module "retrofit_carbon_predictions" { source = "../modules/s3" bucketname = "retrofit-carbon-predictions-${var.stage}" From f955184260fd978449465695810ef6fc44799b3e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 14:25:35 +0000 Subject: [PATCH 109/340] refactor processor --- .../db/functions/recommendations_functions.py | 2 +- backend/app/domain/classes/plan.py | 11 +-- .../categorisation/categorisation_logic.py | 6 +- backend/categorisation/processor.py | 71 +++++++++++++------ 4 files changed, 61 insertions(+), 29 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 5ff91909..1864a330 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -621,7 +621,7 @@ def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: raise NotImplementedError -def get_scenario(scenario_id: int) -> List[ScenarioModel]: +def get_scenario(scenario_id: int) -> ScenarioModel: raise NotImplementedError diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 401204aa..3540c603 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -11,12 +11,15 @@ class Plan: def __init__( self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None ): - self.id = id - self._record = record - self.scenario = scenario + self.id: Optional[int] = id + self.record: PlanRecord = record + self.scenario: Scenario = scenario @classmethod def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan: + if not scenario: + raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}") + record = PlanRecord( property_id=plan_model.property_id, portfolio_id=plan_model.portfolio_id, @@ -43,4 +46,4 @@ class Plan: return cls(record=record, scenario=scenario, id=plan_model.id) def set_default(self, value: bool) -> None: - self._record = replace(self._record, is_default=value) + self.record = replace(self.record, is_default=value) diff --git a/backend/categorisation/categorisation_logic.py b/backend/categorisation/categorisation_logic.py index f9503e50..2f540a55 100644 --- a/backend/categorisation/categorisation_logic.py +++ b/backend/categorisation/categorisation_logic.py @@ -1,12 +1,12 @@ from typing import List -from backend.app.db.models.recommendations import PlanModel +from backend.app.domain.classes.plan import Plan class CategorisationLogic: @staticmethod - def get_compliant_plans(plans: List[PlanModel]) -> List[PlanModel]: + def get_compliant_plans(plans: List[Plan]) -> List[Plan]: raise NotImplementedError @staticmethod - def get_cheapest_plan(plans: List[PlanModel]) -> PlanModel: + def get_cheapest_plan(plans: List[Plan]) -> Plan: raise NotImplementedError diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 53d7846c..55a1a1c6 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,35 +1,64 @@ +from collections import defaultdict from typing import List from backend.app.db.functions.recommendations_functions import ( get_plans_by_portfolio_id, - get_property_ids, + get_scenario, set_plan_default, ) -from backend.app.db.models.recommendations import PlanModel +from backend.app.domain.classes.plan import Plan from backend.categorisation.categorisation_logic import CategorisationLogic +from utils.logger import setup_logger + +logger = setup_logger() def process_portfolio(portfolio_id: int) -> None: - # Get all plans (including scenarios) for all properties in the portfolio - plans: List[PlanModel] = get_plans_by_portfolio_id(portfolio_id) + plans = _load_plans_for_portfolio(portfolio_id) + plans_by_property = _group_plans_by_property(plans) - # For each property, get all compliant plans - property_ids: List[int] = get_property_ids(portfolio_id) + for property_plans in plans_by_property.values(): + cheapest_plan = _choose_cheapest_relevant_plan(property_plans) + _update_default_flags(property_plans, cheapest_plan) - # For each property, find the cheapest compliant plan - for id in property_ids: - plans_for_property: List[PlanModel] = [ - plan for plan in plans if plan.property_id == id - ] - compliant_plans_for_property: List[PlanModel] = ( - CategorisationLogic.get_compliant_plans(plans_for_property) +def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: + plan_models = get_plans_by_portfolio_id(portfolio_id) + plans: List[Plan] = [] + + for model in plan_models: + if not model.scenario_id: + logger.info(f"No Scenario associated with Plan of ID {model.id}") + continue + + scenario_model = get_scenario(model.scenario_id) + plans.append(Plan.from_sqlalchemy(model, scenario_model)) + + return plans + + +def _group_plans_by_property(plans: List[Plan]) -> dict[int, List[Plan]]: + grouped: dict[int, List[Plan]] = defaultdict(list) + + for plan in plans: + grouped[plan.record.property_id].append(plan) + + return grouped + + +def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan: + compliant_plans = CategorisationLogic.get_compliant_plans(plans) + + plans_to_consider = compliant_plans or plans + return CategorisationLogic.get_cheapest_plan(plans_to_consider) + + +def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: + for plan in plans: + if plan.id is None: + raise ValueError("Cannot update Plan with missing ID") + + set_plan_default( + plan.id, + plan.id == cheapest_plan.id, ) - - # Choose cheapest compliant plan, or fallback to cheapest overall plan - plans_to_consider = compliant_plans_for_property or plans_for_property - cheapest_plan = CategorisationLogic.get_cheapest_plan(plans_to_consider) - - # Update DB: set is_default = True for cheapest plan, False for others - for plan in plans_for_property: - set_plan_default(plan.id, plan.id == cheapest_plan.id) From 3761d0bbe76d072ca0b797df303c2c46982c6510 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 14:32:48 +0000 Subject: [PATCH 110/340] fix pylance problem in logger --- utils/logger.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/utils/logger.py b/utils/logger.py index d643f36a..45370d3d 100644 --- a/utils/logger.py +++ b/utils/logger.py @@ -1,7 +1,13 @@ import logging +from os import PathLike +from typing import Optional, Union -def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False): +def setup_logger( + log_file: Optional[Union[str, PathLike[str]]] = None, + level: int = logging.INFO, + overwrite_handler: bool = False, +) -> logging.Logger: # Create a logger and set the logging level logger = logging.getLogger() logger.setLevel(level) From 3bdd4a4a97efc87fc24eeded8e6f3a2f58cf70f6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 15:03:38 +0000 Subject: [PATCH 111/340] test first with just 5 --- .devcontainer/backend/Dockerfile | 2 + .devcontainer/backend/devcontainer.json | 3 +- backend/address2UPRN/main.py | 52 ++++++++----------------- 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile index 99cd66d6..f48fb99f 100644 --- a/.devcontainer/backend/Dockerfile +++ b/.devcontainer/backend/Dockerfile @@ -3,6 +3,8 @@ FROM python:3.11.10-bullseye ARG USER=vscode ARG DEBIAN_FRONTEND=noninteractive +ARG DOCKER_GID=1003 + # 1) Toolchain + utilities for building libpostal RUN apt-get update && apt-get install -y --no-install-recommends \ diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index 6e2edc93..73348c4d 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -6,7 +6,8 @@ "workspaceFolder": "/workspaces/model", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ - "source=${localEnv:HOME},target=/home/vscode,type=bind" + "source=${localEnv:HOME},target=/home/vscode,type=bind", + "source=/var/run/docker.sock,target=/var/run/docker.sock,type=bind" ], "customizations": { "vscode": { diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 0aedd082..e635b305 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -329,9 +329,6 @@ def get_uprn_candidates( def get_uprn_with_epc_df( user_inputed_address: str, epc_df: pd.DataFrame, - return_address=False, - return_EPC=False, - return_score=True, ): """ Return uprn (str) using a pre-fetched EPC dataframe. @@ -371,8 +368,6 @@ def get_uprn_with_epc_df( return None address = top_rank_df["address"].values[0] - lexiscore = float(top_rank_df["lexiscore"].values[0]) - epc = top_rank_df["current-energy-efficiency"].values[0] score = float(top_rank_df["lexiscore"].values[0]) # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") @@ -382,20 +377,7 @@ def get_uprn_with_epc_df( if found_uprn == "": return None - if return_address: - if return_EPC is False: - return found_uprn, address - else: - if return_score is False: - return found_uprn, address, epc - else: - return ( - found_uprn, - address, - epc, - score, - ) - return found_uprn + return (found_uprn, address, score) def get_uprn( @@ -688,7 +670,11 @@ def handler(event, context, local=False): # Create user_input column by concatenating Address 1 and Address 2 df["user_input"] = ( - df["Address 1"].fillna("") + " " + df["Address 2"].fillna("") + df["Address 1"].fillna("") + + " " + + df["Address 2"].fillna("") + + " " + + df["Address 3"].fillna("") ).str.strip() logger.info(f"Created user_input column from Address 1 and Address 2") @@ -743,14 +729,11 @@ def handler(event, context, local=False): result = get_uprn_with_epc_df( user_inputed_address=user_input, epc_df=epc_df, - return_address=True, - return_EPC=True, - return_score=True, ) # Parse result tuple if successful if result: - uprn, found_address, epc, score = result + uprn, found_address, score = result uprns_found += 1 logger.info( f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})" @@ -759,10 +742,9 @@ def handler(event, context, local=False): results_data.append( { **row, # Include all original data - "found_uprn": uprn, - "found_address": found_address, - "epc_rating": epc, - "lexiscore": score, + "uprn": uprn, + "domna_found_address": found_address, + "domna_lexiscore": score, } ) else: @@ -772,10 +754,9 @@ def handler(event, context, local=False): results_data.append( { **row, # Include all original data - "found_uprn": None, - "found_address": None, - "epc_rating": None, - "lexiscore": None, + "uprn": None, + "domna_found_address": None, + "domna_lexiscore": None, } ) @@ -789,10 +770,9 @@ def handler(event, context, local=False): results_data.append( { **row, - "found_uprn": None, - "found_address": None, - "epc_rating": None, - "score": None, + "uprn": None, + "domna_found_address": None, + "domna_lexiscore": None, "error": str(e), } ) From 70fd417c4a5d4a4e886cbf2b720379e7c195dc8f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 15:04:02 +0000 Subject: [PATCH 112/340] =?UTF-8?q?Check=20whether=20plan=20with=20EPC=20g?= =?UTF-8?q?oal=20is=20compliant=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/domain/classes/plan.py | 5 +- backend/app/domain/records/plan_record.py | 1 - .../tests/test_plan_is_compliant.py | 73 +++++++++++++++++++ pytest.ini | 2 +- 4 files changed, 78 insertions(+), 3 deletions(-) create mode 100644 backend/categorisation/tests/test_plan_is_compliant.py diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 3540c603..76aba958 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -23,7 +23,6 @@ class Plan: record = PlanRecord( property_id=plan_model.property_id, portfolio_id=plan_model.portfolio_id, - scenario_id=plan_model.scenario_id, created_at=plan_model.created_at, is_default=plan_model.is_default, valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound, @@ -45,5 +44,9 @@ class Plan: ) return cls(record=record, scenario=scenario, id=plan_model.id) + @property + def is_compliant(self) -> bool: + raise NotImplementedError + def set_default(self, value: bool) -> None: self.record = replace(self.record, is_default=value) diff --git a/backend/app/domain/records/plan_record.py b/backend/app/domain/records/plan_record.py index dee7cb4b..2df7a7c6 100644 --- a/backend/app/domain/records/plan_record.py +++ b/backend/app/domain/records/plan_record.py @@ -10,7 +10,6 @@ from backend.app.db.models.recommendations import PlanTypeEnum class PlanRecord: property_id: int portfolio_id: int - scenario_id: Optional[int] created_at: datetime is_default: bool diff --git a/backend/categorisation/tests/test_plan_is_compliant.py b/backend/categorisation/tests/test_plan_is_compliant.py new file mode 100644 index 00000000..41fb1b85 --- /dev/null +++ b/backend/categorisation/tests/test_plan_is_compliant.py @@ -0,0 +1,73 @@ +from typing import Callable +import pytest +from datetime import datetime + +from backend.app.domain.classes.plan import Plan +from backend.app.domain.classes.scenario import Scenario +from backend.app.domain.records.plan_record import PlanRecord +from backend.app.domain.records.scenario_record import ScenarioRecord +from backend.app.db.models.portfolio import Epc + + +@pytest.fixture +def created_at_datetime() -> datetime: + return datetime.now() + + +@pytest.fixture +def epc_c_scenario(created_at_datetime: datetime) -> "Scenario": + # arrange + scenario_record = ScenarioRecord( + name="EPC C", + created_at=created_at_datetime, + housing_type="", + goal="EPC", + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=False, + ) + return Scenario(record=scenario_record, id=1) + + +@pytest.fixture +def plan_factory( + epc_c_scenario: "Scenario", created_at_datetime: datetime +) -> Callable[[int, "Epc"], "Plan"]: + # returns a function to create plans with different attributes + def _create_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan": + plan_record = PlanRecord( + property_id=1, + portfolio_id=1, + created_at=created_at_datetime, + is_default=False, + post_sap_points=post_sap_points, + post_epc_rating=post_epc_rating, + ) + return Plan(record=plan_record, scenario=epc_c_scenario, id=1) + + return _create_plan + + +@pytest.mark.parametrize( + "post_sap_points, post_epc_rating, expected_compliance", + [ + (75, Epc.C, True), + (100, Epc.A, True), + (60, Epc.D, False), + ], +) +def test_scenario_goal_is_epc_c( + plan_factory: Callable[[int, "Epc"], "Plan"], + post_sap_points: int, + post_epc_rating: "Epc", + expected_compliance: bool, +) -> None: + # arrange + plan = plan_factory(post_sap_points, post_epc_rating) + + # act + actual_compliance: bool = plan.is_compliant + + # assert + assert actual_compliance == expected_compliance diff --git a/pytest.ini b/pytest.ini index ee203d46..9c9f8234 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,4 @@ [pytest] pythonpath = . addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests From c2f29e86dfd5658dd6979b4da0b91a541814ff00 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 15:11:20 +0000 Subject: [PATCH 113/340] made tests pass and redploy --- .github/workflows/deploy_terraform.yml | 3 +++ backend/address2UPRN/main.py | 17 ++++++++--------- backend/postcode_splitter/main.py | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index e8e82edf..90595632 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -204,3 +204,6 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + + diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index e635b305..f4aa0dc9 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -329,6 +329,7 @@ def get_uprn_candidates( def get_uprn_with_epc_df( user_inputed_address: str, epc_df: pd.DataFrame, + verbose=False, ): """ Return uprn (str) using a pre-fetched EPC dataframe. @@ -377,15 +378,16 @@ def get_uprn_with_epc_df( if found_uprn == "": return None - return (found_uprn, address, score) + if verbose: + return (found_uprn, address, score) + else: + return found_uprn def get_uprn( user_inputed_address: str, postcode: str, - return_address=False, - return_EPC=False, - return_score=True, + verbose=False, ): """ Return uprn (str) @@ -400,9 +402,7 @@ def get_uprn( return get_uprn_with_epc_df( user_inputed_address=user_inputed_address, epc_df=df, - return_address=return_address, - return_EPC=return_EPC, - return_score=return_score, + verbose=verbose, ) @@ -727,8 +727,7 @@ def handler(event, context, local=False): # Get UPRN using the pre-fetched EPC data with all return options result = get_uprn_with_epc_df( - user_inputed_address=user_input, - epc_df=epc_df, + user_inputed_address=user_input, epc_df=epc_df, verbose=True ) # Parse result tuple if successful diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 73a79d2c..8c0048e2 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -168,7 +168,7 @@ def handler(event, context, local=False): # TODO: DELETE ME, if you see this in the PR. # TODO: DELETE ME, if you see this in the PR. # TODO: DELETE ME, if you see this in the PR. - df = df.head(5) + df = df.head(1983) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") From c4e30a0d561db675a368eb9f2778953803475a6c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 15:11:36 +0000 Subject: [PATCH 114/340] made tests pass and redploy --- backend/postcode_splitter/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 8c0048e2..73a79d2c 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -168,7 +168,7 @@ def handler(event, context, local=False): # TODO: DELETE ME, if you see this in the PR. # TODO: DELETE ME, if you see this in the PR. # TODO: DELETE ME, if you see this in the PR. - df = df.head(1983) + df = df.head(5) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") From 1c2b1422fe89f25784dfd523c7f1096e996dafcd Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 15:24:38 +0000 Subject: [PATCH 115/340] running 1983 --- backend/postcode_splitter/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 73a79d2c..8c0048e2 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -168,7 +168,7 @@ def handler(event, context, local=False): # TODO: DELETE ME, if you see this in the PR. # TODO: DELETE ME, if you see this in the PR. # TODO: DELETE ME, if you see this in the PR. - df = df.head(5) + df = df.head(1983) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") From 5dc9cea564517844b29b6a11687ea0a478a6d182 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 15:25:49 +0000 Subject: [PATCH 116/340] running 1983 --- .github/workflows/deploy_fastapi_backend.yml | 1 + .github/workflows/deploy_terraform.yml | 3 +++ 2 files changed, 4 insertions(+) diff --git a/.github/workflows/deploy_fastapi_backend.yml b/.github/workflows/deploy_fastapi_backend.yml index 32e30bfa..b60fa1d1 100644 --- a/.github/workflows/deploy_fastapi_backend.yml +++ b/.github/workflows/deploy_fastapi_backend.yml @@ -135,3 +135,4 @@ jobs: # Deploy to AWS Lambda via Serverless sls deploy --stage ${{ github.ref_name }} --verbose + diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 90595632..834a60c2 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -207,3 +207,6 @@ jobs: + + + From 080000123f8f5445f49bb18b9a1aa4fc1394fa5a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 15:40:03 +0000 Subject: [PATCH 117/340] =?UTF-8?q?cater=20for=20goal=5Fvalue=20being=20NU?= =?UTF-8?q?LL=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/db/models/portfolio.py | 2 +- backend/app/db/models/recommendations.py | 6 +- backend/app/domain/classes/plan.py | 10 +++ backend/app/domain/classes/scenario.py | 4 +- backend/app/domain/records/scenario_record.py | 6 +- .../tests/test_plan_is_compliant.py | 63 ++++++++++--------- 6 files changed, 54 insertions(+), 37 deletions(-) diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index 54de8dcc..f6a99a97 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -32,7 +32,7 @@ class PortfolioStatus(enum.Enum): NEEDS_REVIEW = "needs review" -class PortfolioGoal(enum.Enum): +class PortfolioGoal(enum.Enum): # TODO: Move to domain? VALUATION_IMPROVEMENT = "Valuation Improvement" INCREASING_EPC = "Increasing EPC" REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions" diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 356c0fd7..82032d35 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import declarative_base, Mapped, mapped_column from sqlalchemy.sql import func from datetime import datetime -from backend.app.db.models.portfolio import Portfolio, PropertyModel +from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel from backend.app.db.models.materials import Material from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits @@ -152,8 +152,8 @@ class ScenarioModel(Base): BigInteger, ForeignKey(Portfolio.id), nullable=False ) housing_type: Mapped[str] = mapped_column(String, nullable=False) - goal: Mapped[str] = mapped_column(String, nullable=False) - goal_value: Mapped[str] = mapped_column(String, nullable=False) + goal: Mapped[PortfolioGoal] = mapped_column(Enum(PortfolioGoal), nullable=False) + goal_value: Mapped[Optional[str]] = mapped_column(String, nullable=False) trigger_file_path: Mapped[str] = mapped_column(String, nullable=False) already_installed_file_path: Mapped[Optional[str]] = mapped_column(String) patches_file_path: Mapped[Optional[str]] = mapped_column(String) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 76aba958..b44543a6 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -2,6 +2,7 @@ from __future__ import annotations from dataclasses import replace from typing import Optional +from backend.app.db.models.portfolio import PortfolioGoal from backend.app.db.models.recommendations import PlanModel from backend.app.domain.classes.scenario import Scenario from backend.app.domain.records.plan_record import PlanRecord @@ -48,5 +49,14 @@ class Plan: def is_compliant(self) -> bool: raise NotImplementedError + goal: PortfolioGoal = self.scenario.record.goal + goal_value: str = self.scenario.record.goal_value + + match goal: + case PortfolioGoal.INCREASING_EPC: + return True + case _: + raise NotImplementedError + def set_default(self, value: bool) -> None: self.record = replace(self.record, is_default=value) diff --git a/backend/app/domain/classes/scenario.py b/backend/app/domain/classes/scenario.py index 657ca1ef..3c22657e 100644 --- a/backend/app/domain/classes/scenario.py +++ b/backend/app/domain/classes/scenario.py @@ -9,7 +9,7 @@ from backend.app.domain.records.scenario_record import ScenarioRecord class Scenario: def __init__(self, record: ScenarioRecord, id: Optional[int] = None): self.id = id - self._record = record + self.record = record @classmethod def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario: @@ -55,4 +55,4 @@ class Scenario: return cls(record, scenario_model.id) def set_default(self, value: bool) -> None: - self._record = replace(self._record, is_default=value) + self.record = replace(self.record, is_default=value) diff --git a/backend/app/domain/records/scenario_record.py b/backend/app/domain/records/scenario_record.py index 09367203..48ddf0ca 100644 --- a/backend/app/domain/records/scenario_record.py +++ b/backend/app/domain/records/scenario_record.py @@ -2,14 +2,15 @@ from dataclasses import dataclass from datetime import datetime from typing import Optional +from backend.app.db.models.portfolio import PortfolioGoal + @dataclass(frozen=True) class ScenarioRecord: name: str created_at: datetime housing_type: str - goal: str - goal_value: str + goal: PortfolioGoal trigger_file_path: str multi_plan: bool is_default: bool @@ -19,6 +20,7 @@ class ScenarioRecord: non_invasive_recommendations_file_path: Optional[str] = None exclusions: Optional[str] = None + goal_value: Optional[str] = None cost: Optional[float] = None contingency: Optional[float] = None funding: Optional[float] = None diff --git a/backend/categorisation/tests/test_plan_is_compliant.py b/backend/categorisation/tests/test_plan_is_compliant.py index 41fb1b85..c0f7add0 100644 --- a/backend/categorisation/tests/test_plan_is_compliant.py +++ b/backend/categorisation/tests/test_plan_is_compliant.py @@ -1,4 +1,4 @@ -from typing import Callable +from typing import Callable, Optional import pytest from datetime import datetime @@ -6,7 +6,7 @@ from backend.app.domain.classes.plan import Plan from backend.app.domain.classes.scenario import Scenario from backend.app.domain.records.plan_record import PlanRecord from backend.app.domain.records.scenario_record import ScenarioRecord -from backend.app.db.models.portfolio import Epc +from backend.app.db.models.portfolio import Epc, PortfolioGoal @pytest.fixture @@ -14,28 +14,17 @@ def created_at_datetime() -> datetime: return datetime.now() -@pytest.fixture -def epc_c_scenario(created_at_datetime: datetime) -> "Scenario": - # arrange - scenario_record = ScenarioRecord( - name="EPC C", - created_at=created_at_datetime, - housing_type="", - goal="EPC", - goal_value="C", - trigger_file_path="", - multi_plan=False, - is_default=False, - ) - return Scenario(record=scenario_record, id=1) - - @pytest.fixture def plan_factory( - epc_c_scenario: "Scenario", created_at_datetime: datetime -) -> Callable[[int, "Epc"], "Plan"]: - # returns a function to create plans with different attributes - def _create_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan": + created_at_datetime: datetime, +) -> Callable[[int, "Epc", "Scenario"], "Plan"]: + """ + Returns a factory function to create plans with different attributes and scenarios. + """ + + def _create_plan( + post_sap_points: int, post_epc_rating: "Epc", scenario: "Scenario" + ) -> "Plan": plan_record = PlanRecord( property_id=1, portfolio_id=1, @@ -44,27 +33,43 @@ def plan_factory( post_sap_points=post_sap_points, post_epc_rating=post_epc_rating, ) - return Plan(record=plan_record, scenario=epc_c_scenario, id=1) + return Plan(record=plan_record, scenario=scenario, id=1) return _create_plan @pytest.mark.parametrize( - "post_sap_points, post_epc_rating, expected_compliance", + "scenario_name, goal_value, post_sap_points, post_epc_rating, expected_compliance", [ - (75, Epc.C, True), - (100, Epc.A, True), - (60, Epc.D, False), + ("EPC C", "C", 75, Epc.C, True), + ("EPC A", "A", 100, Epc.A, True), + ("EPC D", "D", 60, Epc.D, False), + ("Achieve EPC B", None, 100, Epc.A, True), + ("Achieve EPC B", None, 60, Epc.D, False), ], ) def test_scenario_goal_is_epc_c( - plan_factory: Callable[[int, "Epc"], "Plan"], + plan_factory: Callable[[int, "Epc", "Scenario"], "Plan"], + scenario_name: str, + goal_value: Optional[str], post_sap_points: int, post_epc_rating: "Epc", expected_compliance: bool, ) -> None: # arrange - plan = plan_factory(post_sap_points, post_epc_rating) + scenario_record = ScenarioRecord( + name=scenario_name, + created_at=datetime.now(), + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value=goal_value, + trigger_file_path="", + multi_plan=False, + is_default=False, + ) + scenario = Scenario(record=scenario_record, id=1) + + plan = plan_factory(post_sap_points, post_epc_rating, scenario) # act actual_compliance: bool = plan.is_compliant From 04cc6468dd18307586e4dde0c6c4ce48e6959d4d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 15:44:36 +0000 Subject: [PATCH 118/340] save --- .github/workflows/_deploy_lambda.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index b8731446..b2f2ce49 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -112,3 +112,5 @@ jobs: -var="lambda_name=${{ inputs.lambda_name }}" \ -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \ -var="image_digest=${{ inputs.image_digest }}" + + From 4325bdf9900b3abc4e1d8f17c572f181136e18c8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 16:05:16 +0000 Subject: [PATCH 119/340] get rid of local is true to remove suspicion --- backend/postcode_splitter/main.py | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 8c0048e2..e834c44e 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -97,7 +97,7 @@ def send_to_address2uprn_queue(task_id: str, rows: list) -> str: return response["MessageId"] -def handler(event, context, local=False): +def handler(event, context): print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") @@ -117,12 +117,6 @@ def handler(event, context, local=False): task_id = None subtask_id = None try: - # For local development - if local is True: - record = {} - record["body"] = ( - '{"task_id":"e31f2f21-175b-4a91-a3ec-a6baa325e917","s3_uri":"s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv"}' - ) # Parse body (inputs) if isinstance(record.get("body"), str): body = json.loads(record["body"]) @@ -161,13 +155,7 @@ def handler(event, context, local=False): csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) - # just do 5 well we are testing, sqs connection - if local: - df = df.head(5) - # TODO: DELETE ME, if you see this in the PR. - # TODO: DELETE ME, if you see this in the PR. - # TODO: DELETE ME, if you see this in the PR. df = df.head(1983) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") From 385a1b8e84ad39fb9b309489e3e9b113e5f4fe7a Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 16:07:53 +0000 Subject: [PATCH 120/340] get rid of local is true to remove suspicion --- .github/workflows/deploy_terraform.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 834a60c2..7e24f60f 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -210,3 +210,9 @@ jobs: + + + + + + From bf0fce8ca5af592fea52fcadb27d994c721e21ba Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 16:08:37 +0000 Subject: [PATCH 121/340] =?UTF-8?q?Check=20whether=20plan=20with=20EPC=20g?= =?UTF-8?q?oal=20is=20compliant=20(and=20change=20goal=5Fvalue=20back=20to?= =?UTF-8?q?=20required)=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/db/models/recommendations.py | 2 +- backend/app/domain/classes/plan.py | 15 ++++- backend/app/domain/records/scenario_record.py | 2 +- .../tests/test_plan_is_compliant.py | 61 +++++++++---------- 4 files changed, 42 insertions(+), 38 deletions(-) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 82032d35..addb5e80 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -153,7 +153,7 @@ class ScenarioModel(Base): ) housing_type: Mapped[str] = mapped_column(String, nullable=False) goal: Mapped[PortfolioGoal] = mapped_column(Enum(PortfolioGoal), nullable=False) - goal_value: Mapped[Optional[str]] = mapped_column(String, nullable=False) + goal_value: Mapped[str] = mapped_column(String, nullable=False) trigger_file_path: Mapped[str] = mapped_column(String, nullable=False) already_installed_file_path: Mapped[Optional[str]] = mapped_column(String) patches_file_path: Mapped[Optional[str]] = mapped_column(String) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index b44543a6..1efe87a5 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -6,6 +6,7 @@ from backend.app.db.models.portfolio import PortfolioGoal from backend.app.db.models.recommendations import PlanModel from backend.app.domain.classes.scenario import Scenario from backend.app.domain.records.plan_record import PlanRecord +from backend.app.utils import sap_to_epc class Plan: @@ -47,14 +48,22 @@ class Plan: @property def is_compliant(self) -> bool: - raise NotImplementedError - goal: PortfolioGoal = self.scenario.record.goal goal_value: str = self.scenario.record.goal_value match goal: case PortfolioGoal.INCREASING_EPC: - return True + if self.record.post_epc_rating: + post_epc = self.record.post_epc_rating.value + elif self.record.post_sap_points: + post_epc = sap_to_epc(self.record.post_sap_points) + else: + return False + + if post_epc <= goal_value: + return True + + return False case _: raise NotImplementedError diff --git a/backend/app/domain/records/scenario_record.py b/backend/app/domain/records/scenario_record.py index 48ddf0ca..0865cc88 100644 --- a/backend/app/domain/records/scenario_record.py +++ b/backend/app/domain/records/scenario_record.py @@ -11,6 +11,7 @@ class ScenarioRecord: created_at: datetime housing_type: str goal: PortfolioGoal + goal_value: str trigger_file_path: str multi_plan: bool is_default: bool @@ -20,7 +21,6 @@ class ScenarioRecord: non_invasive_recommendations_file_path: Optional[str] = None exclusions: Optional[str] = None - goal_value: Optional[str] = None cost: Optional[float] = None contingency: Optional[float] = None funding: Optional[float] = None diff --git a/backend/categorisation/tests/test_plan_is_compliant.py b/backend/categorisation/tests/test_plan_is_compliant.py index c0f7add0..62756652 100644 --- a/backend/categorisation/tests/test_plan_is_compliant.py +++ b/backend/categorisation/tests/test_plan_is_compliant.py @@ -1,4 +1,4 @@ -from typing import Callable, Optional +from typing import Callable import pytest from datetime import datetime @@ -15,16 +15,27 @@ def created_at_datetime() -> datetime: @pytest.fixture -def plan_factory( - created_at_datetime: datetime, -) -> Callable[[int, "Epc", "Scenario"], "Plan"]: - """ - Returns a factory function to create plans with different attributes and scenarios. - """ +def epc_c_scenario(created_at_datetime: datetime) -> "Scenario": + # arrange + scenario_record = ScenarioRecord( + name="EPC C", + created_at=created_at_datetime, + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=False, + ) + return Scenario(record=scenario_record, id=1) - def _create_plan( - post_sap_points: int, post_epc_rating: "Epc", scenario: "Scenario" - ) -> "Plan": + +@pytest.fixture +def plan_factory( + epc_c_scenario: "Scenario", created_at_datetime: datetime +) -> Callable[[int, "Epc"], "Plan"]: + # returns a function to create plans with different attributes + def _create_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan": plan_record = PlanRecord( property_id=1, portfolio_id=1, @@ -33,43 +44,27 @@ def plan_factory( post_sap_points=post_sap_points, post_epc_rating=post_epc_rating, ) - return Plan(record=plan_record, scenario=scenario, id=1) + return Plan(record=plan_record, scenario=epc_c_scenario, id=1) return _create_plan @pytest.mark.parametrize( - "scenario_name, goal_value, post_sap_points, post_epc_rating, expected_compliance", + "post_sap_points, post_epc_rating, expected_compliance", [ - ("EPC C", "C", 75, Epc.C, True), - ("EPC A", "A", 100, Epc.A, True), - ("EPC D", "D", 60, Epc.D, False), - ("Achieve EPC B", None, 100, Epc.A, True), - ("Achieve EPC B", None, 60, Epc.D, False), + (75, Epc.C, True), + (100, Epc.A, True), + (60, Epc.D, False), ], ) def test_scenario_goal_is_epc_c( - plan_factory: Callable[[int, "Epc", "Scenario"], "Plan"], - scenario_name: str, - goal_value: Optional[str], + plan_factory: Callable[[int, "Epc"], "Plan"], post_sap_points: int, post_epc_rating: "Epc", expected_compliance: bool, ) -> None: # arrange - scenario_record = ScenarioRecord( - name=scenario_name, - created_at=datetime.now(), - housing_type="", - goal=PortfolioGoal.INCREASING_EPC, - goal_value=goal_value, - trigger_file_path="", - multi_plan=False, - is_default=False, - ) - scenario = Scenario(record=scenario_record, id=1) - - plan = plan_factory(post_sap_points, post_epc_rating, scenario) + plan = plan_factory(post_sap_points, post_epc_rating) # act actual_compliance: bool = plan.is_compliant From 857d7e3da1073fe9957f366c930df9585e3e58f0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 16:10:11 +0000 Subject: [PATCH 122/340] =?UTF-8?q?Check=20whether=20plan=20with=20EPC=20g?= =?UTF-8?q?oal=20is=20compliant=20=F0=9F=9F=AA?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/domain/classes/plan.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 1efe87a5..e1215178 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -49,23 +49,24 @@ class Plan: @property def is_compliant(self) -> bool: goal: PortfolioGoal = self.scenario.record.goal - goal_value: str = self.scenario.record.goal_value match goal: case PortfolioGoal.INCREASING_EPC: - if self.record.post_epc_rating: - post_epc = self.record.post_epc_rating.value - elif self.record.post_sap_points: - post_epc = sap_to_epc(self.record.post_sap_points) - else: - return False - - if post_epc <= goal_value: - return True - - return False + return self._is_compliant_epc() case _: raise NotImplementedError def set_default(self, value: bool) -> None: self.record = replace(self.record, is_default=value) + + def _is_compliant_epc(self) -> bool: + goal_value: str = self.scenario.record.goal_value + + if self.record.post_epc_rating: + post_epc = self.record.post_epc_rating.value + elif self.record.post_sap_points: + post_epc = sap_to_epc(self.record.post_sap_points) + else: + return False + + return post_epc <= goal_value From 51e910ce6ec1031467efa300352d267f2a515487 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 16:28:27 +0000 Subject: [PATCH 123/340] add a workflow button --- .github/workflows/deploy_terraform.yml | 1 + sfr/principal_pitch/2_export_data.py | 8 +++++--- 2 files changed, 6 insertions(+), 3 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 7e24f60f..02bb1b76 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -9,6 +9,7 @@ on: - '.github/workflows/deploy_terraform.yml' - '.github/workflows/_build_image.yml' - '.github/workflows/_deploy_lambda.yml' + workflow_dispatch: jobs: determine_stage: diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index 9470710d..81e7a9fc 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -28,12 +28,12 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 476 +PORTFOLIO_ID = 561 SCENARIOS = [ - 953, + 1053, ] scenario_names = { - 953: "All Properties, Most Economic", + 1053: "EPC C", } project_name = "manchester" @@ -286,6 +286,8 @@ for scenario_id in SCENARIOS: "current_sap_points", "total_floor_area", "number_of_rooms", + "lodgement_date", + "is_expired", "id", ] ] From 4b07310d6b8aef447c7195b3cc5a19f154e9142b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 12 Feb 2026 17:36:47 +0000 Subject: [PATCH 124/340] define database methods --- .../db/functions/recommendations_functions.py | 29 ++++++++++++++----- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 1864a330..2f85cbec 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,8 +1,9 @@ -from typing import List -from sqlalchemy import text -from sqlalchemy import insert, delete +from typing import Any, List, Optional +from sqlalchemy import text, insert, delete, select, update from sqlalchemy.orm import Session from sqlalchemy.exc import SQLAlchemyError +from sqlmodel import Session + from backend.app.db.models.recommendations import ( PlanModel, Recommendation, @@ -618,12 +619,26 @@ def clear_portfolio_in_batches( def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: - raise NotImplementedError + stmt = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).all() -def get_scenario(scenario_id: int) -> ScenarioModel: - raise NotImplementedError +def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: + stmt = select(ScenarioModel).where(ScenarioModel.id == scenario_id) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalar_one_or_none() def set_plan_default(plan_id: int, is_default: bool) -> bool: - raise NotImplementedError + with db_read_session() as session: + stmt = ( + update(PlanModel) + .where(PlanModel.id == plan_id) + .values(is_default=is_default) + ) + result = session.exec(stmt) + session.commit() + return result.rowcount > 0 From d07fc351a59292a57c3b47eb8b0436d9434f6346 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:04:27 +0000 Subject: [PATCH 125/340] added permission to add --- backend/postcode_splitter/main.py | 152 +++++++++++++++--- .../terraform/lambda/postcodeSplitter/main.tf | 2 +- infrastructure/terraform/shared/main.tf | 2 +- 3 files changed, 132 insertions(+), 24 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index e834c44e..2714f330 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -4,12 +4,13 @@ import json import pandas as pd import requests import boto3 -from uuid import UUID +from uuid import UUID, uuid4 from urllib.parse import unquote -from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict +from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict, save_csv_to_s3 from utils.logger import setup_logger from tqdm import tqdm from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from datetime import datetime logger = setup_logger() @@ -62,13 +63,55 @@ def parse_s3_uri(s3_uri: str) -> tuple[str, str]: raise ValueError(f"Could not parse S3 URI") from e -def send_to_address2uprn_queue(task_id: str, rows: list) -> str: +def upload_batch_to_s3(batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None) -> str: """ - Send a postcode group to the address2UPRN SQS queue. + Upload batch DataFrame to S3 as CSV. + + Args: + batch_df: The DataFrame containing batch data + task_id: The parent task ID (used for file path) + sub_task_id: The subtask ID (used for file path) + bucket_name: The S3 bucket name (defaults to env variable) + + Returns: + S3 URI (s3://bucket/key) of the uploaded file + """ + if bucket_name is None: + bucket_name = os.getenv("S3_BUCKET_NAME") + + if not bucket_name: + logger.error( + "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set" + ) + raise ValueError("S3_BUCKET_NAME not configured") + + try: + file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}" + file_key = f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv" + + success = save_csv_to_s3(batch_df, bucket_name, file_key) + + if success: + s3_uri = f"s3://{bucket_name}/{file_key}" + logger.info(f"Successfully uploaded batch to {s3_uri}") + return s3_uri + else: + logger.error(f"Failed to upload batch to S3") + raise ValueError("Failed to save CSV to S3") + + except Exception as e: + logger.error(f"Error uploading batch to S3: {str(e)}") + raise + + +def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> str: + """ + Send a batch to the address2UPRN SQS queue with S3 reference. Args: task_id: The parent task ID - rows: List of row dictionaries for this postcode group + sub_task_id: The new subtask ID for this batch + s3_uri: S3 URI pointing to the batch CSV file Returns: Message ID from SQS @@ -81,7 +124,8 @@ def send_to_address2uprn_queue(task_id: str, rows: list) -> str: message_body = { "task_id": task_id, - "rows": rows, + "sub_task_id": sub_task_id, + "s3_uri": s3_uri, } response = sqs_client.send_message( @@ -91,12 +135,59 @@ def send_to_address2uprn_queue(task_id: str, rows: list) -> str: logger.info( f"Sent message to address2UPRN queue. " - f"Task: {task_id}, MessageId: {response['MessageId']}" + f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}" ) return response["MessageId"] +def create_batch_and_send_to_address2uprn( + batch_rows: list, + task_id: str, + subtask_interface: SubTaskInterface, + bucket_name: str, +) -> str: + """ + Create a batch DataFrame, upload to S3, create subtask, and send to address2UPRN queue. + + Args: + batch_rows: List of row dictionaries for this batch + task_id: The parent task ID + subtask_interface: SubTaskInterface instance + bucket_name: S3 bucket name + + Returns: + The created batch subtask ID + """ + # Generate unique batch subtask ID + batch_sub_task_id = str(uuid4()) + + # Upload batch to S3 + batch_df = pd.DataFrame(batch_rows) + s3_uri = upload_batch_to_s3(batch_df, str(task_id), batch_sub_task_id, bucket_name) + + # Create a new subtask for this batch with all inputs + created_batch_sub_task_id = subtask_interface.create_subtask( + task_id=task_id, + inputs={ + "task_id": str(task_id), + "sub_task_id": batch_sub_task_id, + "batch_size": len(batch_rows), + "s3_uri": s3_uri, + } + ) + logger.info(f"Created batch subtask {created_batch_sub_task_id}") + + # Send message with S3 reference + send_to_address2uprn_queue( + task_id=str(task_id), + sub_task_id=batch_sub_task_id, + s3_uri=s3_uri, + ) + + return created_batch_sub_task_id + + def handler(event, context): print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") @@ -112,6 +203,7 @@ def handler(event, context): results = [] errors = [] subtask_interface = SubTaskInterface() + bucket_name = os.getenv("S3_BUCKET_NAME") for record in records: task_id = None @@ -148,6 +240,12 @@ def handler(event, context): ) logger.info(f"Created subtask {subtask_id} for task {task_id}") + # Mark subtask as in progress + subtask_interface.update_subtask_status( + subtask_id, "in progress" + ) + logger.info(f"Marked subtask {subtask_id} as in progress") + # Read CSV from S3 logger.info(f"Processing S3 URI: {s3_uri}") bucket, key = parse_s3_uri(s3_uri) @@ -184,9 +282,11 @@ def handler(event, context): for postcode, rows in postcode_to_addresses.items(): all_rows.extend(rows) try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=all_rows, + create_batch_and_send_to_address2uprn( + batch_rows=all_rows, + task_id=task_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) logger.info( f"Sent all {len(all_rows)} rows in single batch to address2UPRN queue" @@ -214,9 +314,11 @@ def handler(event, context): # First, send the current batch if it has data if batch_rows: try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=batch_rows, + create_batch_and_send_to_address2uprn( + batch_rows=batch_rows, + task_id=task_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) logger.info( f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" @@ -236,9 +338,11 @@ def handler(event, context): # Send the large postcode on its own try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=rows, + create_batch_and_send_to_address2uprn( + batch_rows=rows, + task_id=task_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) logger.info( f"Sent large postcode {postcode} ({len(rows)} rows) to address2UPRN queue" @@ -263,9 +367,11 @@ def handler(event, context): f"Batch threshold reached: current {len(batch_rows)} + next postcode {len(rows)} = {current_batch_size} > {batch_size}" ) try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=batch_rows, + create_batch_and_send_to_address2uprn( + batch_rows=batch_rows, + task_id=task_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) logger.info( f"Sent batch of {len(batch_rows)} rows to address2UPRN queue (total sent: {total_sent})" @@ -290,9 +396,11 @@ def handler(event, context): # Send remaining batch if batch_rows: try: - send_to_address2uprn_queue( - task_id=str(task_id), - rows=batch_rows, + create_batch_and_send_to_address2uprn( + batch_rows=batch_rows, + task_id=task_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) total_sent += len(batch_rows) logger.info( diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index 78d927d3..e17d272d 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -55,7 +55,7 @@ module "lambda" { ENGINE_SQS_URL = "test" ENERGY_ASSESSMENTS_BUCKET = "test" ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url - S3_BUCKET_NAME = "retrofit-data-dev" # Hardcoded as deployed via serverless i believe + S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name }, ) } diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index eb2a679d..acf8c281 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -386,7 +386,7 @@ module "postcode_splitter_s3_read" { policy_name = "PostcodeSplitterReadS3" policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket" bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] - actions = ["s3:GetObject"] + actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"] resource_paths = ["/*"] } From dac676f538844d8c0b97c5ed23cddc9738750d27 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:05:29 +0000 Subject: [PATCH 126/340] don't bombard yet --- backend/postcode_splitter/main.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 2714f330..7aaf1fbb 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -63,7 +63,9 @@ def parse_s3_uri(s3_uri: str) -> tuple[str, str]: raise ValueError(f"Could not parse S3 URI") from e -def upload_batch_to_s3(batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None) -> str: +def upload_batch_to_s3( + batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None +) -> str: """ Upload batch DataFrame to S3 as CSV. @@ -87,7 +89,9 @@ def upload_batch_to_s3(batch_df: pd.DataFrame, task_id: str, sub_task_id: str, b try: file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}" - file_key = f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv" + file_key = ( + f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv" + ) success = save_csv_to_s3(batch_df, bucket_name, file_key) @@ -128,10 +132,11 @@ def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> s "s3_uri": s3_uri, } - response = sqs_client.send_message( - QueueUrl=queue_url, - MessageBody=json.dumps(message_body), - ) + # Don't run on sqs yet + # response = sqs_client.send_message( + # QueueUrl=queue_url, + # MessageBody=json.dumps(message_body), + # ) logger.info( f"Sent message to address2UPRN queue. " @@ -174,7 +179,7 @@ def create_batch_and_send_to_address2uprn( "sub_task_id": batch_sub_task_id, "batch_size": len(batch_rows), "s3_uri": s3_uri, - } + }, ) logger.info(f"Created batch subtask {created_batch_sub_task_id}") @@ -241,9 +246,7 @@ def handler(event, context): logger.info(f"Created subtask {subtask_id} for task {task_id}") # Mark subtask as in progress - subtask_interface.update_subtask_status( - subtask_id, "in progress" - ) + subtask_interface.update_subtask_status(subtask_id, "in progress") logger.info(f"Marked subtask {subtask_id} as in progress") # Read CSV from S3 From df141e4122e020b8f037e31a56838ff234daf367 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:08:00 +0000 Subject: [PATCH 127/340] post code splitter main py --- backend/postcode_splitter/main.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 7aaf1fbb..85dbc2da 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -132,18 +132,19 @@ def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> s "s3_uri": s3_uri, } - # Don't run on sqs yet + # # Don't run on sqs yet # response = sqs_client.send_message( # QueueUrl=queue_url, # MessageBody=json.dumps(message_body), # ) - logger.info( - f"Sent message to address2UPRN queue. " - f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}" - ) + # logger.info( + # f"Sent message to address2UPRN queue. " + # f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}" + # ) - return response["MessageId"] + # return response["MessageId"] + return str(uuid4()) def create_batch_and_send_to_address2uprn( From 5f8eca84b62452bf6c3708f0c5bfb03af4ef1700 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:12:11 +0000 Subject: [PATCH 128/340] deploy --- .github/workflows/deploy_terraform.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 02bb1b76..776bbd38 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -217,3 +217,5 @@ jobs: + + From bf7b8d87e5b380d71ae77b249cfccfb7afa99b19 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:20:28 +0000 Subject: [PATCH 129/340] add docker file and specify lambda images --- backend/address2UPRN/handler/Dockerfile | 2 +- backend/condition/handler/Dockerfile | 2 +- backend/postcode_splitter/handler/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 07159357..5f274456 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/lambda/python:3.10 +FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.10 # FROM python:3.11.10-bullseye diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile index 71556895..be0d5ca5 100644 --- a/backend/condition/handler/Dockerfile +++ b/backend/condition/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/lambda/python:3.11 +FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.11 # For local running: # FROM python:3.11.10-bullseye diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 74c00b9f..8e30f9e3 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM public.ecr.aws/lambda/python:3.11 +FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.11 ARG DEV_DB_HOST ARG DEV_DB_PORT From ee8554314b951e165d281967d09c4963c36c4932 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:23:35 +0000 Subject: [PATCH 130/340] add docker file and specify lambda images --- .github/workflows/deploy_terraform.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 776bbd38..990dbdfa 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -209,13 +209,3 @@ jobs: - - - - - - - - - - From 0ab0d5505f4c5aababc9c6f57d988b91c984c2bf Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:29:11 +0000 Subject: [PATCH 131/340] no cache --- .github/workflows/_build_image.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index a5e16a51..caf1ccb8 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -92,6 +92,7 @@ jobs: done <<< "${{ inputs.build_args }}" docker build \ + --no-cache \ -f ${{ inputs.dockerfile_path }} \ $BUILD_ARGS \ -t $IMAGE_URI \ From 3af620a61a0ce4a91ea8c2923eea5c23778c52ef Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:38:18 +0000 Subject: [PATCH 132/340] ensure we don't use any platform but linux/amd64 --- .github/workflows/_build_image.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index caf1ccb8..f4b94fc0 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -93,6 +93,7 @@ jobs: docker build \ --no-cache \ + --platform linux/amd64 \ -f ${{ inputs.dockerfile_path }} \ $BUILD_ARGS \ -t $IMAGE_URI \ From 0f4c1c0029706474317997420f70290f442455b5 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:52:11 +0000 Subject: [PATCH 133/340] only in docker build --- backend/address2UPRN/handler/Dockerfile | 2 +- backend/condition/handler/Dockerfile | 2 +- backend/postcode_splitter/handler/Dockerfile | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 5f274456..07159357 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.10 +FROM public.ecr.aws/lambda/python:3.10 # FROM python:3.11.10-bullseye diff --git a/backend/condition/handler/Dockerfile b/backend/condition/handler/Dockerfile index be0d5ca5..71556895 100644 --- a/backend/condition/handler/Dockerfile +++ b/backend/condition/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.11 +FROM public.ecr.aws/lambda/python:3.11 # For local running: # FROM python:3.11.10-bullseye diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 8e30f9e3..74c00b9f 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -1,4 +1,4 @@ -FROM --platform=linux/amd64 public.ecr.aws/lambda/python:3.11 +FROM public.ecr.aws/lambda/python:3.11 ARG DEV_DB_HOST ARG DEV_DB_PORT From c7bd70e17f3d339099040976e66a04047f0eaded Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 18:52:23 +0000 Subject: [PATCH 134/340] only in docker build --- .github/workflows/deploy_terraform.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 990dbdfa..6ee9de11 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -209,3 +209,7 @@ jobs: + + + + From 7637e87c3c7f2188e5c06fdcd50b3151fc75818c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 19:03:49 +0000 Subject: [PATCH 135/340] deleted all images in ecr --- .github/workflows/_deploy_lambda.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index b2f2ce49..1a690e02 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -114,3 +114,4 @@ jobs: -var="image_digest=${{ inputs.image_digest }}" + From ff78ddc5a0dbc299a47a21b4f2456f1f6c82f45e Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Thu, 12 Feb 2026 19:09:43 +0000 Subject: [PATCH 136/340] deleted all images in ecr --- .github/workflows/_build_image.yml | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index f4b94fc0..5e5b5155 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -91,15 +91,16 @@ jobs: BUILD_ARGS="$BUILD_ARGS --build-arg $temp" done <<< "${{ inputs.build_args }}" - docker build \ + docker buildx build \ --no-cache \ --platform linux/amd64 \ + --provenance=false \ + --sbom=false \ + --push \ -f ${{ inputs.dockerfile_path }} \ $BUILD_ARGS \ -t $IMAGE_URI \ ${{ inputs.build_context }} - - docker push $IMAGE_URI - name: Resolve image digest id: digest From f34a6269f7ae6a06de67171106cd5958aa547140 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 13 Feb 2026 09:39:25 +0000 Subject: [PATCH 137/340] Move updating of is_default to domain rather than database layer --- .../db/functions/recommendations_functions.py | 6 +- backend/app/domain/classes/plan.py | 78 ++++++++++++++++++- backend/categorisation/processor.py | 16 +++- 3 files changed, 92 insertions(+), 8 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 2f85cbec..2fdb6142 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -632,12 +632,12 @@ def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: return session_any.exec(stmt).scalar_one_or_none() -def set_plan_default(plan_id: int, is_default: bool) -> bool: +def update_plan(plan_model: PlanModel, scenario_model: ScenarioModel) -> bool: with db_read_session() as session: stmt = ( update(PlanModel) - .where(PlanModel.id == plan_id) - .values(is_default=is_default) + .where(PlanModel.id == plan_model.id) + .values(**plan_model.model_dump(exclude={"id"}, exclude_unset=True)) ) result = session.exec(stmt) session.commit() diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index e1215178..2b1d3026 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -2,8 +2,10 @@ from __future__ import annotations from dataclasses import replace from typing import Optional +from sqlalchemy import Tuple + from backend.app.db.models.portfolio import PortfolioGoal -from backend.app.db.models.recommendations import PlanModel +from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.scenario import Scenario from backend.app.domain.records.plan_record import PlanRecord from backend.app.utils import sap_to_epc @@ -56,8 +58,82 @@ class Plan: case _: raise NotImplementedError + def to_sqlalchemy(self) -> Tuple[PlanModel, ScenarioModel]: + scenario_record = self.scenario.record + + scenario_model = ScenarioModel( + id=self.scenario.id, + name=scenario_record.name, + created_at=scenario_record.created_at, + housing_type=scenario_record.housing_type, + goal=scenario_record.goal, + goal_value=scenario_record.goal_value, + trigger_file_path=scenario_record.trigger_file_path, + multi_plan=scenario_record.multi_plan, + is_default=scenario_record.is_default, + budget=scenario_record.budget, + already_installed_file_path=scenario_record.already_installed_file_path, + patches_file_path=scenario_record.patches_file_path, + non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path, + exclusions=scenario_record.exclusions, + cost=scenario_record.cost, + contingency=scenario_record.contingency, + funding=scenario_record.funding, + total_work_hours=scenario_record.total_work_hours, + energy_savings=scenario_record.energy_savings, + co2_equivalent_savings=scenario_record.co2_equivalent_savings, + energy_cost_savings=scenario_record.energy_cost_savings, + epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit, + epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit, + number_of_properties=scenario_record.number_of_properties, + n_units_to_retrofit=scenario_record.n_units_to_retrofit, + co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit, + co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit, + energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit, + energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit, + energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit, + energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit, + valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit, + cost_per_unit=scenario_record.cost_per_unit, + cost_per_co2_saved=scenario_record.cost_per_co2_saved, + cost_per_sap_point=scenario_record.cost_per_sap_point, + valuation_return_on_investment=scenario_record.valuation_return_on_investment, + property_valuation_increase=scenario_record.property_valuation_increase, + labour_days=scenario_record.labour_days, + ) + + record = self.record + + plan_model = PlanModel( + id=self.id, + property_id=record.property_id, + portfolio_id=record.portfolio_id, + scenario_id=self.scenario.id, + created_at=record.created_at, + is_default=record.is_default, + valuation_increase_lower_bound=record.valuation_increase_lower_bound, + valuation_increase_upper_bound=record.valuation_increase_upper_bound, + valuation_increase_average=record.valuation_increase_average, + plan_type=record.plan_type, + post_sap_points=record.post_sap_points, + post_epc_rating=record.post_epc_rating, + post_co2_emissions=record.post_co2_emissions, + co2_savings=record.co2_savings, + post_energy_bill=record.post_energy_bill, + energy_bill_savings=record.energy_bill_savings, + post_energy_consumption=record.post_energy_consumption, + energy_consumption_savings=record.energy_consumption_savings, + valuation_post_retrofit=record.valuation_post_retrofit, + valuation_increase=record.valuation_increase, + cost_of_works=record.cost_of_works, + contingency_cost=record.contingency_cost, + ) + + return Tuple(plan_model, scenario_model) # TODO: create a type for this + def set_default(self, value: bool) -> None: self.record = replace(self.record, is_default=value) + self.scenario.record = replace(self.scenario.record, is_default=value) def _is_compliant_epc(self) -> bool: goal_value: str = self.scenario.record.goal_value diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 55a1a1c6..9c1bb8f0 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,11 +1,15 @@ from collections import defaultdict -from typing import List +from typing import List, cast + +from sqlalchemy import Tuple from backend.app.db.functions.recommendations_functions import ( get_plans_by_portfolio_id, get_scenario, set_plan_default, + update_plan, ) +from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan from backend.categorisation.categorisation_logic import CategorisationLogic from utils.logger import setup_logger @@ -58,7 +62,11 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: if plan.id is None: raise ValueError("Cannot update Plan with missing ID") - set_plan_default( - plan.id, - plan.id == cheapest_plan.id, + plan.set_default(plan.id == cheapest_plan.id) + + plan_model, scenario_model = cast( + tuple[PlanModel, ScenarioModel], + plan.to_sqlalchemy(), ) + + update_plan(plan_model, scenario_model) From 61d9e64e1b06e4d0f0e5207ec96bb9cb9a31ff84 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 13 Feb 2026 09:44:35 +0000 Subject: [PATCH 138/340] also update scenario when updating plan --- .../app/db/functions/recommendations_functions.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 2fdb6142..620ec059 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -639,6 +639,14 @@ def update_plan(plan_model: PlanModel, scenario_model: ScenarioModel) -> bool: .where(PlanModel.id == plan_model.id) .values(**plan_model.model_dump(exclude={"id"}, exclude_unset=True)) ) - result = session.exec(stmt) + plan_result = session.exec(stmt) + + scenario_stmt = ( + update(ScenarioModel) + .where(ScenarioModel.id == scenario_model.id) + .values(**scenario_model.model_dump(exclude={"id"}, exclude_unset=True)) + ) + session.exec(scenario_stmt) + session.commit() - return result.rowcount > 0 + return plan_result.rowcount > 0 From 561594a6ca9a2ec34eba603db5655cfdb6f50c24 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 13 Feb 2026 09:45:15 +0000 Subject: [PATCH 139/340] consistent use of Tuple --- backend/categorisation/processor.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 9c1bb8f0..ee42efcd 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,12 +1,9 @@ from collections import defaultdict -from typing import List, cast - -from sqlalchemy import Tuple +from typing import List, Tuple, cast from backend.app.db.functions.recommendations_functions import ( get_plans_by_portfolio_id, get_scenario, - set_plan_default, update_plan, ) from backend.app.db.models.recommendations import PlanModel, ScenarioModel @@ -65,7 +62,7 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: plan.set_default(plan.id == cheapest_plan.id) plan_model, scenario_model = cast( - tuple[PlanModel, ScenarioModel], + Tuple[PlanModel, ScenarioModel], plan.to_sqlalchemy(), ) From 1814c5988c151759c90e9a9807c636162a95c14d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 11:05:05 +0000 Subject: [PATCH 140/340] run on sqs --- .github/workflows/_build_image.yml | 2 +- backend/postcode_splitter/main.py | 20 +++++++++----------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 5e5b5155..3435c92d 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -90,7 +90,7 @@ jobs: temp=$(eval echo "$line") BUILD_ARGS="$BUILD_ARGS --build-arg $temp" done <<< "${{ inputs.build_args }}" - + docker buildx build \ --no-cache \ --platform linux/amd64 \ diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 85dbc2da..3d0f0d8d 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -132,19 +132,17 @@ def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> s "s3_uri": s3_uri, } - # # Don't run on sqs yet - # response = sqs_client.send_message( - # QueueUrl=queue_url, - # MessageBody=json.dumps(message_body), - # ) + response = sqs_client.send_message( + QueueUrl=queue_url, + MessageBody=json.dumps(message_body), + ) - # logger.info( - # f"Sent message to address2UPRN queue. " - # f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}" - # ) + logger.info( + f"Sent message to address2UPRN queue. " + f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}" + ) - # return response["MessageId"] - return str(uuid4()) + return response["MessageId"] def create_batch_and_send_to_address2uprn( From 8152dc516666ce6d9183e73b3879a2f5f028cbd7 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 11:15:15 +0000 Subject: [PATCH 141/340] deploy with new address2uprn handling --- backend/address2UPRN/main.py | 163 ++++++++++++------------------ backend/postcode_splitter/main.py | 51 +--------- utils/s3.py | 51 ++++++++++ 3 files changed, 118 insertions(+), 147 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index f4aa0dc9..f843d28a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -12,11 +12,16 @@ import requests from uuid import UUID import uuid from backend.app.db.functions.tasks.Tasks import SubTaskInterface -from utils.s3 import save_csv_to_s3 +from utils.s3 import ( + save_csv_to_s3, + read_csv_from_s3 as read_csv_from_s3_dict, + parse_s3_uri, +) from datetime import datetime logger = setup_logger() + EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) @@ -526,48 +531,6 @@ def save_results_to_s3( return False -def test(a, b): - assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}" - - -def run_all_test(): - # Basic usage with different post codes styles - test(get_epc_data_with_postcode("b93 8sy").shape[0], 63) - test(get_epc_data_with_postcode("B938sy").shape[0], 63) - test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63) - test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63) - - test(get_uprn("68", "b93 8sy"), "100070989938") - test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938") - test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633") - test(get_uprn("28 A", "se6 4tf"), "100023278633") - test(get_uprn("28A", "se6 4tf"), "100023278633") - test(get_uprn("6 Aitken Close", "E8 4SQ"), False) - - # unique case - test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198") - test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198") - test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198") - test(get_uprn("1, 5 Semley Gate", "e9 5nh"), False) - test( - get_uprn("1 Semley Gate", "e9 5nh"), "10008238188" - ) # this one return "flat 1, in 1 semley gate" - test( - get_uprn("48 Oswald Street", "E5 0BT"), False - ) # this one return "flat 1, in 1 semley gate" - test( - get_uprn("42 Oswald Street", "E5 0BT"), False - ) # this one return "flat 1, in 1 semley gate" - test( - get_uprn("46 Oswald Street", "E5 0BT"), False - ) # this one return "flat 1, in 1 semley gate" - get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street") - get_uprn_candidates( - get_epc_data_with_postcode("Cr2 7dl"), - "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY", - ) - - def handler(event, context, local=False): print("=== Address2UPRN Lambda Handler ===") print(f"Function: {context.function_name}") @@ -581,35 +544,8 @@ def handler(event, context, local=False): "body": json.dumps( { "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "rows": [ - { - "landlord_property_id": "00000002POR", - "UPRN": "766019911", - "Address 1": "9 Redland Way", - "Address 2": "Aylesbury Vale", - "postcode": "HP21 9RJ", - "landlord_property_type": "House", - "postcode_clean": "HP219RJ", - }, - { - "landlord_property_id": "00000003MTR", - "UPRN": "100120781544", - "Address 1": "16 Lime Crescent", - "Address 2": "BICESTER", - "postcode": "OX26 3XJ", - "landlord_property_type": "House", - "postcode_clean": "OX263XJ", - }, - { - "landlord_property_id": "00000004HBY", - "UPRN": "14033542", - "Address 1": "14 Dunbar Drive", - "Address 2": "Woodley", - "postcode": "RG5 4HA", - "landlord_property_type": "House", - "postcode_clean": "RG54HA", - }, - ], + "sub_task_id": "a1b2c3d4-e5f6-7a8b-9c0d-e1f2a3b4c5d6", + "s3_uri": "", } ) } @@ -637,14 +573,19 @@ def handler(event, context, local=False): # Validate required fields task_id = body.get("task_id") - rows = body.get("rows", []) + sub_task_id = body.get("sub_task_id") + s3_uri = body.get("s3_uri") if not task_id: errors.append({"error": "Missing required field: task_id"}) continue - if not rows: - errors.append({"error": "Missing or empty rows data"}) + if not sub_task_id: + errors.append({"error": "Missing required field: sub_task_id"}) + continue + + if not s3_uri: + errors.append({"error": "Missing required field: s3_uri"}) continue # Convert task_id to UUID @@ -654,29 +595,56 @@ def handler(event, context, local=False): errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) continue - # Create a subtask for this batch - subtask_id = subtask_interface.create_subtask( - task_id=task_id, inputs={"row_count": len(rows)} - ) - logger.info( - f"Created subtask {subtask_id} for task {task_id} with {len(rows)} rows" - ) + # Convert sub_task_id to UUID + try: + subtask_id = ( + UUID(sub_task_id) if isinstance(sub_task_id, str) else sub_task_id + ) + except ValueError as e: + errors.append( + {"error": f"Invalid UUID format for sub_task_id: {str(e)}"} + ) + continue + + # Update existing subtask to 'in progress' + subtask_interface.update_subtask_status(subtask_id, "in progress") + logger.info(f"Processing subtask {subtask_id} for task {task_id}") + + # Parse S3 URI and read CSV from S3 + logger.info(f"Reading data from S3: {s3_uri}") + try: + bucket, key = parse_s3_uri(s3_uri) + csv_data = read_csv_from_s3_dict(bucket, key) + df = pd.DataFrame(csv_data) + logger.info(f"Loaded {len(df)} rows from S3") + except Exception as s3_error: + logger.error(f"Failed to read data from S3: {s3_error}") + errors.append( + {"error": "Failed to read data from S3", "details": str(s3_error)} + ) + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(s3_error)} + ) + except Exception as db_error: + logger.error(f"Failed to update subtask status: {db_error}") + continue # Process the rows - logger.info(f"Processing {len(rows)} rows for task {task_id}") + logger.info(f"Processing {len(df)} rows for task {task_id}") - # Convert rows to DataFrame - df = pd.DataFrame(rows) - - # Create user_input column by concatenating Address 1 and Address 2 - df["user_input"] = ( - df["Address 1"].fillna("") - + " " - + df["Address 2"].fillna("") - + " " - + df["Address 3"].fillna("") - ).str.strip() - logger.info(f"Created user_input column from Address 1 and Address 2") + # Create user_input column by concatenating Address columns if not already present + if "user_input" not in df.columns: + df["user_input"] = ( + df["Address 1"].fillna("") + + " " + + df["Address 2"].fillna("") + + " " + + df["Address 3"].fillna("") + ).str.strip() + logger.info(f"Created user_input column from Address 1 and Address 2") + else: + logger.info(f"user_input column already present in data") clean_df = df.dropna(subset=["postcode_clean"]) @@ -791,7 +759,6 @@ def handler(event, context, local=False): results.append( { "subtask_id": str(subtask_id), - "rows_processed": len(rows), "postcodes_processed": postcodes_processed, "addresses_processed": addresses_processed, "uprns_found": uprns_found, @@ -802,7 +769,9 @@ def handler(event, context, local=False): # Mark subtask as completed try: subtask_interface.update_subtask_status( - subtask_id, "completed", outputs={"rows_processed": len(rows)} + subtask_id, + "completed", + outputs={"rows_processed": "todo -> show sensible output"}, ) logger.info(f"Marked subtask {subtask_id} as completed") except Exception as db_error: diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 3d0f0d8d..930fac7f 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -5,8 +5,7 @@ import pandas as pd import requests import boto3 from uuid import UUID, uuid4 -from urllib.parse import unquote -from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict, save_csv_to_s3 +from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict, save_csv_to_s3, parse_s3_uri from utils.logger import setup_logger from tqdm import tqdm from backend.app.db.functions.tasks.Tasks import SubTaskInterface @@ -15,54 +14,6 @@ from datetime import datetime logger = setup_logger() -def parse_s3_uri(s3_uri: str) -> tuple[str, str]: - """ - Parse S3 URI to extract bucket and key. - - Supports two formats: - 1. S3 URI format: s3://bucket/key - """ - logger.info("Parsing S3 URI") - - try: - # Check if it's an S3 URI format - if s3_uri.startswith("s3://"): - parts = s3_uri[5:].split("/", 1) - if len(parts) < 2: - raise ValueError("S3 URI must include both bucket and key") - bucket = parts[0] - key = parts[1] - logger.info(f"Extracted bucket: {bucket}, key: {key}") - return bucket, key - - # Otherwise, treat as AWS console URL - logger.info("Parsing as AWS console URL") - - # Split base URL and query string - if "?" not in s3_uri: - raise ValueError("No query string found") - - base, query = s3_uri.split("?", 1) - - # Extract bucket from base URL - if "/s3/object/" not in base: - raise ValueError("No '/s3/object/' found in URL path") - - path_parts = base.split("/s3/object/") - bucket = path_parts[1] - logger.info(f"Extracted bucket: {bucket}") - - # Extract prefix from query parameters - params = dict(item.split("=") for item in query.split("&") if "=" in item) - key = unquote(params.get("prefix", "")) - logger.info(f"Extracted key: {key}") - - return bucket, key - except Exception as e: - logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}") - raise ValueError(f"Could not parse S3 URI") from e - - def upload_batch_to_s3( batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None ) -> str: diff --git a/utils/s3.py b/utils/s3.py index 0e79c26b..0ba036f7 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -3,11 +3,62 @@ import boto3 import csv import pandas as pd from io import BytesIO, StringIO +from urllib.parse import unquote from utils.logger import setup_logger from botocore.exceptions import NoCredentialsError, PartialCredentialsError logger = setup_logger() + +def parse_s3_uri(s3_uri: str) -> tuple[str, str]: + """ + Parse S3 URI to extract bucket and key. + + Supports two formats: + 1. S3 URI format: s3://bucket/key + 2. AWS console URL format with query parameters + """ + logger.info("Parsing S3 URI") + + try: + # Check if it's an S3 URI format + if s3_uri.startswith("s3://"): + parts = s3_uri[5:].split("/", 1) + if len(parts) < 2: + raise ValueError("S3 URI must include both bucket and key") + bucket = parts[0] + key = parts[1] + logger.info(f"Extracted bucket: {bucket}, key: {key}") + return bucket, key + + # Otherwise, treat as AWS console URL + logger.info("Parsing as AWS console URL") + + # Split base URL and query string + if "?" not in s3_uri: + raise ValueError("No query string found") + + base, query = s3_uri.split("?", 1) + + # Extract bucket from base URL + if "/s3/object/" not in base: + raise ValueError("No '/s3/object/' found in URL path") + + path_parts = base.split("/s3/object/") + bucket = path_parts[1] + logger.info(f"Extracted bucket: {bucket}") + + # Extract prefix from query parameters + params = dict(item.split("=") for item in query.split("&") if "=" in item) + key = unquote(params.get("prefix", "")) + logger.info(f"Extracted key: {key}") + + return bucket, key + except Exception as e: + logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}") + raise ValueError(f"Could not parse S3 URI") from e + + def read_from_s3(bucket_name, s3_file_name): """ Read an object from s3. Decoding of the data is left for outside of this function From e0e50d696af6ce879a748c03f340d90f02ab1756 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 13 Feb 2026 12:26:31 +0000 Subject: [PATCH 142/340] fixes so it runs (as far as the database update), plus some temp prints --- .../db/functions/recommendations_functions.py | 2 +- backend/app/db/models/recommendations.py | 16 +++++++- backend/app/domain/classes/plan.py | 10 +++-- .../categorisation/categorisation_logic.py | 12 ------ backend/categorisation/local_runner.py | 7 +++- backend/categorisation/processor.py | 41 ++++++++++++++----- 6 files changed, 59 insertions(+), 29 deletions(-) delete mode 100644 backend/categorisation/categorisation_logic.py diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 620ec059..28d82416 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -622,7 +622,7 @@ def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: stmt = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id) with db_read_session() as session: session_any: Any = session # Typehint as Any to satisfy Pylance... - return session_any.exec(stmt).all() + return session_any.exec(stmt).scalars().all() def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index addb5e80..538b11e3 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -1,4 +1,4 @@ -from typing import Iterable, Optional +from typing import Iterable, List, NamedTuple, Optional, Type from sqlalchemy import ( Column, BigInteger, @@ -22,6 +22,10 @@ import enum Base = declarative_base() +def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]: + return [e.value for e in enum_cls] + + class Recommendation(Base): __tablename__ = "recommendation" @@ -152,7 +156,10 @@ class ScenarioModel(Base): BigInteger, ForeignKey(Portfolio.id), nullable=False ) housing_type: Mapped[str] = mapped_column(String, nullable=False) - goal: Mapped[PortfolioGoal] = mapped_column(Enum(PortfolioGoal), nullable=False) + goal: Mapped[PortfolioGoal] = mapped_column( + Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"), + nullable=False, + ) goal_value: Mapped[str] = mapped_column(String, nullable=False) trigger_file_path: Mapped[str] = mapped_column(String, nullable=False) already_installed_file_path: Mapped[Optional[str]] = mapped_column(String) @@ -252,3 +259,8 @@ class InstalledMeasure(Base): def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]: return [m.value for m in e] + + +class PlanPersistence(NamedTuple): + plan: PlanModel + scenario: ScenarioModel diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 2b1d3026..4bd8f962 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -5,7 +5,11 @@ from typing import Optional from sqlalchemy import Tuple from backend.app.db.models.portfolio import PortfolioGoal -from backend.app.db.models.recommendations import PlanModel, ScenarioModel +from backend.app.db.models.recommendations import ( + PlanModel, + PlanPersistence, + ScenarioModel, +) from backend.app.domain.classes.scenario import Scenario from backend.app.domain.records.plan_record import PlanRecord from backend.app.utils import sap_to_epc @@ -58,7 +62,7 @@ class Plan: case _: raise NotImplementedError - def to_sqlalchemy(self) -> Tuple[PlanModel, ScenarioModel]: + def to_sqlalchemy(self) -> PlanPersistence: scenario_record = self.scenario.record scenario_model = ScenarioModel( @@ -129,7 +133,7 @@ class Plan: contingency_cost=record.contingency_cost, ) - return Tuple(plan_model, scenario_model) # TODO: create a type for this + return PlanPersistence(plan=plan_model, scenario=scenario_model) def set_default(self, value: bool) -> None: self.record = replace(self.record, is_default=value) diff --git a/backend/categorisation/categorisation_logic.py b/backend/categorisation/categorisation_logic.py deleted file mode 100644 index 2f540a55..00000000 --- a/backend/categorisation/categorisation_logic.py +++ /dev/null @@ -1,12 +0,0 @@ -from typing import List -from backend.app.domain.classes.plan import Plan - - -class CategorisationLogic: - @staticmethod - def get_compliant_plans(plans: List[Plan]) -> List[Plan]: - raise NotImplementedError - - @staticmethod - def get_cheapest_plan(plans: List[Plan]) -> Plan: - raise NotImplementedError diff --git a/backend/categorisation/local_runner.py b/backend/categorisation/local_runner.py index 4693850c..599cbbbb 100644 --- a/backend/categorisation/local_runner.py +++ b/backend/categorisation/local_runner.py @@ -1,5 +1,10 @@ +from backend.categorisation.processor import process_portfolio + + def main() -> None: - pass + portfolio_id = 556 + + process_portfolio(portfolio_id) if __name__ == "__main__": diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index ee42efcd..704dfc07 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import List, Tuple, cast +from typing import Dict, List, Tuple, cast from backend.app.db.functions.recommendations_functions import ( get_plans_by_portfolio_id, @@ -8,23 +8,30 @@ from backend.app.db.functions.recommendations_functions import ( ) from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan -from backend.categorisation.categorisation_logic import CategorisationLogic +from backend.app.domain.classes.scenario import Scenario from utils.logger import setup_logger logger = setup_logger() def process_portfolio(portfolio_id: int) -> None: - plans = _load_plans_for_portfolio(portfolio_id) - plans_by_property = _group_plans_by_property(plans) + print(f"Processing portfolio {portfolio_id}") + plans: List[Plan] = _load_plans_for_portfolio(portfolio_id) + plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) + + for uprn, property_plans in plans_by_property.items(): + + if not property_plans: + raise ValueError(f"No plans for property {uprn}") - for property_plans in plans_by_property.values(): cheapest_plan = _choose_cheapest_relevant_plan(property_plans) _update_default_flags(property_plans, cheapest_plan) def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: plan_models = get_plans_by_portfolio_id(portfolio_id) + print(f"Got {len(plan_models)} plans from database") + plans: List[Plan] = [] for model in plan_models: @@ -33,12 +40,15 @@ def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: continue scenario_model = get_scenario(model.scenario_id) - plans.append(Plan.from_sqlalchemy(model, scenario_model)) + plans.append( + Plan.from_sqlalchemy(model, Scenario.from_sqlalchemy(scenario_model)) + ) + print("Successfully mapped plan and scenario to domain object") return plans -def _group_plans_by_property(plans: List[Plan]) -> dict[int, List[Plan]]: +def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]: grouped: dict[int, List[Plan]] = defaultdict(list) for plan in plans: @@ -48,10 +58,18 @@ def _group_plans_by_property(plans: List[Plan]) -> dict[int, List[Plan]]: def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan: - compliant_plans = CategorisationLogic.get_compliant_plans(plans) + plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans - plans_to_consider = compliant_plans or plans - return CategorisationLogic.get_cheapest_plan(plans_to_consider) + def plan_cost(plan: Plan) -> float: + return ( + plan.record.cost_of_works + if plan.record.cost_of_works is not None + else float("inf") + ) + + cheapest_plan = min(plans_to_consider, key=plan_cost) + + return cheapest_plan def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: @@ -60,6 +78,9 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: raise ValueError("Cannot update Plan with missing ID") plan.set_default(plan.id == cheapest_plan.id) + print( + f"Setting plan of id {plan.id}, scenario name {plan.scenario.record.name} to is_default value {plan.id == cheapest_plan.id}" + ) plan_model, scenario_model = cast( Tuple[PlanModel, ScenarioModel], From 0dbc5f985cb80c12b00b6653cb62dfa4e5e95f71 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 12:37:53 +0000 Subject: [PATCH 143/340] wrong subtask id being sent --- backend/postcode_splitter/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 930fac7f..e49a7f0d 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -136,7 +136,7 @@ def create_batch_and_send_to_address2uprn( # Send message with S3 reference send_to_address2uprn_queue( task_id=str(task_id), - sub_task_id=batch_sub_task_id, + sub_task_id=created_batch_sub_task_id, s3_uri=s3_uri, ) From e70a8b3c62c998d7596df2869f8a67ca08570d21 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 12:40:53 +0000 Subject: [PATCH 144/340] wrong subtask id being sent --- .github/workflows/deploy_terraform.yml | 8 -------- 1 file changed, 8 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 6ee9de11..d2fd7b5b 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -205,11 +205,3 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - - - - - - - - From 581f0ad49fb8859a7e983e05db6058e31ffb8a79 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 12:57:36 +0000 Subject: [PATCH 145/340] uudi needs to be str --- backend/postcode_splitter/main.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index e49a7f0d..b3c78b20 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -5,7 +5,11 @@ import pandas as pd import requests import boto3 from uuid import UUID, uuid4 -from utils.s3 import read_csv_from_s3 as read_csv_from_s3_dict, save_csv_to_s3, parse_s3_uri +from utils.s3 import ( + read_csv_from_s3 as read_csv_from_s3_dict, + save_csv_to_s3, + parse_s3_uri, +) from utils.logger import setup_logger from tqdm import tqdm from backend.app.db.functions.tasks.Tasks import SubTaskInterface @@ -136,7 +140,7 @@ def create_batch_and_send_to_address2uprn( # Send message with S3 reference send_to_address2uprn_queue( task_id=str(task_id), - sub_task_id=created_batch_sub_task_id, + sub_task_id=str(created_batch_sub_task_id), s3_uri=s3_uri, ) From d99ee337670800fc5955331e27d9926afb99efd9 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 12:57:47 +0000 Subject: [PATCH 146/340] uudi needs to be str --- .github/workflows/_deploy_lambda.yml | 1 + .github/workflows/unit_tests.yml | 46 ++++++++++++++-------------- 2 files changed, 24 insertions(+), 23 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 1a690e02..9f8619f9 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -115,3 +115,4 @@ jobs: + diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index cc6431b8..5521a481 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -1,30 +1,30 @@ -name: Run unit tests +# name: Run unit tests -on: - pull_request: - branches: - - "**" +# on: +# pull_request: +# branches: +# - "**" -jobs: - test: - runs-on: ubuntu-latest +# jobs: +# test: +# runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v4 +# steps: +# - name: Checkout code +# uses: actions/checkout@v4 - - name: Set up Python 3.11 - uses: actions/setup-python@v4 - with: - python-version: '3.11' +# - name: Set up Python 3.11 +# uses: actions/setup-python@v4 +# with: +# python-version: '3.11' - - name: Install tox via Makefile - run: | - make setup +# - name: Install tox via Makefile +# run: | +# make setup - - name: Run tests with tox via Makefile - env: - EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} - run: | - make test \ No newline at end of file +# - name: Run tests with tox via Makefile +# env: +# EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} +# run: | +# make test \ No newline at end of file From a4b259959f37d22ac01011db5e8453bb561bb8f3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 13:35:05 +0000 Subject: [PATCH 147/340] set defaults --- backend/app/config.py | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/backend/app/config.py b/backend/app/config.py index 41552ae5..feb312b4 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -18,37 +18,37 @@ def resolve_env_file() -> Optional[str]: class Settings(BaseSettings): - API_KEY: str + API_KEY: str = "changeme" API_KEY_NAME: str = "X-API-KEY" - SECRET_KEY: str - ENVIRONMENT: str - DATA_BUCKET: str + SECRET_KEY: str = "changeme" + ENVIRONMENT: str = "changeme" + DATA_BUCKET: str = "changeme" PLAN_TRIGGER_BUCKET: str - ENGINE_SQS_URL: str + ENGINE_SQS_URL: str = "changeme" # Third parties - EPC_AUTH_TOKEN: str - GOOGLE_SOLAR_API_KEY: str + EPC_AUTH_TOKEN: str = "changeme" + GOOGLE_SOLAR_API_KEY: str = "changeme" # Database settings - DB_HOST: str - DB_PASSWORD: str - DB_USERNAME: str - DB_PORT: str - DB_NAME: str + DB_HOST: str = "changeme" + DB_PASSWORD: str = "changeme" + DB_USERNAME: str = "changeme" + DB_PORT: str = "changeme" + DB_NAME: str = "changeme" # Prediction buckets - SAP_PREDICTIONS_BUCKET: str - CARBON_PREDICTIONS_BUCKET: str - HEAT_PREDICTIONS_BUCKET: str + SAP_PREDICTIONS_BUCKET: str = "changeme" + CARBON_PREDICTIONS_BUCKET: str = "changeme" + HEAT_PREDICTIONS_BUCKET: str = "changeme" # LIGHTING_COST_PREDICTIONS_BUCKET: str # HEATING_COST_PREDICTIONS_BUCKET: str # HOT_WATER_COST_PREDICTIONS_BUCKET: str - HEATING_KWH_PREDICTIONS_BUCKET: str - HOTWATER_KWH_PREDICTIONS_BUCKET: str + HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme" + HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme" # Other S3 buckts - ENERGY_ASSESSMENTS_BUCKET: str + ENERGY_ASSESSMENTS_BUCKET: str = "changeme" # Optional AWS creds (only required in local) AWS_ACCESS_KEY_ID: Optional[str] = None From 5770e0f066ebf514116f0e6a18d9bca9c5a7ff0f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 13:35:27 +0000 Subject: [PATCH 148/340] set defaults --- .github/workflows/_deploy_lambda.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 9f8619f9..528300f8 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -113,6 +113,3 @@ jobs: -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \ -var="image_digest=${{ inputs.image_digest }}" - - - From 16386173af118b3c7f62973d62d699ce2a9f6e43 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 13 Feb 2026 13:39:38 +0000 Subject: [PATCH 149/340] get update_plan working --- .../db/functions/recommendations_functions.py | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 28d82416..6816e25b 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -634,17 +634,26 @@ def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: def update_plan(plan_model: PlanModel, scenario_model: ScenarioModel) -> bool: with db_read_session() as session: - stmt = ( - update(PlanModel) - .where(PlanModel.id == plan_model.id) - .values(**plan_model.model_dump(exclude={"id"}, exclude_unset=True)) + plan_values = { + c.name: getattr(plan_model, c.name) + for c in plan_model.__table__.columns + if c.name != "id" + } + scenario_values = { + c.name: getattr(scenario_model, c.name) + for c in scenario_model.__table__.columns + if c.name not in {"id", "portfolio_id"} + } + + plan_stmt = ( + update(PlanModel).where(PlanModel.id == plan_model.id).values(**plan_values) ) - plan_result = session.exec(stmt) + plan_result = session.exec(plan_stmt) scenario_stmt = ( update(ScenarioModel) .where(ScenarioModel.id == scenario_model.id) - .values(**scenario_model.model_dump(exclude={"id"}, exclude_unset=True)) + .values(**scenario_values) ) session.exec(scenario_stmt) From da79ccf7595927cb105f9b0b2f727c43c8ad563f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 14:08:09 +0000 Subject: [PATCH 150/340] just do 5 --- backend/postcode_splitter/main.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index b3c78b20..1049295b 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -211,7 +211,8 @@ def handler(event, context): csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) - df = df.head(1983) + # df = df.head(1983) + df = df.head(5) logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") From d6ea88adf3860d7715f173820199291bf227e2c6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 14:08:38 +0000 Subject: [PATCH 151/340] just do 5 --- .github/workflows/deploy_terraform.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index d2fd7b5b..4dcbf129 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -205,3 +205,4 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + From bd9e553e35c562e80007e1c057e6aa245b3a417f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 13 Feb 2026 14:50:48 +0000 Subject: [PATCH 152/340] bulk update of plans --- .../db/functions/recommendations_functions.py | 65 ++++++++++++------- backend/categorisation/processor.py | 23 +++---- 2 files changed, 49 insertions(+), 39 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 6816e25b..e690991a 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,6 +1,6 @@ -from typing import Any, List, Optional -from sqlalchemy import text, insert, delete, select, update -from sqlalchemy.orm import Session +from typing import Any, Dict, List, Optional +from sqlalchemy import inspect, text, insert, delete, select, update +from sqlalchemy.orm import Session, Mapper from sqlalchemy.exc import SQLAlchemyError from sqlmodel import Session @@ -632,30 +632,45 @@ def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: return session_any.exec(stmt).scalar_one_or_none() -def update_plan(plan_model: PlanModel, scenario_model: ScenarioModel) -> bool: +def bulk_update_plans( + plan_models: List[PlanModel], + scenario_models: List[ScenarioModel], +) -> int: + if not plan_models: + return 0 + with db_read_session() as session: - plan_values = { - c.name: getattr(plan_model, c.name) - for c in plan_model.__table__.columns - if c.name != "id" - } - scenario_values = { - c.name: getattr(scenario_model, c.name) - for c in scenario_model.__table__.columns - if c.name not in {"id", "portfolio_id"} - } - plan_stmt = ( - update(PlanModel).where(PlanModel.id == plan_model.id).values(**plan_values) - ) - plan_result = session.exec(plan_stmt) + plan_mapper: Mapper[Any] = inspect(PlanModel) + scenario_mapper: Mapper[Any] = inspect(ScenarioModel) - scenario_stmt = ( - update(ScenarioModel) - .where(ScenarioModel.id == scenario_model.id) - .values(**scenario_values) - ) - session.exec(scenario_stmt) + plan_mappings: List[Dict[str, Any]] = ( + [] + ) # Typehint as Any to satisfy Pylance... + for plan in plan_models: + data: Dict[str, Any] = { + c.name: getattr(plan, c.name) + for c in plan.__table__.columns + if c.name != "id" + } + data["id"] = plan.id + plan_mappings.append(data) + + session.bulk_update_mappings(plan_mapper, plan_mappings) + + scenario_mappings: List[Dict[str, Any]] = ( + [] + ) # Typehint as Any to satisfy Pylance... + for scenario in scenario_models: + data: Dict[str, Any] = { + c.name: getattr(scenario, c.name) + for c in scenario.__table__.columns + if c.name not in {"id", "portfolio_id"} + } + data["id"] = scenario.id + scenario_mappings.append(data) + + session.bulk_update_mappings(scenario_mapper, scenario_mappings) session.commit() - return plan_result.rowcount > 0 + return len(plan_models) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 704dfc07..445bbbc4 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,10 +1,10 @@ from collections import defaultdict -from typing import Dict, List, Tuple, cast +from typing import Dict, List from backend.app.db.functions.recommendations_functions import ( + bulk_update_plans, get_plans_by_portfolio_id, get_scenario, - update_plan, ) from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan @@ -73,18 +73,13 @@ def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan: def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: + plan_models: List[PlanModel] = [] + scenario_models: List[ScenarioModel] = [] + for plan in plans: - if plan.id is None: - raise ValueError("Cannot update Plan with missing ID") - plan.set_default(plan.id == cheapest_plan.id) - print( - f"Setting plan of id {plan.id}, scenario name {plan.scenario.record.name} to is_default value {plan.id == cheapest_plan.id}" - ) + plan_model, scenario_model = plan.to_sqlalchemy() + plan_models.append(plan_model) + scenario_models.append(scenario_model) - plan_model, scenario_model = cast( - Tuple[PlanModel, ScenarioModel], - plan.to_sqlalchemy(), - ) - - update_plan(plan_model, scenario_model) + bulk_update_plans(plan_models, scenario_models) From 64260a4bbbf3dd03562eb3abd59a7339742aad38 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 13 Feb 2026 17:27:41 +0000 Subject: [PATCH 153/340] changing build commange to handle ubuntu upgrade from docker 28 to 29 --- .github/workflows/deploy_fastapi_backend.yml | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy_fastapi_backend.yml b/.github/workflows/deploy_fastapi_backend.yml index 32e30bfa..e4037c19 100644 --- a/.github/workflows/deploy_fastapi_backend.yml +++ b/.github/workflows/deploy_fastapi_backend.yml @@ -87,7 +87,13 @@ jobs: - name: Build Docker Image For Engine run: | - docker build -t fastapi-lambda-image:${{ github.sha }} -f backend/docker/engine.Dockerfile . --load + docker buildx build \ + --platform linux/amd64 \ + --provenance=false \ + --output=type=docker \ + -t fastapi-lambda-image:${{ github.sha }} \ + -f backend/docker/engine.Dockerfile \ + . - name: Login to ECR run: | From 8e574c24014ee15534de3847762e3800690f521f Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Fri, 13 Feb 2026 18:30:47 +0000 Subject: [PATCH 154/340] post code splitter works --- .github/workflows/deploy_terraform.yml | 2 +- backend/address2UPRN/main.py | 31 +-- backend/postcode_splitter/main.py | 361 +++++++++---------------- 3 files changed, 130 insertions(+), 264 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 4dcbf129..2fd12fe6 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -77,7 +77,7 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - # if: env.STAGE == 'prod' + if: env.STAGE == 'prod' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index f843d28a..7fc11570 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -544,8 +544,8 @@ def handler(event, context, local=False): "body": json.dumps( { "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "sub_task_id": "a1b2c3d4-e5f6-7a8b-9c0d-e1f2a3b4c5d6", - "s3_uri": "", + "sub_task_id": "1c09df07-fd29-4de7-b146-fafb591856a9", + "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-13T15:54:58.568594_67557923.csv", } ) } @@ -573,14 +573,14 @@ def handler(event, context, local=False): # Validate required fields task_id = body.get("task_id") - sub_task_id = body.get("sub_task_id") + subtask_id = body.get("sub_task_id") s3_uri = body.get("s3_uri") if not task_id: errors.append({"error": "Missing required field: task_id"}) continue - if not sub_task_id: + if not subtask_id: errors.append({"error": "Missing required field: sub_task_id"}) continue @@ -598,7 +598,7 @@ def handler(event, context, local=False): # Convert sub_task_id to UUID try: subtask_id = ( - UUID(sub_task_id) if isinstance(sub_task_id, str) else sub_task_id + UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id ) except ValueError as e: errors.append( @@ -756,16 +756,6 @@ def handler(event, context, local=False): except Exception as s3_error: logger.error(f"Failed to save results to S3: {s3_error}") - results.append( - { - "subtask_id": str(subtask_id), - "postcodes_processed": postcodes_processed, - "addresses_processed": addresses_processed, - "uprns_found": uprns_found, - "status": "processed", - } - ) - # Mark subtask as completed try: subtask_interface.update_subtask_status( @@ -777,17 +767,6 @@ def handler(event, context, local=False): except Exception as db_error: logger.error(f"Failed to mark subtask as completed: {db_error}") - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in request body: {e}") - errors.append({"error": "Invalid JSON in request body", "details": str(e)}) - # Mark subtask as failed if we have one - if subtask_id: - try: - subtask_interface.update_subtask_status( - subtask_id, "failed", outputs={"error": str(e)} - ) - except Exception as db_error: - logger.error(f"Failed to update subtask status: {db_error}") except Exception as e: logger.error(f"Unexpected error processing record: {e}", exc_info=True) errors.append({"error": "Unexpected error", "details": str(e)}) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 1049295b..6d8d1095 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -101,8 +101,9 @@ def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> s def create_batch_and_send_to_address2uprn( - batch_rows: list, + batch_df: pd.DataFrame, task_id: str, + sub_task_id: str, subtask_interface: SubTaskInterface, bucket_name: str, ) -> str: @@ -118,291 +119,177 @@ def create_batch_and_send_to_address2uprn( Returns: The created batch subtask ID """ - # Generate unique batch subtask ID - batch_sub_task_id = str(uuid4()) - # Upload batch to S3 - batch_df = pd.DataFrame(batch_rows) - s3_uri = upload_batch_to_s3(batch_df, str(task_id), batch_sub_task_id, bucket_name) + + s3_uri = upload_batch_to_s3(batch_df, str(task_id), str(sub_task_id), bucket_name) # Create a new subtask for this batch with all inputs created_batch_sub_task_id = subtask_interface.create_subtask( task_id=task_id, inputs={ "task_id": str(task_id), - "sub_task_id": batch_sub_task_id, - "batch_size": len(batch_rows), "s3_uri": s3_uri, }, ) + logger.info(f"Created batch subtask {created_batch_sub_task_id}") - # Send message with S3 reference - send_to_address2uprn_queue( - task_id=str(task_id), - sub_task_id=str(created_batch_sub_task_id), - s3_uri=s3_uri, - ) + # # Send message with S3 reference + # send_to_address2uprn_queue( + # task_id=str(task_id), + # sub_task_id=str(created_batch_sub_task_id), + # s3_uri=s3_uri, + # ) return created_batch_sub_task_id -def handler(event, context): +def handler(event, context, local=False): print(f"Function: {context.function_name}") print(f"Request ID: {context.aws_request_id}") # Example SQS message for testing (copy and paste into SQS): - # { - # "task_id":"e31f2f21-175b-4a91-a3ec-a6baa325e917", - # "s3_uri":"s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv" - # } - + if local is True: + event = { + "Records": [ + { + "body": json.dumps( + { + "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0", + "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv", + } + ) + } + ] + } # Handle both single event and batch events (SQS, etc.) records = event.get("Records", [event]) results = [] errors = [] subtask_interface = SubTaskInterface() bucket_name = os.getenv("S3_BUCKET_NAME") + if local: + bucket_name = "retrofit-data-dev" for record in records: + if local: + record = records[0] task_id = None subtask_id = None - try: - # Parse body (inputs) - if isinstance(record.get("body"), str): - body = json.loads(record["body"]) - else: - body = record.get("body", {}) + # Parse body (inputs) - # Validate required fields - task_id = body.get("task_id") - s3_uri = body.get("s3_uri") + if isinstance(record.get("body"), str): + body = json.loads(record["body"]) + else: + body = record.get("body", {}) - if not task_id: - errors.append({"error": "Missing required field: task_id"}) - continue + # Validate required fields + task_id = body.get("task_id") + subtask_id = body.get("sub_task_id") + s3_uri = body.get("s3_uri") - if not s3_uri: - errors.append({"error": "Missing required field: s3_uri"}) - continue + # Convert task_id to UUID + task_id = UUID(task_id) if isinstance(task_id, str) else task_id + subtask_id = UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id - # Convert task_id to UUID - try: - task_id = UUID(task_id) if isinstance(task_id, str) else task_id - except ValueError as e: - errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) - continue + # Mark subtask as in progress + subtask_interface.update_subtask_status(subtask_id, "in progress") + logger.info(f"Marked subtask {subtask_id} as in progress") - # Create a new subtask for this postcode splitter invocation - subtask_id = subtask_interface.create_subtask( - task_id=task_id, inputs={"s3_uri": s3_uri} + # Read CSV from S3 + bucket, key = parse_s3_uri(s3_uri) + logger.info(f"S3 Bucket: {bucket}, Key: {key}") + + csv_data = read_csv_from_s3_dict(bucket, key) + df = pd.DataFrame(csv_data) + + # TODO: Change the input to the file you want + # df = df.head(1983) + df = df.head(502) + + logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") + + # Sanitise postcodes + df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "") + + df = df.dropna(subset=["postcode_clean"]) + + batch_size = 500 + if df.shape[0] < batch_size: + create_batch_and_send_to_address2uprn( + batch_df=df, + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) - logger.info(f"Created subtask {subtask_id} for task {task_id}") - - # Mark subtask as in progress - subtask_interface.update_subtask_status(subtask_id, "in progress") - logger.info(f"Marked subtask {subtask_id} as in progress") - - # Read CSV from S3 - logger.info(f"Processing S3 URI: {s3_uri}") - bucket, key = parse_s3_uri(s3_uri) - logger.info(f"S3 Bucket: {bucket}, Key: {key}") - - csv_data = read_csv_from_s3_dict(bucket, key) - df = pd.DataFrame(csv_data) - - # df = df.head(1983) - df = df.head(5) - - logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") - - # Sanitise postcodes - df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "") - - clean_df = df.dropna(subset=["postcode_clean"]) - + else: postcode_to_addresses = { - postcode: group.to_dict(orient="records") - for postcode, group in clean_df.groupby("postcode_clean", sort=False) + postcode: group + for postcode, group in df.groupby("postcode_clean", sort=False) } - logger.info(f"Total postcodes: {len(postcode_to_addresses)}") + count = 0 + buffer = [] - # Calculate total rows to send - total_rows = sum(len(rows) for rows in postcode_to_addresses.values()) - logger.info(f"Total rows to send: {total_rows}") + for postcode, group_df in postcode_to_addresses.items(): + group_len = len(group_df) - batch_size = 500 - - # If all rows fit in one batch, just send them all at once - if total_rows <= batch_size: - all_rows = [] - for postcode, rows in postcode_to_addresses.items(): - all_rows.extend(rows) - try: - create_batch_and_send_to_address2uprn( - batch_rows=all_rows, - task_id=task_id, - subtask_interface=subtask_interface, - bucket_name=bucket_name, - ) - logger.info( - f"Sent all {len(all_rows)} rows in single batch to address2UPRN queue" - ) - except Exception as e: - logger.error( - f"Failed to send all rows to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) - else: - # Multi-batch processing for large datasets - batch_rows = [] - total_sent = 0 - - for postcode, rows in postcode_to_addresses.items(): - logger.info(f"Processing postcode {postcode} with {len(rows)} rows") - # If postcode itself is larger than batch_size, send it individually - if len(rows) > batch_size: - # First, send the current batch if it has data - if batch_rows: - try: - create_batch_and_send_to_address2uprn( - batch_rows=batch_rows, - task_id=task_id, - subtask_interface=subtask_interface, - bucket_name=bucket_name, - ) - logger.info( - f"Sent batch of {len(batch_rows)} rows to address2UPRN queue" - ) - batch_rows = [] - except Exception as e: - logger.error( - f"Failed to send batch to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) - - # Send the large postcode on its own - try: - create_batch_and_send_to_address2uprn( - batch_rows=rows, - task_id=task_id, - subtask_interface=subtask_interface, - bucket_name=bucket_name, - ) - logger.info( - f"Sent large postcode {postcode} ({len(rows)} rows) to address2UPRN queue" - ) - except Exception as e: - logger.error( - f"Failed to send large postcode to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) - continue - - # If adding this postcode's rows would exceed batch_size, send current batch - current_batch_size = len(batch_rows) + len(rows) - if batch_rows and current_batch_size > batch_size: - logger.info( - f"Batch threshold reached: current {len(batch_rows)} + next postcode {len(rows)} = {current_batch_size} > {batch_size}" - ) - try: - create_batch_and_send_to_address2uprn( - batch_rows=batch_rows, - task_id=task_id, - subtask_interface=subtask_interface, - bucket_name=bucket_name, - ) - logger.info( - f"Sent batch of {len(batch_rows)} rows to address2UPRN queue (total sent: {total_sent})" - ) - total_sent += len(batch_rows) - batch_rows = [] - except Exception as e: - logger.error( - f"Failed to send batch to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) - - # Add current postcode's rows to batch - batch_rows.extend(rows) - - # Send remaining batch - if batch_rows: - try: + # If single postcode is bigger than batch_size → send directly + if group_len >= batch_size: + if buffer: create_batch_and_send_to_address2uprn( - batch_rows=batch_rows, + batch_df=pd.concat(buffer, ignore_index=True), task_id=task_id, + sub_task_id=subtask_id, subtask_interface=subtask_interface, bucket_name=bucket_name, ) - total_sent += len(batch_rows) - logger.info( - f"Sent final batch of {len(batch_rows)} rows to address2UPRN queue (total sent: {total_sent})" - ) - batch_rows = [] - except Exception as e: - logger.error( - f"Failed to send final batch to address2UPRN queue: {e}", - exc_info=True, - ) - errors.append( - { - "error": "Failed to send to address2UPRN queue", - "details": str(e), - } - ) + buffer = [] + count = 0 - except json.JSONDecodeError as e: - logger.error(f"Invalid JSON in request body: {e}") - errors.append({"error": "Invalid JSON in request body", "details": str(e)}) - # Mark subtask as failed if we have one - if subtask_id: - try: - subtask_interface.update_subtask_status( - subtask_id, "failed", outputs={"error": str(e)} + create_batch_and_send_to_address2uprn( + batch_df=group_df, + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) - except Exception as db_error: - logger.error(f"Failed to update subtask status: {db_error}") - except Exception as e: - logger.error(f"Unexpected error processing record: {e}", exc_info=True) - errors.append({"error": "Unexpected error", "details": str(e)}) - # Mark subtask as failed if we have one - if subtask_id: - try: - subtask_interface.update_subtask_status( - subtask_id, "failed", outputs={"error": str(e)} - ) - except Exception as db_error: - logger.error(f"Failed to update subtask status: {db_error}") + continue - # Return error if all records failed - if errors and not results: - return {"statusCode": 500, "body": json.dumps({"errors": errors})} + # If adding would exceed batch → flush first + if count + group_len > batch_size: + create_batch_and_send_to_address2uprn( + batch_df=pd.concat(buffer, ignore_index=True), + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, + ) + buffer = [] + count = 0 + + # Add group + buffer.append(group_df) + count += group_len + + # Final flush + if buffer: + create_batch_and_send_to_address2uprn( + batch_df=pd.concat(buffer, ignore_index=True), + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, + ) + + # Mark subtask as completed + subtask_interface.update_subtask_status( + subtask_id, + "completed", + outputs={"rows_processed": "todo -> show sensible output"}, + ) return { "statusCode": 200, From e6c0feaf1cffa4cfe26ef742382a0cd77f2f3f23 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 09:12:55 +0000 Subject: [PATCH 155/340] remove unused import --- backend/app/domain/classes/plan.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 4bd8f962..7970abcd 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -2,8 +2,6 @@ from __future__ import annotations from dataclasses import replace from typing import Optional -from sqlalchemy import Tuple - from backend.app.db.models.portfolio import PortfolioGoal from backend.app.db.models.recommendations import ( PlanModel, From d1fb1a6d39a9457f3944442b981b77fd4fccc2c0 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 09:45:26 +0000 Subject: [PATCH 156/340] typehint read_io_from_s3 signature to remove pylance problems in calling modules --- utils/s3.py | 119 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 73 insertions(+), 46 deletions(-) diff --git a/utils/s3.py b/utils/s3.py index 2e67d4f0..b243b2ab 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -17,11 +17,11 @@ def read_from_s3(bucket_name, s3_file_name): :param s3_file_name: The file name to use for the saved data in S3 """ # Initialize a session using Amazon S3 - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") # Get the MessagePack data from S3 obj = s3.Object(bucket_name, s3_file_name) - data = obj.get()['Body'].read() + data = obj.get()["Body"].read() return data @@ -36,7 +36,7 @@ def save_data_to_s3(data, bucket_name, s3_file_name): """ # Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") except NoCredentialsError: print("Credentials not available.") return @@ -46,12 +46,12 @@ def save_data_to_s3(data, bucket_name, s3_file_name): try: s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data) - print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}') + print(f"Successfully uploaded data to {bucket_name}/{s3_file_name}") except Exception as e: - print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}') + print(f"Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}") -def read_io_from_s3(bucket_name, file_key): +def read_io_from_s3(bucket_name: str, file_key: str) -> BytesIO: """ Read a file from S3 into a BytesIO object. This can be used by other methods to parse the response @@ -61,13 +61,13 @@ def read_io_from_s3(bucket_name, file_key): :param file_key: The file name of the shapefile in S3 :return: Io file to be parsed by another method """ - client = boto3.client('s3') + client = boto3.client("s3") # Get the Parquet file from S3 response = client.get_object(Bucket=bucket_name, Key=file_key) # Read the file into an io object - buffer = BytesIO(response['Body'].read()) + buffer = BytesIO(response["Body"].read()) return buffer @@ -86,7 +86,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key): df.to_parquet(parquet_buffer) # Create the boto3 client - client = boto3.client('s3') + client = boto3.client("s3") # Upload the Parquet file to S3 client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue()) @@ -102,15 +102,14 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key): """ if bucket_name is None: - raise ValueError("Bucket name is None when trying to read dataframe from parquet") + raise ValueError( + "Bucket name is None when trying to read dataframe from parquet" + ) if not file_key.endswith(".parquet"): raise ValueError("This file doesn't look like a parquet file") - parquet_buffer = read_io_from_s3( - bucket_name=bucket_name, - file_key=file_key - ) + parquet_buffer = read_io_from_s3(bucket_name=bucket_name, file_key=file_key) df = pd.read_parquet(parquet_buffer) @@ -130,7 +129,7 @@ def save_csv_to_s3(dataframe, bucket_name, file_name): bool: True if the file was successfully saved, False otherwise. """ # Initialize S3 client - s3 = boto3.client('s3') + s3 = boto3.client("s3") # Create an in-memory text stream csv_buffer = StringIO() @@ -159,7 +158,7 @@ def save_pickle_to_s3(data, bucket_name, s3_file_name): try: serialized_data = pickle.dumps(data) except Exception as e: - print(f'Failed to serialize data: {str(e)}') + print(f"Failed to serialize data: {str(e)}") return # Use save_data_to_s3 function to upload the serialized data to S3 @@ -175,9 +174,9 @@ def read_pickle_from_s3(bucket_name, s3_file_name): :return: The data read from the pickle file """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name) - serialized_data = s3_response['Body'].read() + serialized_data = s3_response["Body"].read() except NoCredentialsError: logger.errpr("Credentials not available.") return None @@ -185,20 +184,24 @@ def read_pickle_from_s3(bucket_name, s3_file_name): logger.errpr("Incomplete credentials provided.") return None except Exception as e: - logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}') + logger.error( + f"Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}" + ) return None # Deserialize data from pickle format try: data = pickle.loads(serialized_data) except Exception as e: - logger.error(f'Failed to deserialize data: {str(e)}') + logger.error(f"Failed to deserialize data: {str(e)}") return None return data -def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None): +def read_excel_from_s3( + bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None +): """ Read an Excel file from an S3 bucket and return it as a pandas DataFrame. @@ -222,7 +225,7 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, shee # Drop columns where all values are NaN if drop_all_na: - df.dropna(axis=1, how='all', inplace=True) + df.dropna(axis=1, how="all", inplace=True) # Reset index if the first column is just an index or entirely NaN df.reset_index(drop=True, inplace=True) @@ -254,7 +257,7 @@ def save_excel_to_s3(df, bucket_name, file_key): # Initialize a session using boto3 session = boto3.session.Session() - s3 = session.resource('s3') + s3 = session.resource("s3") # Upload the Excel file from the buffer to S3 bucket = s3.Bucket(bucket_name) @@ -264,17 +267,19 @@ def save_excel_to_s3(df, bucket_name, file_key): def read_csv_from_s3(bucket_name, filepath): - logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'") - s3 = boto3.client('s3') + logger.info( + f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'" + ) + s3 = boto3.client("s3") # Get the object from s3 s3_object = s3.get_object(Bucket=bucket_name, Key=filepath) # Read the CSV body from the s3 object - body = s3_object['Body'].read() + body = s3_object["Body"].read() # Use StringIO to create a file-like object from the string - csv_data = StringIO(body.decode('utf-8')) + csv_data = StringIO(body.decode("utf-8")) # Use csv library to read it into a list of dictionaries reader = csv.DictReader(csv_data) @@ -292,14 +297,16 @@ def list_files_in_s3_folder(bucket_name, folder_name): :return: A list of file keys in the specified S3 folder. """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) - if 'Contents' not in response: - logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + if "Contents" not in response: + logger.info( + f"No files found in folder {folder_name} in bucket {bucket_name}." + ) return [] - file_keys = [content['Key'] for content in response['Contents']] + file_keys = [content["Key"] for content in response["Contents"]] return file_keys except NoCredentialsError: @@ -309,7 +316,9 @@ def list_files_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] @@ -335,22 +344,30 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): """ # For this function, folder_name should end with a forward slash - if not folder_name.endswith('/'): - folder_name += '/' + if not folder_name.endswith("/"): + folder_name += "/" try: - s3 = boto3.client('s3') - response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/') + s3 = boto3.client("s3") + response = s3.list_objects_v2( + Bucket=bucket_name, Prefix=folder_name, Delimiter="/" + ) items = [] # Add files to the list - if 'Contents' in response: - items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name]) + if "Contents" in response: + items.extend( + [ + content["Key"] + for content in response["Contents"] + if content["Key"] != folder_name + ] + ) # Add immediate subfolders to the list - if 'CommonPrefixes' in response: - items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']]) + if "CommonPrefixes" in response: + items.extend([prefix["Prefix"] for prefix in response["CommonPrefixes"]]) return items @@ -361,7 +378,9 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] @@ -374,15 +393,21 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): :return: A list of XML file keys in the specified S3 folder. """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) - if 'Contents' not in response: - logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + if "Contents" not in response: + logger.info( + f"No files found in folder {folder_name} in bucket {bucket_name}." + ) return [] # Filter XML files - xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')] + xml_files = [ + content["Key"] + for content in response["Contents"] + if content["Key"].endswith(".xml") + ] return xml_files except NoCredentialsError: @@ -392,5 +417,7 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] From 53cfd9ee8c1b4cd3d192e48929e9b8591121a57c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 09:57:00 +0000 Subject: [PATCH 157/340] start setting up lambda deployment code --- backend/categorisation/handler/Dockerfile | 47 +++++++++++++++++++ backend/categorisation/handler/handler.py | 10 ++++ .../categorisation/handler/requirements.txt | 3 ++ 3 files changed, 60 insertions(+) create mode 100644 backend/categorisation/handler/Dockerfile create mode 100644 backend/categorisation/handler/handler.py create mode 100644 backend/categorisation/handler/requirements.txt diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile new file mode 100644 index 00000000..46c8d477 --- /dev/null +++ b/backend/categorisation/handler/Dockerfile @@ -0,0 +1,47 @@ +FROM public.ecr.aws/lambda/python:3.11 +# For local running: +# FROM python:3.11.10-bullseye + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# Environment +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} + +COPY backend/.env.test backend/.env + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/categorisation/handler/requirements.txt . + +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +COPY backend/categorisation/ backend/categorisation/ + +COPY backend/app/db/connection.py backend/app/db/connection.py +COPY backend/app/config.py backend/app/config.py + +COPY backend/__init__.py backend/__init__.py +COPY backend/app/__init__.py backend/app/__init__.py +COPY backend/app/db/__init__.py backend/app/db/__init__.py + + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["backend/categorisation/handler/handler.handler"] +# For local running +# CMD ["python", "-m", "backend.categorisation.handler.handler"] diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py new file mode 100644 index 00000000..e74bfeb5 --- /dev/null +++ b/backend/categorisation/handler/handler.py @@ -0,0 +1,10 @@ +from typing import Any, Mapping +from utils.logger import setup_logger + + +logger = setup_logger() + + +def handler(event: Mapping[str, Any], context: Any) -> None: + + pass diff --git a/backend/categorisation/handler/requirements.txt b/backend/categorisation/handler/requirements.txt new file mode 100644 index 00000000..48e5b561 --- /dev/null +++ b/backend/categorisation/handler/requirements.txt @@ -0,0 +1,3 @@ +sqlmodel +pydantic-settings +psycopg2-binary==2.9.10 \ No newline at end of file From 3349edda897dc21dc5d5b6b04cefb39223c75dbd Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 10:03:07 +0000 Subject: [PATCH 158/340] initial definition of trigger request object --- backend/categorisation/categorisation_trigger_request.py | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 backend/categorisation/categorisation_trigger_request.py diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py new file mode 100644 index 00000000..9ef1d106 --- /dev/null +++ b/backend/categorisation/categorisation_trigger_request.py @@ -0,0 +1,5 @@ +from pydantic import BaseModel + + +class CategorisationTriggerRequest(BaseModel): + portfolio_id: int From b99fb686ddff9aa530c9f70c757e4e6a84721448 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 11:59:03 +0000 Subject: [PATCH 159/340] only write to db if is_default value has changed --- backend/categorisation/processor.py | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 445bbbc4..68e8c991 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -72,14 +72,22 @@ def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan: return cheapest_plan -def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: - plan_models: List[PlanModel] = [] - scenario_models: List[ScenarioModel] = [] +def _update_default_flags(plans: List["Plan"], cheapest_plan: Plan) -> None: + plans_to_update: List[Plan] = [] for plan in plans: - plan.set_default(plan.id == cheapest_plan.id) - plan_model, scenario_model = plan.to_sqlalchemy() - plan_models.append(plan_model) - scenario_models.append(scenario_model) + should_be_default: bool = plan.id == cheapest_plan.id + if plan.record.is_default != should_be_default: + plan.set_default(should_be_default) + plans_to_update.append(plan) - bulk_update_plans(plan_models, scenario_models) + if plans_to_update: + plan_models: List[PlanModel] = [] + scenario_models: List[ScenarioModel] = [] + + for plan in plans_to_update: + plan_model, scenario_model = plan.to_sqlalchemy() + plan_models.append(plan_model) + scenario_models.append(scenario_model) + + bulk_update_plans(plan_models, scenario_models) From 68c3a20d0afd612ecc1acaf3987055502e78784b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 12:04:49 +0000 Subject: [PATCH 160/340] typehint correction --- backend/categorisation/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 68e8c991..7c5698b7 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -72,7 +72,7 @@ def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan: return cheapest_plan -def _update_default_flags(plans: List["Plan"], cheapest_plan: Plan) -> None: +def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: plans_to_update: List[Plan] = [] for plan in plans: From c1f784b87fd90e09a5af74ab1189d9f04e017f33 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 12:13:16 +0000 Subject: [PATCH 161/340] address 2uprn and postcode splitter works locally --- backend/address2UPRN/main.py | 6 ++++-- backend/postcode_splitter/main.py | 6 +----- 2 files changed, 5 insertions(+), 7 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 7fc11570..c51171e5 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -504,6 +504,8 @@ def save_results_to_s3( """ if bucket_name is None: bucket_name = os.getenv("S3_BUCKET_NAME") + if bucket_name is None: + bucket_name = "retrofit-data-dev" if not bucket_name: logger.error( @@ -544,8 +546,8 @@ def handler(event, context, local=False): "body": json.dumps( { "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", - "sub_task_id": "1c09df07-fd29-4de7-b146-fafb591856a9", - "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-13T15:54:58.568594_67557923.csv", + "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d", + "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv", } ) } diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 6d8d1095..6cc40fc4 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -204,10 +204,6 @@ def handler(event, context, local=False): csv_data = read_csv_from_s3_dict(bucket, key) df = pd.DataFrame(csv_data) - # TODO: Change the input to the file you want - # df = df.head(1983) - df = df.head(502) - logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") # Sanitise postcodes @@ -288,7 +284,7 @@ def handler(event, context, local=False): subtask_interface.update_subtask_status( subtask_id, "completed", - outputs={"rows_processed": "todo -> show sensible output"}, + outputs={"rows_processed": "completed"}, ) return { From a6c827c47fb298b31cb4e7c0a1d033033f84ecfa Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 12:30:57 +0000 Subject: [PATCH 162/340] terraform apply --- .github/workflows/deploy_terraform.yml | 6 ++-- .github/workflows/unit_tests.yml | 46 +++++++++++++------------- 2 files changed, 25 insertions(+), 27 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 2fd12fe6..e7c8fb94 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -117,8 +117,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.address2uprn_image.outputs.image_digest }} - # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - terraform_apply: 'true' + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -159,8 +158,7 @@ jobs: stage: ${{ needs.determine_stage.outputs.stage }} ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} - # terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - terraform_apply: 'true' + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 5521a481..cc6431b8 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -1,30 +1,30 @@ -# name: Run unit tests +name: Run unit tests -# on: -# pull_request: -# branches: -# - "**" +on: + pull_request: + branches: + - "**" -# jobs: -# test: -# runs-on: ubuntu-latest +jobs: + test: + runs-on: ubuntu-latest -# steps: -# - name: Checkout code -# uses: actions/checkout@v4 + steps: + - name: Checkout code + uses: actions/checkout@v4 -# - name: Set up Python 3.11 -# uses: actions/setup-python@v4 -# with: -# python-version: '3.11' + - name: Set up Python 3.11 + uses: actions/setup-python@v4 + with: + python-version: '3.11' -# - name: Install tox via Makefile -# run: | -# make setup + - name: Install tox via Makefile + run: | + make setup -# - name: Run tests with tox via Makefile -# env: -# EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} -# run: | -# make test \ No newline at end of file + - name: Run tests with tox via Makefile + env: + EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} + run: | + make test \ No newline at end of file From dbba066ba57e6026a86c645d2daf0077d74e64f2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 12:51:56 +0000 Subject: [PATCH 163/340] remove docker as i don't need locally working workflows anymore --- .devcontainer/backend/Dockerfile | 2 -- 1 file changed, 2 deletions(-) diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile index f48fb99f..99cd66d6 100644 --- a/.devcontainer/backend/Dockerfile +++ b/.devcontainer/backend/Dockerfile @@ -3,8 +3,6 @@ FROM python:3.11.10-bullseye ARG USER=vscode ARG DEBIAN_FRONTEND=noninteractive -ARG DOCKER_GID=1003 - # 1) Toolchain + utilities for building libpostal RUN apt-get update && apt-get install -y --no-install-recommends \ From 62a8f543f60f4548f2376886337d1a46053947e5 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 13:04:27 +0000 Subject: [PATCH 164/340] get rid of comments --- backend/address2UPRN/main.py | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index c51171e5..6ca2fd5c 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -3,7 +3,6 @@ import os from urllib.parse import urlencode import pandas as pd from difflib import SequenceMatcher -from tqdm import tqdm from utils.logger import setup_logger import re from typing import Set @@ -334,22 +333,10 @@ def get_uprn_candidates( def get_uprn_with_epc_df( user_inputed_address: str, epc_df: pd.DataFrame, - verbose=False, ): """ Return uprn (str) using a pre-fetched EPC dataframe. This avoids calling the API multiple times for the same postcode. - - Args: - user_inputed_address: The user's address string - epc_df: Pre-fetched EPC data for the postcode - return_address: Whether to return the matched address - return_EPC: Whether to return the EPC rating - return_score: Whether to return the lexiscore - - Returns: - uprn (str), or tuple if return_address/return_EPC/return_score are True - Returns None if no match found, lexiscore < 0.7, or UPRN is empty """ if epc_df.empty: return None From ed8d5629170ab328c7bed6d5b249916a839e91db Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 13:49:49 +0000 Subject: [PATCH 165/340] added logger and verbose --- backend/address2UPRN/main.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 6ca2fd5c..73fe7c7d 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -333,6 +333,7 @@ def get_uprn_candidates( def get_uprn_with_epc_df( user_inputed_address: str, epc_df: pd.DataFrame, + verbose: bool = False, ): """ Return uprn (str) using a pre-fetched EPC dataframe. @@ -363,7 +364,7 @@ def get_uprn_with_epc_df( address = top_rank_df["address"].values[0] score = float(top_rank_df["lexiscore"].values[0]) - # logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") + logger.info(f"Address found to be: {address}, with lexiscore {score}") # Safe to return the agreed UPRN found_uprn = top_rank_df.iloc[0]["uprn"] @@ -379,7 +380,7 @@ def get_uprn_with_epc_df( def get_uprn( user_inputed_address: str, postcode: str, - verbose=False, + verbose: bool = False, ): """ Return uprn (str) From 61377497ff5405a7af0cd1414e5a8c71eb32dadc Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 14:07:23 +0000 Subject: [PATCH 166/340] get rid of unneccsary variable declartion --- backend/address2UPRN/main.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 73fe7c7d..a067593e 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -646,9 +646,7 @@ def handler(event, context, local=False): logger.info(f"Total postcodes: {len(postcode_to_addresses)}") # Process each postcode group - postcodes_processed = 0 - addresses_processed = 0 - uprns_found = 0 + results_data = [] for postcode, postcode_rows in postcode_to_addresses.items(): @@ -691,7 +689,6 @@ def handler(event, context, local=False): # Parse result tuple if successful if result: uprn, found_address, score = result - uprns_found += 1 logger.info( f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})" ) @@ -717,8 +714,6 @@ def handler(event, context, local=False): } ) - addresses_processed += 1 - except Exception as e: logger.error( f"Error processing address {row.get('user_input', 'unknown')}: {e}" @@ -735,8 +730,6 @@ def handler(event, context, local=False): ) continue - postcodes_processed += 1 - # Create results DataFrame result_df = pd.DataFrame(results_data) From 4ca538ecb2efe27128ac2460966ff962bedd950c Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 14:12:09 +0000 Subject: [PATCH 167/340] added commnets on script --- backend/address2UPRN/script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index 59855dbc..090ac5ae 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -1,3 +1,5 @@ +# one time script for a customer forhousing + import pandas as pd from tqdm import tqdm from backend.address2UPRN.main import get_uprn From 0a87ba786c61a089fba8f22533727813128960f8 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 14:14:01 +0000 Subject: [PATCH 168/340] local run stuff --- backend/address2UPRN/main.py | 2 -- backend/postcode_splitter/main.py | 9 --------- 2 files changed, 11 deletions(-) diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index a067593e..af29a095 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -492,8 +492,6 @@ def save_results_to_s3( """ if bucket_name is None: bucket_name = os.getenv("S3_BUCKET_NAME") - if bucket_name is None: - bucket_name = "retrofit-data-dev" if not bucket_name: logger.error( diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 6cc40fc4..70ecf5f1 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -23,15 +23,6 @@ def upload_batch_to_s3( ) -> str: """ Upload batch DataFrame to S3 as CSV. - - Args: - batch_df: The DataFrame containing batch data - task_id: The parent task ID (used for file path) - sub_task_id: The subtask ID (used for file path) - bucket_name: The S3 bucket name (defaults to env variable) - - Returns: - S3 URI (s3://bucket/key) of the uploaded file """ if bucket_name is None: bucket_name = os.getenv("S3_BUCKET_NAME") From 12b99669822b72f54a09901c804372044255ffce Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 14:16:57 +0000 Subject: [PATCH 169/340] send message to address2uprn --- backend/postcode_splitter/main.py | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index 70ecf5f1..4f63ed4b 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -101,14 +101,6 @@ def create_batch_and_send_to_address2uprn( """ Create a batch DataFrame, upload to S3, create subtask, and send to address2UPRN queue. - Args: - batch_rows: List of row dictionaries for this batch - task_id: The parent task ID - subtask_interface: SubTaskInterface instance - bucket_name: S3 bucket name - - Returns: - The created batch subtask ID """ # Upload batch to S3 @@ -125,12 +117,12 @@ def create_batch_and_send_to_address2uprn( logger.info(f"Created batch subtask {created_batch_sub_task_id}") - # # Send message with S3 reference - # send_to_address2uprn_queue( - # task_id=str(task_id), - # sub_task_id=str(created_batch_sub_task_id), - # s3_uri=s3_uri, - # ) + # Send message with S3 reference + send_to_address2uprn_queue( + task_id=str(task_id), + sub_task_id=str(created_batch_sub_task_id), + s3_uri=s3_uri, + ) return created_batch_sub_task_id From 9f6d61b178d6ef6c8e6902d0dc4032117c94a818 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 14:21:44 +0000 Subject: [PATCH 170/340] get rid of todo --- infrastructure/terraform/lambda/address2UPRN/main.tf | 2 +- infrastructure/terraform/lambda/postcodeSplitter/main.tf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 5f0c4a11..5a36153e 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -2,7 +2,7 @@ data "terraform_remote_state" "shared" { backend = "s3" config = { bucket = "assessment-model-terraform-state" - key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this + key = "env:/${var.stage}/terraform.tfstate" region = "eu-west-2" } } diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index e17d272d..d37a01c9 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -2,7 +2,7 @@ data "terraform_remote_state" "shared" { backend = "s3" config = { bucket = "assessment-model-terraform-state" - key = "env:/${var.stage}/terraform.tfstate" # TODO: dont hardcode this + key = "env:/${var.stage}/terraform.tfstate" region = "eu-west-2" } } From 8cf2d9d95a56854d2d349a9fea195d6b74a838bc Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 14:49:09 +0000 Subject: [PATCH 171/340] pull infrastructure changes from Jun-te's branch --- .../terraform/lambda/_template/main.tf | 49 +++++++++++ .../terraform/lambda/address2UPRN/main.tf | 49 +++++++++-- .../terraform/lambda/address2UPRN/outputs.tf | 14 ++++ .../terraform/lambda/condition-etl/main.tf | 1 - .../lambda/modules/lambda_with_sqs/outputs.tf | 1 + .../terraform/lambda/postcodeSplitter/main.tf | 83 ++++++++++++++++++- .../lambda/postcodeSplitter/variables.tf | 9 ++ .../modules/general_iam_policy/main.tf | 21 +++++ .../modules/general_iam_policy/outputs.tf | 9 ++ .../modules/general_iam_policy/variables.tf | 32 +++++++ .../modules/lambda_execution_role/main.tf | 16 ---- .../terraform/modules/s3_iam_policy/main.tf | 31 +++++++ .../modules/s3_iam_policy/outputs.tf | 14 ++++ .../modules/s3_iam_policy/variables.tf | 42 ++++++++++ infrastructure/terraform/shared/main.tf | 77 +++++++++++------ 15 files changed, 398 insertions(+), 50 deletions(-) create mode 100644 infrastructure/terraform/lambda/address2UPRN/outputs.tf create mode 100644 infrastructure/terraform/modules/general_iam_policy/main.tf create mode 100644 infrastructure/terraform/modules/general_iam_policy/outputs.tf create mode 100644 infrastructure/terraform/modules/general_iam_policy/variables.tf create mode 100644 infrastructure/terraform/modules/s3_iam_policy/main.tf create mode 100644 infrastructure/terraform/modules/s3_iam_policy/outputs.tf create mode 100644 infrastructure/terraform/modules/s3_iam_policy/variables.tf diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf index 3010aa8a..7f60d684 100644 --- a/infrastructure/terraform/lambda/_template/main.tf +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -1,3 +1,30 @@ +# ============================================================================== +# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy +# ============================================================================== +# Instructions: +# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name") +# 2. Add any additional environment variables as needed +# 3. To attach S3 IAM policies from shared state: +# - Uncomment the S3 policy attachment section below +# - Update the policy_arn to match the output from shared/main.tf +# - Available shared outputs (examples): +# - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn +# - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +# 4. To create a NEW S3 policy: +# - Add a new module "lambda_s3_policy" in shared/main.tf using the +# s3_iam_policy module (see examples in shared/main.tf) +# - Then reference it here using data.terraform_remote_state.shared.outputs +# ============================================================================== + +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -12,3 +39,25 @@ module "lambda" { LOG_LEVEL = "info" } } + +# ====================================================================== +# OPTIONAL: Attach S3 IAM policy to Lambda execution role +# ====================================================================== +# Uncomment and configure the resource below to attach S3 permissions +# +# Example 1: Attach existing policy from shared state +# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" { +# role = module.lambda.role_name +# policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn +# } +# +# Example 2: Attach multiple policies +# resource "aws_iam_role_policy_attachment" "lambda_read_policy" { +# role = module.lambda.role_name +# policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +# } +# +# resource "aws_iam_role_policy_attachment" "lambda_write_policy" { +# role = module.lambda.role_name +# policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn +# } diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 46b193f2..5a36153e 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -1,3 +1,19 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + module "address2uprn" { source = "../modules/lambda_with_sqs" @@ -6,9 +22,32 @@ module "address2uprn" { image_uri = local.image_uri - - environment = { - STAGE = var.stage - LOG_LEVEL = "info" - } + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + ENGINE_SQS_URL = "test" + ENERGY_ASSESSMENTS_BUCKET = "test" + S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name + }, + ) } + +# Attach S3 read policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" { + role = module.address2uprn.role_name + policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/address2UPRN/outputs.tf b/infrastructure/terraform/lambda/address2UPRN/outputs.tf new file mode 100644 index 00000000..e4645a0a --- /dev/null +++ b/infrastructure/terraform/lambda/address2UPRN/outputs.tf @@ -0,0 +1,14 @@ +output "address2uprn_queue_url" { + value = module.address2uprn.queue_url + description = "URL of the address2UPRN SQS queue" +} + +output "address2uprn_queue_arn" { + value = module.address2uprn.queue_arn + description = "ARN of the address2UPRN SQS queue" +} + +output "address2uprn_lambda_arn" { + value = module.address2uprn.lambda_arn + description = "ARN of the address2UPRN Lambda function" +} diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/infrastructure/terraform/lambda/condition-etl/main.tf index 4219f209..0128f975 100644 --- a/infrastructure/terraform/lambda/condition-etl/main.tf +++ b/infrastructure/terraform/lambda/condition-etl/main.tf @@ -23,7 +23,6 @@ module "lambda" { stage = var.stage image_uri = local.image_uri - timeout = 180 environment = merge( diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf index afc9246d..b408593f 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf @@ -9,3 +9,4 @@ output "queue_arn" { output "queue_url" { value = module.queue.queue_url } + diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index ebbdbfdc..d37a01c9 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -1,3 +1,30 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + +# Reference the existing address2UPRN Lambda outputs from address2uprn state +data "terraform_remote_state" "address2uprn" { + backend = "s3" + config = { + bucket = "address2uprn-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -7,8 +34,56 @@ module "lambda" { image_uri = local.image_uri - environment = { - STAGE = var.stage - LOG_LEVEL = "info" - } + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + EPC_AUTH_TOKEN = "test" + ENGINE_SQS_URL = "test" + ENERGY_ASSESSMENTS_BUCKET = "test" + ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url + S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name + }, + ) } + +# Attach S3 read policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +} + +# Create SQS send policy for address2UPRN queue +module "postcode_splitter_sqs_policy" { + source = "../../modules/general_iam_policy" + + policy_name = "postcode-splitter-sqs-send-${var.stage}" + policy_description = "Allow postcode-splitter Lambda to send messages to address2UPRN queue" + + actions = [ + "sqs:SendMessage" + ] + + resources = [ + data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_arn + ] +} + +# Attach SQS policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "postcode_splitter_sqs_send" { + role = module.lambda.role_name + policy_arn = module.postcode_splitter_sqs_policy.policy_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf index 9ce45fa5..7bd68543 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -24,3 +24,12 @@ locals { output "resolved_image_uri" { value = local.image_uri } + + + + + + + + + diff --git a/infrastructure/terraform/modules/general_iam_policy/main.tf b/infrastructure/terraform/modules/general_iam_policy/main.tf new file mode 100644 index 00000000..f7ffe4a1 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/main.tf @@ -0,0 +1,21 @@ +# IAM Policy with dynamic actions and resources +resource "aws_iam_policy" "policy" { + name = var.policy_name + description = var.policy_description + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + merge( + { + Effect = "Allow" + Action = var.actions + Resource = var.resources + }, + var.conditions != null ? { Condition = var.conditions } : {} + ) + ] + }) + + tags = var.tags +} diff --git a/infrastructure/terraform/modules/general_iam_policy/outputs.tf b/infrastructure/terraform/modules/general_iam_policy/outputs.tf new file mode 100644 index 00000000..cfceab05 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/outputs.tf @@ -0,0 +1,9 @@ +output "policy_arn" { + value = aws_iam_policy.policy.arn + description = "ARN of the created IAM policy" +} + +output "policy_name" { + value = aws_iam_policy.policy.name + description = "Name of the created IAM policy" +} diff --git a/infrastructure/terraform/modules/general_iam_policy/variables.tf b/infrastructure/terraform/modules/general_iam_policy/variables.tf new file mode 100644 index 00000000..0d824eb5 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/variables.tf @@ -0,0 +1,32 @@ +variable "policy_name" { + description = "Name of the IAM policy" + type = string +} + +variable "policy_description" { + description = "Description of the IAM policy" + type = string + default = "" +} + +variable "actions" { + description = "List of IAM actions allowed by this policy" + type = list(string) +} + +variable "resources" { + description = "List of AWS resources this policy applies to" + type = list(string) +} + +variable "conditions" { + description = "Optional IAM policy conditions" + type = any + default = null +} + +variable "tags" { + description = "Tags to apply to the policy" + type = map(string) + default = {} +} diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/infrastructure/terraform/modules/lambda_execution_role/main.tf index fa657afd..e593b17c 100644 --- a/infrastructure/terraform/modules/lambda_execution_role/main.tf +++ b/infrastructure/terraform/modules/lambda_execution_role/main.tf @@ -19,19 +19,3 @@ resource "aws_iam_role_policy_attachment" "basic_logs" { policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "aws_iam_role_policy" "ecr_pull" { - role = aws_iam_role.this.name - - policy = jsonencode({ - Version = "2012-10-17" - Statement = [{ - Effect = "Allow" - Action = [ - "ecr:GetAuthorizationToken", - "ecr:BatchGetImage", - "ecr:GetDownloadUrlForLayer" - ] - Resource = "*" - }] - }) -} diff --git a/infrastructure/terraform/modules/s3_iam_policy/main.tf b/infrastructure/terraform/modules/s3_iam_policy/main.tf new file mode 100644 index 00000000..397bd963 --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/main.tf @@ -0,0 +1,31 @@ +# Dynamically build S3 resources list from bucket ARNs and resource paths +locals { + # Generate full resource ARNs by combining bucket ARNs with resource paths + resources = flatten([ + for bucket_arn in var.bucket_arns : [ + for path in var.resource_paths : "${bucket_arn}${path}" + ] + ]) +} + +# IAM Policy with dynamic actions and resources +resource "aws_iam_policy" "s3_policy" { + name = var.policy_name + description = var.policy_description + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + merge( + { + Effect = "Allow" + Action = var.actions + Resource = local.resources + }, + var.conditions != null ? { Condition = var.conditions } : {} + ) + ] + }) + + tags = var.tags +} diff --git a/infrastructure/terraform/modules/s3_iam_policy/outputs.tf b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf new file mode 100644 index 00000000..85defd9c --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf @@ -0,0 +1,14 @@ +output "policy_arn" { + description = "ARN of the S3 IAM policy" + value = aws_iam_policy.s3_policy.arn +} + +output "policy_name" { + description = "Name of the S3 IAM policy" + value = aws_iam_policy.s3_policy.name +} + +output "policy_id" { + description = "ID of the S3 IAM policy" + value = aws_iam_policy.s3_policy.id +} diff --git a/infrastructure/terraform/modules/s3_iam_policy/variables.tf b/infrastructure/terraform/modules/s3_iam_policy/variables.tf new file mode 100644 index 00000000..e2b3d7a8 --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/variables.tf @@ -0,0 +1,42 @@ +variable "policy_name" { + description = "Name of the IAM policy" + type = string +} + +variable "policy_description" { + description = "Description of the IAM policy" + type = string + default = "" +} + +variable "bucket_arns" { + description = "List of S3 bucket ARNs to grant access to" + type = list(string) +} + +variable "actions" { + description = "List of S3 actions to allow (e.g., ['s3:GetObject'], ['s3:PutObject'], ['s3:DeleteObject'])" + type = list(string) + default = ["s3:GetObject"] +} + +variable "resource_paths" { + description = "List of resource paths within buckets (e.g., ['/*'] for all objects, ['/specific-prefix/*'] for specific prefix)" + type = list(string) + default = ["/*"] +} + +variable "conditions" { + description = "Optional IAM policy conditions to apply to the statement" + type = any + default = null +} + +variable "tags" { + description = "Tags to apply to the policy" + type = map(string) + default = {} +} + + + diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index b1474055..acf8c281 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -133,6 +133,11 @@ module "retrofit_sap_data" { allowed_origins = var.allowed_origins } +output "retrofit_sap_data_bucket_name" { + value = module.retrofit_sap_data.bucket_name + description = "Name of the retrofit SAP data bucket" +} + module "retrofit_carbon_predictions" { source = "../modules/s3" bucketname = "retrofit-carbon-predictions-${var.stage}" @@ -305,6 +310,21 @@ module "address2uprn_registry" { } +# S3 policy for postcode splitter to read from retrofit data bucket +module "address2uprn_s3_read_and_write" { + source = "../modules/s3_iam_policy" + + policy_name = "Address2UPRNReadandWriteS3" + policy_description = "Allow address2uprn Lambda to read and write from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"] + resource_paths = ["/*"] +} + +output "address_2_uprn_s3_read_and_write_arn" { + value = module.address2uprn_s3_read_and_write.policy_arn +} + ################################################ # Condition ETL – Lambda ECR ################################################ @@ -321,6 +341,28 @@ module "condition_etl_registry" { } +# Condition Data S3 Bucket to store initial data +module "condition_data_bucket" { + source = "../modules/s3" + bucketname = "condition-data-${var.stage}" + allowed_origins = var.allowed_origins +} + +module "condition_etl_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "ConditionETLReadS3" + policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" + bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] + actions = ["s3:GetObject"] + resource_paths = ["/*"] +} + +output "condition_etl_s3_read_arn" { + value = module.condition_etl_s3_read.policy_arn +} + + ################################################ # Postcode Splitter – Lambda ECR ################################################ @@ -337,30 +379,17 @@ module "postcode_splitter_registry" { } -################################################ -# Conidition data – S3 bucket -################################################ -module "condition_data_bucket" { - source = "../modules/s3" - bucketname = "condition-data-${var.stage}" - allowed_origins = var.allowed_origins +# S3 policy for postcode splitter to read from retrofit data bucket +module "postcode_splitter_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "PostcodeSplitterReadS3" + policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"] + resource_paths = ["/*"] } -resource "aws_iam_policy" "condition_etl_s3_read" { - name = "ConditionETLReadS3" - description = "Allow Lambda to read objects from condition-data-${var.stage}" - policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Effect = "Allow" - Action = ["s3:GetObject"] - Resource = "arn:aws:s3:::condition-data-${var.stage}/*" - } - ] - }) -} - -output "condition_etl_s3_read_arn" { - value = aws_iam_policy.condition_etl_s3_read.arn +output "postcode_splitter_s3_read_arn" { + value = module.postcode_splitter_s3_read.policy_arn } \ No newline at end of file From 7246c953455b8a4d78cf02ed1b6b5380a93af24c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 15:05:42 +0000 Subject: [PATCH 172/340] categorisation terraform --- .github/workflows/deploy_terraform.yml | 40 +++++++++++++++++++ .../terraform/lambda/categorisation/main.tf | 27 +++++++++++++ .../lambda/categorisation/provider.tf | 16 ++++++++ .../lambda/categorisation/variables.tf | 27 +++++++++++++ infrastructure/terraform/shared/main.tf | 16 ++++++++ 5 files changed, 126 insertions(+) create mode 100644 infrastructure/terraform/lambda/categorisation/main.tf create mode 100644 infrastructure/terraform/lambda/categorisation/provider.tf create mode 100644 infrastructure/terraform/lambda/categorisation/variables.tf diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 71e2ad9d..fca44a4c 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -189,6 +189,46 @@ jobs: ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # Categorisation image and Push + # ============================================================ + categorisation_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/categorisation/handler/Dockerfile + build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + + # ============================================================ + # Deploy Categorisation Lambda + # ============================================================ + condition_etl_lambda: + needs: [categorisation_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: categorisation + lambda_path: infrastructure/terraform/lambda/categorisation + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.categorisation_image.outputs.image_digest }} + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} diff --git a/infrastructure/terraform/lambda/categorisation/main.tf b/infrastructure/terraform/lambda/categorisation/main.tf new file mode 100644 index 00000000..a402a386 --- /dev/null +++ b/infrastructure/terraform/lambda/categorisation/main.tf @@ -0,0 +1,27 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "categorisation" + stage = var.stage + + image_uri = local.image_uri + + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + } + ) +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/categorisation/provider.tf b/infrastructure/terraform/lambda/categorisation/provider.tf new file mode 100644 index 00000000..37c412ce --- /dev/null +++ b/infrastructure/terraform/lambda/categorisation/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = REPLACE_ME + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/categorisation/variables.tf b/infrastructure/terraform/lambda/categorisation/variables.tf new file mode 100644 index 00000000..e4bab243 --- /dev/null +++ b/infrastructure/terraform/lambda/categorisation/variables.tf @@ -0,0 +1,27 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index acf8c281..2e009196 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -392,4 +392,20 @@ module "postcode_splitter_s3_read" { output "postcode_splitter_s3_read_arn" { value = module.postcode_splitter_s3_read.policy_arn +} + +################################################ +# Categorisation – Lambda ECR +################################################ +module "categorisation_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "categorisation-terraform-state" + +} + +module "categorisation_registry" { + source = "../modules/container_registry" + name = "categorisation" + stage = var.stage + } \ No newline at end of file From 36c710f6e65b9b0b01c3e7451e9d409e80416bf4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 15:06:36 +0000 Subject: [PATCH 173/340] correction to categorisation terraform --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index fca44a4c..216069fe 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -219,7 +219,7 @@ jobs: # ============================================================ # Deploy Categorisation Lambda # ============================================================ - condition_etl_lambda: + categorisation_lambda: needs: [categorisation_image, determine_stage] uses: ./.github/workflows/_deploy_lambda.yml with: From 9dd6f4f72ea33d4ac4bebd3adc39e8cbb6de1e41 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 15:29:08 +0000 Subject: [PATCH 174/340] remove categorisation stuff from deploy_terraform --- .github/workflows/deploy_terraform.yml | 40 ------------------- .../terraform/lambda/_template/README.md | 4 +- 2 files changed, 2 insertions(+), 42 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 216069fe..71e2ad9d 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -189,46 +189,6 @@ jobs: ecr_repo: condition-etl-${{ needs.determine_stage.outputs.stage }} image_digest: ${{ needs.condition_etl_image.outputs.image_digest }} terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} - secrets: - AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - - # ============================================================ - # Categorisation image and Push - # ============================================================ - categorisation_image: - needs: [determine_stage, shared_terraform] - uses: ./.github/workflows/_build_image.yml - with: - ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} - dockerfile_path: backend/categorisation/handler/Dockerfile - build_context: . - build_args: | - DEV_DB_HOST=$DEV_DB_HOST - DEV_DB_PORT=$DEV_DB_PORT - DEV_DB_NAME=$DEV_DB_NAME - secrets: - AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} - AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} - DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} - DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} - - # ============================================================ - # Deploy Categorisation Lambda - # ============================================================ - categorisation_lambda: - needs: [categorisation_image, determine_stage] - uses: ./.github/workflows/_deploy_lambda.yml - with: - lambda_name: categorisation - lambda_path: infrastructure/terraform/lambda/categorisation - stage: ${{ needs.determine_stage.outputs.stage }} - ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} - image_digest: ${{ needs.categorisation_image.outputs.image_digest }} - terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} diff --git a/infrastructure/terraform/lambda/_template/README.md b/infrastructure/terraform/lambda/_template/README.md index a7282fc9..7992ec5c 100644 --- a/infrastructure/terraform/lambda/_template/README.md +++ b/infrastructure/terraform/lambda/_template/README.md @@ -3,7 +3,7 @@ ### 1. Create the Lambda scaffold - Copy the template: - cp -r lambda/_template lambda/ + `cp -r lambda/_template lambda/` --- @@ -13,7 +13,7 @@ infrastructure/terraform/shared/main.tf - Apply the shared stack - - This requires commenting 'if env.stage == "prod"' in .github/workflows/deploy_terraform.yml + - This requires commenting `if: env.STAGE == 'prod'` in .github/workflows/deploy_terraform.yml - Verify the ECR repository exists in AWS From 42cac343576a4cf1f0bb2c02df145dd8e53ed293 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 15:50:01 +0000 Subject: [PATCH 175/340] only run on branches it was told to --- .github/workflows/deploy_terraform.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index e7c8fb94..6280abcd 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -77,10 +77,10 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - if: env.STAGE == 'prod' + if: env.TERRAFORM_APPLY == 'true' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan - + # ============================================================ # 2️⃣ Build Address 2 UPRN image and Push # ============================================================ From 2fc01a5dc7f059c821126962796d519b23684045 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 15:59:42 +0000 Subject: [PATCH 176/340] replace replace me in provider.tf --- infrastructure/terraform/lambda/categorisation/provider.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/lambda/categorisation/provider.tf b/infrastructure/terraform/lambda/categorisation/provider.tf index 37c412ce..f983533d 100644 --- a/infrastructure/terraform/lambda/categorisation/provider.tf +++ b/infrastructure/terraform/lambda/categorisation/provider.tf @@ -7,7 +7,7 @@ terraform { } backend "s3" { - bucket = REPLACE_ME + bucket = "categorisation" key = "terraform.tfstate" region = "eu-west-2" } From c96fa51badc00dfb6df770fa78c82006f37d63c4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 16:09:49 +0000 Subject: [PATCH 177/340] add terraform_apply env var to shared_terraform --- .github/workflows/deploy_terraform.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 6280abcd..691c9996 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -52,6 +52,7 @@ jobs: runs-on: ubuntu-latest env: STAGE: ${{ needs.determine_stage.outputs.stage }} + TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }} steps: - uses: actions/checkout@v4 From 146ca3d8b0d1ebdd577c15663da0a4d7bc3b4a2d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 16 Feb 2026 16:12:36 +0000 Subject: [PATCH 178/340] update readme --- infrastructure/terraform/lambda/_template/README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/infrastructure/terraform/lambda/_template/README.md b/infrastructure/terraform/lambda/_template/README.md index 7992ec5c..5bb10627 100644 --- a/infrastructure/terraform/lambda/_template/README.md +++ b/infrastructure/terraform/lambda/_template/README.md @@ -12,8 +12,7 @@ infrastructure/terraform/shared/main.tf -- Apply the shared stack - - This requires commenting `if: env.STAGE == 'prod'` in .github/workflows/deploy_terraform.yml +- Create a PR to deploy this to main then dev in order to deploy the shared stack - Verify the ECR repository exists in AWS From c7c208334e231568449acb714a42b12d0e47405d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 16:54:35 +0000 Subject: [PATCH 179/340] lets view plan for now --- .github/workflows/deploy_terraform.yml | 1 + infrastructure/terraform/lambda/address2UPRN/main.tf | 2 ++ infrastructure/terraform/modules/sqs_queue/main.tf | 2 +- infrastructure/terraform/modules/sqs_queue/variables.tf | 7 ++++++- 4 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index e7c8fb94..c7a3ec1b 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -52,6 +52,7 @@ jobs: runs-on: ubuntu-latest env: STAGE: ${{ needs.determine_stage.outputs.stage }} + TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }} steps: - uses: actions/checkout@v4 diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 5a36153e..f53d55c8 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -22,6 +22,8 @@ module "address2uprn" { image_uri = local.image_uri + timeout = 900 + environment = merge( { STAGE = var.stage diff --git a/infrastructure/terraform/modules/sqs_queue/main.tf b/infrastructure/terraform/modules/sqs_queue/main.tf index 580e67bd..356d3429 100644 --- a/infrastructure/terraform/modules/sqs_queue/main.tf +++ b/infrastructure/terraform/modules/sqs_queue/main.tf @@ -5,7 +5,7 @@ resource "aws_sqs_queue" "dlq" { resource "aws_sqs_queue" "this" { name = var.name - visibility_timeout_seconds = 120 + visibility_timeout_seconds = var.timeout * 6 redrive_policy = jsonencode({ deadLetterTargetArn = aws_sqs_queue.dlq.arn diff --git a/infrastructure/terraform/modules/sqs_queue/variables.tf b/infrastructure/terraform/modules/sqs_queue/variables.tf index 943a7a16..bb8e4485 100644 --- a/infrastructure/terraform/modules/sqs_queue/variables.tf +++ b/infrastructure/terraform/modules/sqs_queue/variables.tf @@ -2,5 +2,10 @@ variable "name" { type = string } variable "max_receive_count" { type = number - default = 5 + default = 1 +} + +variable "timeout" { + type = number + default = 900 } From 952a2cb2c0df65bf4c58c62e68c5a2d7d7a516f3 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 17:23:15 +0000 Subject: [PATCH 180/340] quick shared infra --- .github/workflows/deploy_terraform.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index c7a3ec1b..c6937f7a 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -3,7 +3,9 @@ name: Deploy infrastructure on: push: branches: - - "**" + - "main" + - "dev" + - "prod" paths: - 'infrastructure/terraform/**' - '.github/workflows/deploy_terraform.yml' From 0afa8b3aba6993d45a35336ce0f878eb56e32faa Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 17:29:10 +0000 Subject: [PATCH 181/340] sqs time out redploy --- infrastructure/terraform/modules/sqs_queue/main.tf | 2 +- infrastructure/terraform/modules/sqs_queue/variables.tf | 5 ----- 2 files changed, 1 insertion(+), 6 deletions(-) diff --git a/infrastructure/terraform/modules/sqs_queue/main.tf b/infrastructure/terraform/modules/sqs_queue/main.tf index 356d3429..afb7dc27 100644 --- a/infrastructure/terraform/modules/sqs_queue/main.tf +++ b/infrastructure/terraform/modules/sqs_queue/main.tf @@ -5,7 +5,7 @@ resource "aws_sqs_queue" "dlq" { resource "aws_sqs_queue" "this" { name = var.name - visibility_timeout_seconds = var.timeout * 6 + visibility_timeout_seconds = 1000 redrive_policy = jsonencode({ deadLetterTargetArn = aws_sqs_queue.dlq.arn diff --git a/infrastructure/terraform/modules/sqs_queue/variables.tf b/infrastructure/terraform/modules/sqs_queue/variables.tf index bb8e4485..95b33231 100644 --- a/infrastructure/terraform/modules/sqs_queue/variables.tf +++ b/infrastructure/terraform/modules/sqs_queue/variables.tf @@ -4,8 +4,3 @@ variable "max_receive_count" { type = number default = 1 } - -variable "timeout" { - type = number - default = 900 -} From 152ca2d7f1f73c2d5d9448e226828b1ff42de462 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 17:46:07 +0000 Subject: [PATCH 182/340] did any workflows run --- .github/workflows/unit_tests.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index cc6431b8..a5d946bd 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -1,6 +1,10 @@ name: Run unit tests on: + branches: + - "main" + - "dev" + - "prod" pull_request: branches: - "**" From 92aaa92964df8a3b4e7d5965b0a6545f09b405b1 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Mon, 16 Feb 2026 17:46:52 +0000 Subject: [PATCH 183/340] do you see me now? --- .github/workflows/unit_tests.yml | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index a5d946bd..2ad16b97 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -1,10 +1,11 @@ name: Run unit tests on: - branches: - - "main" - - "dev" - - "prod" + push: + branches: + - "main" + - "dev" + - "prod" pull_request: branches: - "**" From 9d87ef8eec2b0b55637486b208da4710ffe18d27 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 09:17:41 +0000 Subject: [PATCH 184/340] deploy categorisation image and lambda --- .github/workflows/deploy_terraform.yml | 41 ++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 691c9996..f2a1f34a 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -205,3 +205,44 @@ jobs: AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + # ============================================================ + # Categorisation image and Push + # ============================================================ + categorisation_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/categorisation/handler/Dockerfile + build_context: . + build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + + # ============================================================ + # Deploy Categorisation Lambda + # ============================================================ + categorisation_lambda: + needs: [categorisation_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: categorisation + lambda_path: infrastructure/terraform/lambda/categorisation + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.categorisation_image.outputs.image_digest }} + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + From 1d6746d3911b2b3ff1c5d8eb713a8891208a6b7d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 09:26:04 +0000 Subject: [PATCH 185/340] correct terraform state bucket --- infrastructure/terraform/lambda/categorisation/provider.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/lambda/categorisation/provider.tf b/infrastructure/terraform/lambda/categorisation/provider.tf index f983533d..fe497c81 100644 --- a/infrastructure/terraform/lambda/categorisation/provider.tf +++ b/infrastructure/terraform/lambda/categorisation/provider.tf @@ -7,7 +7,7 @@ terraform { } backend "s3" { - bucket = "categorisation" + bucket = "categorisation-terraform-state" key = "terraform.tfstate" region = "eu-west-2" } From 827745c10f0e8354ad522b7641919ce875e5fa9d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 09:29:37 +0000 Subject: [PATCH 186/340] add db credentials to categorisation tf --- infrastructure/terraform/lambda/categorisation/main.tf | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/infrastructure/terraform/lambda/categorisation/main.tf b/infrastructure/terraform/lambda/categorisation/main.tf index a402a386..6e30dd8e 100644 --- a/infrastructure/terraform/lambda/categorisation/main.tf +++ b/infrastructure/terraform/lambda/categorisation/main.tf @@ -1,3 +1,7 @@ +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + data "terraform_remote_state" "shared" { backend = "s3" config = { @@ -7,6 +11,10 @@ data "terraform_remote_state" "shared" { } } +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + module "lambda" { source = "../modules/lambda_with_sqs" From 35f7738160ce40e61c048facc4b22608ee643f6e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 09:33:31 +0000 Subject: [PATCH 187/340] implement handler --- backend/categorisation/handler/handler.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index e74bfeb5..20076613 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -1,4 +1,9 @@ +import json from typing import Any, Mapping +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) +from backend.categorisation.processor import process_portfolio from utils.logger import setup_logger @@ -7,4 +12,15 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - pass + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + logger.debug("Validating request body") + payload = CategorisationTriggerRequest.model_validate(body_dict) + + logger.debug("Successfully validated request body") + + process_portfolio(payload.portfolio_id) + + except Exception as e: + logger.error(f"Failed to process record: {e}") From e0f133073b8a75c899abd85af7371c564f3e1d61 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 11:12:10 +0000 Subject: [PATCH 188/340] fix import errors --- backend/categorisation/handler/Dockerfile | 5 +++++ backend/categorisation/handler/requirements.txt | 5 ++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index 46c8d477..5f435afd 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -30,8 +30,13 @@ RUN pip install --no-cache-dir -r requirements.txt # ----------------------------- COPY utils/ utils/ COPY backend/categorisation/ backend/categorisation/ +COPY backend/app/db/functions/ backend/app/db/functions/ +COPY backend/app/db/models/ backend/app/db/models/ +COPY backend/addresses/ backend/addresses +COPY datatypes/ datatypes/ COPY backend/app/db/connection.py backend/app/db/connection.py + COPY backend/app/config.py backend/app/config.py COPY backend/__init__.py backend/__init__.py diff --git a/backend/categorisation/handler/requirements.txt b/backend/categorisation/handler/requirements.txt index 48e5b561..e277b094 100644 --- a/backend/categorisation/handler/requirements.txt +++ b/backend/categorisation/handler/requirements.txt @@ -1,3 +1,6 @@ sqlmodel pydantic-settings -psycopg2-binary==2.9.10 \ No newline at end of file +psycopg2-binary==2.9.10 + +# Not used but needed to satisfy imports +pytz==2024.2 \ No newline at end of file From 82009a81e663fdd96e22b06025361e09b6738eba Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 17 Feb 2026 11:22:49 +0000 Subject: [PATCH 189/340] remove for main --- .github/workflows/deploy_terraform.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 728292ed..9081c507 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -3,7 +3,6 @@ name: Deploy infrastructure on: push: branches: - - "main" - "dev" - "prod" paths: From 6e737f0cca1f7ed1380828205fc6090566db12d6 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 17 Feb 2026 11:28:02 +0000 Subject: [PATCH 190/340] pull request --- .github/workflows/deploy_terraform.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 9081c507..84f6f1f3 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -1,7 +1,7 @@ name: Deploy infrastructure on: - push: + pull_request: branches: - "dev" - "prod" From 781af5fcb2d5f21810df744a5871e947bdc3e1ee Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 17 Feb 2026 11:29:34 +0000 Subject: [PATCH 191/340] get rid of it from branches --- .github/workflows/deploy_terraform.yml | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 84f6f1f3..67f75836 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -1,15 +1,10 @@ name: Deploy infrastructure on: - pull_request: + push: branches: - "dev" - "prod" - paths: - - 'infrastructure/terraform/**' - - '.github/workflows/deploy_terraform.yml' - - '.github/workflows/_build_image.yml' - - '.github/workflows/_deploy_lambda.yml' workflow_dispatch: jobs: From fc961233f96a992a819eb2feeaad57a5242a65d2 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Tue, 17 Feb 2026 11:36:24 +0000 Subject: [PATCH 192/340] only run on pull request --- .github/workflows/unit_tests.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/.github/workflows/unit_tests.yml b/.github/workflows/unit_tests.yml index 2ad16b97..cc6431b8 100644 --- a/.github/workflows/unit_tests.yml +++ b/.github/workflows/unit_tests.yml @@ -1,11 +1,6 @@ name: Run unit tests on: - push: - branches: - - "main" - - "dev" - - "prod" pull_request: branches: - "**" From cb864c161fa811752a9eecba6b2f6757f30fdc6d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 11:45:50 +0000 Subject: [PATCH 193/340] import entirity of backend/app/db --- backend/categorisation/handler/Dockerfile | 6 ++---- backend/categorisation/handler/handler.py | 19 ++++++++++--------- 2 files changed, 12 insertions(+), 13 deletions(-) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index 5f435afd..dbb6c827 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -30,9 +30,8 @@ RUN pip install --no-cache-dir -r requirements.txt # ----------------------------- COPY utils/ utils/ COPY backend/categorisation/ backend/categorisation/ -COPY backend/app/db/functions/ backend/app/db/functions/ -COPY backend/app/db/models/ backend/app/db/models/ -COPY backend/addresses/ backend/addresses +COPY backend/app/db/ backend/app/db/ +COPY backend/addresses/ backend/addresses/ COPY datatypes/ datatypes/ COPY backend/app/db/connection.py backend/app/db/connection.py @@ -41,7 +40,6 @@ COPY backend/app/config.py backend/app/config.py COPY backend/__init__.py backend/__init__.py COPY backend/app/__init__.py backend/app/__init__.py -COPY backend/app/db/__init__.py backend/app/db/__init__.py # ----------------------------- diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index 20076613..7cea5a7a 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -12,15 +12,16 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - for record in event.get("Records", []): - try: - body_dict = json.loads(record["body"]) - logger.debug("Validating request body") - payload = CategorisationTriggerRequest.model_validate(body_dict) + process_portfolio(556) + # for record in event.get("Records", []): + # try: + # body_dict = json.loads(record["body"]) + # logger.debug("Validating request body") + # payload = CategorisationTriggerRequest.model_validate(body_dict) - logger.debug("Successfully validated request body") + # logger.debug("Successfully validated request body") - process_portfolio(payload.portfolio_id) + # # process_portfolio(payload.portfolio_id) - except Exception as e: - logger.error(f"Failed to process record: {e}") + # except Exception as e: + # logger.error(f"Failed to process record: {e}") From 87e5d059366cc8956433a0cf1d4c294b9a031443 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 11:46:51 +0000 Subject: [PATCH 194/340] revert accidental pushed changes --- backend/categorisation/handler/Dockerfile | 2 +- backend/categorisation/handler/handler.py | 18 +++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index dbb6c827..97f5c14e 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -36,7 +36,7 @@ COPY datatypes/ datatypes/ COPY backend/app/db/connection.py backend/app/db/connection.py -COPY backend/app/config.py backend/app/config.py +COPY backdend/app/config.py backend/app/config.py COPY backend/__init__.py backend/__init__.py COPY backend/app/__init__.py backend/app/__init__.py diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index 7cea5a7a..4e4a1e39 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -13,15 +13,15 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: process_portfolio(556) - # for record in event.get("Records", []): - # try: - # body_dict = json.loads(record["body"]) - # logger.debug("Validating request body") - # payload = CategorisationTriggerRequest.model_validate(body_dict) + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + logger.debug("Validating request body") + payload = CategorisationTriggerRequest.model_validate(body_dict) - # logger.debug("Successfully validated request body") + logger.debug("Successfully validated request body") - # # process_portfolio(payload.portfolio_id) + # process_portfolio(payload.portfolio_id) - # except Exception as e: - # logger.error(f"Failed to process record: {e}") + except Exception as e: + logger.error(f"Failed to process record: {e}") From f834d9497eb06ca6ac5c1107f68376f987b6fd6f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 11:55:57 +0000 Subject: [PATCH 195/340] undo typos --- backend/categorisation/handler/Dockerfile | 2 +- backend/categorisation/handler/handler.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index 97f5c14e..dbb6c827 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -36,7 +36,7 @@ COPY datatypes/ datatypes/ COPY backend/app/db/connection.py backend/app/db/connection.py -COPY backdend/app/config.py backend/app/config.py +COPY backend/app/config.py backend/app/config.py COPY backend/__init__.py backend/__init__.py COPY backend/app/__init__.py backend/app/__init__.py diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index 4e4a1e39..20076613 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -12,7 +12,6 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: - process_portfolio(556) for record in event.get("Records", []): try: body_dict = json.loads(record["body"]) @@ -21,7 +20,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: logger.debug("Successfully validated request body") - # process_portfolio(payload.portfolio_id) + process_portfolio(payload.portfolio_id) except Exception as e: logger.error(f"Failed to process record: {e}") From 36db25d5e473160b38f615c24a4a5747ba576f23 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 12:07:04 +0000 Subject: [PATCH 196/340] hopefully the last missing import --- backend/categorisation/handler/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index dbb6c827..f0828e35 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -31,6 +31,7 @@ RUN pip install --no-cache-dir -r requirements.txt COPY utils/ utils/ COPY backend/categorisation/ backend/categorisation/ COPY backend/app/db/ backend/app/db/ +COPY backend/app/domain/ backend/app/domain/ COPY backend/addresses/ backend/addresses/ COPY datatypes/ datatypes/ From 0218c169c27e1dc96eec06e3084033e2f9d9eed9 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 12:38:12 +0000 Subject: [PATCH 197/340] missing import in dockerfile --- backend/categorisation/handler/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index f0828e35..7811ee4a 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -38,6 +38,7 @@ COPY datatypes/ datatypes/ COPY backend/app/db/connection.py backend/app/db/connection.py COPY backend/app/config.py backend/app/config.py +COPY backend/app/utils.py backend/app/utils.py COPY backend/__init__.py backend/__init__.py COPY backend/app/__init__.py backend/app/__init__.py From fac418adbe2de14ad29b40abc74cab73653c4ba3 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 17 Feb 2026 15:25:51 +0000 Subject: [PATCH 198/340] Don't re-get scenarios for every plan --- .../db/functions/recommendations_functions.py | 7 +++++++ backend/categorisation/processor.py | 21 ++++++++++++------- 2 files changed, 20 insertions(+), 8 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index e690991a..aa966fbb 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -625,6 +625,13 @@ def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() +def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: + stmt = select(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: stmt = select(ScenarioModel).where(ScenarioModel.id == scenario_id) with db_read_session() as session: diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 7c5698b7..d2bdbef0 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -4,7 +4,7 @@ from typing import Dict, List from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, get_plans_by_portfolio_id, - get_scenario, + get_scenarios_by_portfolio_id, ) from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan @@ -15,7 +15,7 @@ logger = setup_logger() def process_portfolio(portfolio_id: int) -> None: - print(f"Processing portfolio {portfolio_id}") + logger.info(f"Processing portfolio {portfolio_id}") plans: List[Plan] = _load_plans_for_portfolio(portfolio_id) plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) @@ -29,22 +29,27 @@ def process_portfolio(portfolio_id: int) -> None: def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: - plan_models = get_plans_by_portfolio_id(portfolio_id) - print(f"Got {len(plan_models)} plans from database") - plans: List[Plan] = [] + plan_models = get_plans_by_portfolio_id(portfolio_id) + scenarios: List[ScenarioModel] = get_scenarios_by_portfolio_id(portfolio_id) + + if not scenarios: + raise Exception(f"No scenarios found for Portfolio {portfolio_id}") + for model in plan_models: - if not model.scenario_id: + + scenario_model = next((s for s in scenarios if s.id == model.scenario_id)) + if not scenario_model: logger.info(f"No Scenario associated with Plan of ID {model.id}") continue - scenario_model = get_scenario(model.scenario_id) plans.append( Plan.from_sqlalchemy(model, Scenario.from_sqlalchemy(scenario_model)) ) - print("Successfully mapped plan and scenario to domain object") + logger.info("Successfully mapped plan and scenario to domain object") + logger.info(f"Got {len(plans)} plans from database") return plans From 3a5df1a1f3f5e15c9a6bd353d8f957a7d3512e6d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 09:05:41 +0000 Subject: [PATCH 199/340] Better logging --- backend/categorisation/processor.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index d2bdbef0..97e4c5ad 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -47,9 +47,11 @@ def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: plans.append( Plan.from_sqlalchemy(model, Scenario.from_sqlalchemy(scenario_model)) ) - logger.info("Successfully mapped plan and scenario to domain object") + logger.debug( + f"Successfully mapped plan {model.id} and scenario {scenario_model.id} to domain object" + ) - logger.info(f"Got {len(plans)} plans from database") + logger.debug(f"Got {len(plans)} plans from database") return plans @@ -83,6 +85,9 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: for plan in plans: should_be_default: bool = plan.id == cheapest_plan.id if plan.record.is_default != should_be_default: + logger.info( + f"Setting Plan {plan.id} (Scenario Name: {plan.scenario.record.name}) to is_default: {should_be_default}" + ) plan.set_default(should_be_default) plans_to_update.append(plan) @@ -96,3 +101,7 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: scenario_models.append(scenario_model) bulk_update_plans(plan_models, scenario_models) + logger.info("Successfully updated Plan default values") + + else: + logger.info("All plan default values already correct. Not udpating") From 9a177065b611c0fded57070554701f010649b852 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 10:06:40 +0000 Subject: [PATCH 200/340] =?UTF-8?q?allow=20plan=20priority=20to=20be=20spe?= =?UTF-8?q?cified=20for=20plans=20with=20identical=20ouput=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../categorisation_trigger_request.py | 3 + backend/categorisation/processor.py | 14 ++- .../tests/test_prioritised_plan_selected.py | 88 +++++++++++++++++++ 3 files changed, 101 insertions(+), 4 deletions(-) create mode 100644 backend/categorisation/tests/test_prioritised_plan_selected.py diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 9ef1d106..aa2b8ed3 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -1,5 +1,8 @@ +from typing import List, Optional from pydantic import BaseModel class CategorisationTriggerRequest(BaseModel): portfolio_id: int + + plan_priority_order: Optional[List[int]] diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 97e4c5ad..539f7a68 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Dict, List +from typing import Dict, List, Optional from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, @@ -14,7 +14,9 @@ from utils.logger import setup_logger logger = setup_logger() -def process_portfolio(portfolio_id: int) -> None: +def process_portfolio( + portfolio_id: int, plan_priority_order: Optional[List[int]] = [] +) -> None: logger.info(f"Processing portfolio {portfolio_id}") plans: List[Plan] = _load_plans_for_portfolio(portfolio_id) plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) @@ -24,7 +26,9 @@ def process_portfolio(portfolio_id: int) -> None: if not property_plans: raise ValueError(f"No plans for property {uprn}") - cheapest_plan = _choose_cheapest_relevant_plan(property_plans) + cheapest_plan = _choose_cheapest_relevant_plan( + property_plans, plan_priority_order + ) _update_default_flags(property_plans, cheapest_plan) @@ -64,7 +68,9 @@ def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]: return grouped -def _choose_cheapest_relevant_plan(plans: List[Plan]) -> Plan: +def _choose_cheapest_relevant_plan( + plans: List[Plan], plan_priority_order: Optional[List[int]] = [] +) -> Plan: plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans def plan_cost(plan: Plan) -> float: diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py new file mode 100644 index 00000000..03bca666 --- /dev/null +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -0,0 +1,88 @@ +from datetime import datetime +from typing import List +import pytest + +from backend.app.domain.classes.plan import Plan +from backend.app.domain.classes.scenario import Scenario +from backend.app.domain.records.plan_record import PlanRecord +from backend.app.domain.records.scenario_record import ScenarioRecord +from backend.app.db.models.portfolio import Epc, PortfolioGoal +from backend.categorisation.processor import _choose_cheapest_relevant_plan + + +@pytest.fixture +def created_at_datetime() -> datetime: + return datetime.now() + + +@pytest.fixture +def identical_plan_record(created_at_datetime: datetime, default: bool) -> PlanRecord: + return PlanRecord( + property_id=1, + portfolio_id=1, + created_at=created_at_datetime, + is_default=default, + post_epc_rating=Epc.C, + cost_of_works=500.0, + ) + + +def make_plan_record(created_at_datetime: datetime, default: bool) -> PlanRecord: + return PlanRecord( + property_id=1, + portfolio_id=1, + created_at=created_at_datetime, + is_default=default, + post_epc_rating=Epc.C, + cost_of_works=500.0, + ) + + +def test_prioritised_plan_selected(created_at_datetime: datetime) -> None: + # arrange + epc_c_scenario_record = ScenarioRecord( + name="EPC C", + created_at=created_at_datetime, + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=True, + ) + epc_c_scenario = Scenario(record=epc_c_scenario_record, id=1) + epc_c_plan = Plan( + record=make_plan_record(created_at_datetime, True), + scenario=epc_c_scenario, + id=1, + ) + + minor_works_scenario_record = ScenarioRecord( + name="EPC C - Minor Works", + created_at=created_at_datetime, + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=False, + ) + minor_works_scenario = Scenario(record=minor_works_scenario_record, id=2) + minor_works_plan = Plan( + record=make_plan_record(created_at_datetime, False), + scenario=minor_works_scenario, + id=2, + ) + + plan_priority_order: List[int] = [2, 1] + + expected_default_plan_id = 2 + + # act + actual_default_plan = _choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + plan_priority_order=plan_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id From 508f3f285934908f5ebeab2b9437dac2c108184e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 10:40:50 +0000 Subject: [PATCH 201/340] make choose cheapest relevant plan method public as it's called from outside the module --- backend/categorisation/processor.py | 36 +++++++++---------- .../tests/test_prioritised_plan_selected.py | 4 +-- 2 files changed, 20 insertions(+), 20 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 539f7a68..02116d61 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -26,12 +26,29 @@ def process_portfolio( if not property_plans: raise ValueError(f"No plans for property {uprn}") - cheapest_plan = _choose_cheapest_relevant_plan( + cheapest_plan = choose_cheapest_relevant_plan( property_plans, plan_priority_order ) _update_default_flags(property_plans, cheapest_plan) +def choose_cheapest_relevant_plan( + plans: List[Plan], plan_priority_order: Optional[List[int]] = [] +) -> Plan: + plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans + + def plan_cost(plan: Plan) -> float: + return ( + plan.record.cost_of_works + if plan.record.cost_of_works is not None + else float("inf") + ) + + cheapest_plan = min(plans_to_consider, key=plan_cost) + + return cheapest_plan + + def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: plans: List[Plan] = [] @@ -68,23 +85,6 @@ def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]: return grouped -def _choose_cheapest_relevant_plan( - plans: List[Plan], plan_priority_order: Optional[List[int]] = [] -) -> Plan: - plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans - - def plan_cost(plan: Plan) -> float: - return ( - plan.record.cost_of_works - if plan.record.cost_of_works is not None - else float("inf") - ) - - cheapest_plan = min(plans_to_consider, key=plan_cost) - - return cheapest_plan - - def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: plans_to_update: List[Plan] = [] diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index 03bca666..eb41194c 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -7,7 +7,7 @@ from backend.app.domain.classes.scenario import Scenario from backend.app.domain.records.plan_record import PlanRecord from backend.app.domain.records.scenario_record import ScenarioRecord from backend.app.db.models.portfolio import Epc, PortfolioGoal -from backend.categorisation.processor import _choose_cheapest_relevant_plan +from backend.categorisation.processor import choose_cheapest_relevant_plan @pytest.fixture @@ -79,7 +79,7 @@ def test_prioritised_plan_selected(created_at_datetime: datetime) -> None: expected_default_plan_id = 2 # act - actual_default_plan = _choose_cheapest_relevant_plan( + actual_default_plan = choose_cheapest_relevant_plan( plans=[epc_c_plan, minor_works_plan], plan_priority_order=plan_priority_order, ) From b916551921f3e5590fd1d7caf7270370348361de Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 11:20:08 +0000 Subject: [PATCH 202/340] =?UTF-8?q?allow=20plan=20priority=20to=20be=20spe?= =?UTF-8?q?cified=20for=20plans=20with=20identical=20ouput=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/categorisation/processor.py | 37 ++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 6 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 02116d61..184ccac2 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,6 +1,8 @@ from collections import defaultdict from typing import Dict, List, Optional +from sqlalchemy import Tuple + from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, get_plans_by_portfolio_id, @@ -33,20 +35,43 @@ def process_portfolio( def choose_cheapest_relevant_plan( - plans: List[Plan], plan_priority_order: Optional[List[int]] = [] + plans: List[Plan], plan_priority_order: Optional[List[int]] = None ) -> Plan: - plans_to_consider: List[Plan] = [p for p in plans if p.is_compliant] or plans + plan_priority_order = plan_priority_order or [] - def plan_cost(plan: Plan) -> float: - return ( + eligible_plans: List[Plan] = [plan for plan in plans if plan.is_compliant] or plans + if not eligible_plans: + raise ValueError("No plans available to choose from.") + + for plan in eligible_plans: + if plan.id is None: + # This should never actually happen, but plan.id is optional to cater + # for new plans. We are only working with already persisted plans here + raise ValueError( + f"All plans must have an ID, but found a plan with no ID: {plan}" + ) + + min_cost: float = min( + ( plan.record.cost_of_works if plan.record.cost_of_works is not None else float("inf") ) + for plan in eligible_plans + ) - cheapest_plan = min(plans_to_consider, key=plan_cost) + cheapest_plans: List[Plan] = [ + plan + for plan in eligible_plans + if (plan.record.cost_of_works or float("inf")) == min_cost + ] - return cheapest_plan + for priority_plan_id in plan_priority_order: + for plan in cheapest_plans: + if plan.id == priority_plan_id: + return plan + + return cheapest_plans[0] def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: From bfb0d79da6c7d7e0f8e9465683c14e7141e0b1ea Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 11:27:09 +0000 Subject: [PATCH 203/340] =?UTF-8?q?Cheapest=20compliant=20plan=20selected?= =?UTF-8?q?=20even=20when=20not=20in=20the=20priority=20list=20?= =?UTF-8?q?=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_prioritised_plan_selected.py | 100 ++++++++++-------- 1 file changed, 53 insertions(+), 47 deletions(-) diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index eb41194c..5424dd5e 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -15,67 +15,73 @@ def created_at_datetime() -> datetime: return datetime.now() -@pytest.fixture -def identical_plan_record(created_at_datetime: datetime, default: bool) -> PlanRecord: +def make_plan_record( + created_at: datetime, default: bool, cost_of_works: float = 500.0 +) -> PlanRecord: return PlanRecord( property_id=1, portfolio_id=1, - created_at=created_at_datetime, + created_at=created_at, is_default=default, post_epc_rating=Epc.C, - cost_of_works=500.0, + cost_of_works=cost_of_works, ) -def make_plan_record(created_at_datetime: datetime, default: bool) -> PlanRecord: - return PlanRecord( - property_id=1, - portfolio_id=1, - created_at=created_at_datetime, - is_default=default, - post_epc_rating=Epc.C, - cost_of_works=500.0, +def make_scenario(name: str, created_at: datetime, is_default: bool) -> Scenario: + record = ScenarioRecord( + name=name, + created_at=created_at, + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=is_default, + ) + return Scenario(record=record, id=1 if is_default else 2) + + +def make_plan( + created_at: datetime, default: bool, cost_of_works: float = 500.0, name: str = "" +) -> Plan: + scenario = make_scenario(name, created_at, default) + plan_id = 1 if default else 2 + return Plan( + record=make_plan_record(created_at, default, cost_of_works), + scenario=scenario, + id=plan_id, ) def test_prioritised_plan_selected(created_at_datetime: datetime) -> None: # arrange - epc_c_scenario_record = ScenarioRecord( - name="EPC C", - created_at=created_at_datetime, - housing_type="", - goal=PortfolioGoal.INCREASING_EPC, - goal_value="C", - trigger_file_path="", - multi_plan=False, - is_default=True, - ) - epc_c_scenario = Scenario(record=epc_c_scenario_record, id=1) - epc_c_plan = Plan( - record=make_plan_record(created_at_datetime, True), - scenario=epc_c_scenario, - id=1, - ) - - minor_works_scenario_record = ScenarioRecord( - name="EPC C - Minor Works", - created_at=created_at_datetime, - housing_type="", - goal=PortfolioGoal.INCREASING_EPC, - goal_value="C", - trigger_file_path="", - multi_plan=False, - is_default=False, - ) - minor_works_scenario = Scenario(record=minor_works_scenario_record, id=2) - minor_works_plan = Plan( - record=make_plan_record(created_at_datetime, False), - scenario=minor_works_scenario, - id=2, - ) - + epc_c_plan = make_plan(created_at_datetime, True, name="EPC C") + minor_works_plan = make_plan(created_at_datetime, False, name="EPC C - Minor Works") plan_priority_order: List[int] = [2, 1] - + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + plan_priority_order=plan_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id + + +def test_cheapest_plan_returned_if_not_in_priority_list( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan( + created_at_datetime, True, cost_of_works=1000.0, name="EPC C" + ) + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=100.0, name="EPC C - Minor Works" + ) + plan_priority_order: List[int] = [1, 3] expected_default_plan_id = 2 # act From cc901d999b538e1743a32a00ab474460f3fd7bc5 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 11:54:55 +0000 Subject: [PATCH 204/340] option to only consider a specific list of plans --- .../db/functions/recommendations_functions.py | 7 +++++ .../categorisation_trigger_request.py | 3 +- backend/categorisation/handler/handler.py | 6 +++- backend/categorisation/processor.py | 30 +++++++++++++++---- 4 files changed, 38 insertions(+), 8 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index aa966fbb..d4c3fcb9 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -625,6 +625,13 @@ def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() +def get_plans_by_ids(ids: List[int]) -> List[PlanModel]: + stmt = select(PlanModel).where(PlanModel.id.in_(ids)) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: stmt = select(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) with db_read_session() as session: diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index aa2b8ed3..46ce6f1c 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -5,4 +5,5 @@ from pydantic import BaseModel class CategorisationTriggerRequest(BaseModel): portfolio_id: int - plan_priority_order: Optional[List[int]] + plans_to_consider: Optional[List[int]] = None + plan_priority_order: Optional[List[int]] = None diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index 20076613..449c5ccf 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -20,7 +20,11 @@ def handler(event: Mapping[str, Any], context: Any) -> None: logger.debug("Successfully validated request body") - process_portfolio(payload.portfolio_id) + process_portfolio( + payload.portfolio_id, + payload.plans_to_consider, + payload.plan_priority_order, + ) except Exception as e: logger.error(f"Failed to process record: {e}") diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 184ccac2..b7ddfc62 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,10 +1,9 @@ from collections import defaultdict from typing import Dict, List, Optional -from sqlalchemy import Tuple - from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, + get_plans_by_ids, get_plans_by_portfolio_id, get_scenarios_by_portfolio_id, ) @@ -17,10 +16,14 @@ logger = setup_logger() def process_portfolio( - portfolio_id: int, plan_priority_order: Optional[List[int]] = [] + portfolio_id: int, + plans_to_consider: Optional[List[int]] = None, + plan_priority_order: Optional[List[int]] = None, ) -> None: logger.info(f"Processing portfolio {portfolio_id}") - plans: List[Plan] = _load_plans_for_portfolio(portfolio_id) + + plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, plans_to_consider) + plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) for uprn, property_plans in plans_by_property.items(): @@ -74,10 +77,25 @@ def choose_cheapest_relevant_plan( return cheapest_plans[0] -def _load_plans_for_portfolio(portfolio_id: int) -> List[Plan]: +def _load_plans_for_portfolio( + portfolio_id: int, plans_to_consider: Optional[List[int]] = None +) -> List[Plan]: + + if plans_to_consider: + if len(plans_to_consider) < 2: + raise ValueError("Cannot run auto categorisation for fewer than 2 plans") + + logger.info(f"Getting {len(plans_to_consider)} Plans") + plan_models: List[PlanModel] = get_plans_by_ids(plans_to_consider) + + else: + logger.info( + f"No list of Plans to consider provided. Getting all Plans for portfolio {portfolio_id}" + ) + plan_models: List[PlanModel] = get_plans_by_portfolio_id(portfolio_id) + plans: List[Plan] = [] - plan_models = get_plans_by_portfolio_id(portfolio_id) scenarios: List[ScenarioModel] = get_scenarios_by_portfolio_id(portfolio_id) if not scenarios: From 8e22ced679b2f940c2dfe98f5815b2cd1673fa49 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 12:03:37 +0000 Subject: [PATCH 205/340] intial impelmentation of strategic optimiser with tests --- asset_list/app.py | 69 +- backend/engine/engine.py | 22 + recommendations/optimiser/CostOptimiser.py | 34 +- recommendations/optimiser/GainOptimiser.py | 9 +- .../optimiser/StrategicOptimiser.py | 175 +++++ .../optimiser/funding_optimiser.py | 1 + .../tests/test_optimiser_functions.py | 726 ++++++++++++++++++ 7 files changed, 975 insertions(+), 61 deletions(-) create mode 100644 recommendations/optimiser/StrategicOptimiser.py diff --git a/asset_list/app.py b/asset_list/app.py index b9c6bcf0..773c07b0 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -73,61 +73,24 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/West Kent" - data_filename = "West Kent Asset List.xlsx" + data_folder = "/Users/khalimconn-kowlessar/Downloads" + data_filename = "assests.xlsx" sheet_name = "Sheet1" - postcode_column = "POSTCODE" - address1_column = None + postcode_column = "Postcode" + address1_column = "Address" address1_method = "house_number_extraction" - fulladdress_column = "ADDRESS" - address_cols_to_concat = [] + fulladdress_column = None + address_cols_to_concat = ["Address"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE" - landlord_built_form = None - landlord_wall_construction = "wall combined" - landlord_roof_construction = "HEATING SYSTEM" - landlord_heating_system = None + landlord_os_uprn = "UPRN" + landlord_property_type = "Archetype" + landlord_built_form = "Bedroom Count" + landlord_wall_construction = "Wall Insulation Type" + landlord_roof_construction = "Roof Type" + landlord_heating_system = "Boiler Type" landlord_existing_pv = None - landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Peabody data for cleaning - data_folder = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/data_validation" - ) - data_filename = "to_standardise_uprns.xlsx" - sheet_name = "Sheet1" - postcode_column = "POSTCODE" - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "ADDRESS" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE" - landlord_built_form = None # Skipped as empty - landlord_wall_construction = "wall combined" # combin F + G - landlord_roof_construction = "HEATING SYSTEM" # Combine I + J - landlord_heating_system = None # Check with Khalim - landlord_existing_pv = None - landlord_property_id = "UPRN" + landlord_property_id = "Tab" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -279,7 +242,7 @@ def app(): if skip is not None and not force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i : i + chunk_size] + chunk = asset_list.standardised_asset_list[i: i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -422,7 +385,7 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename(columns=asset_list.EPC_API_DATA_NAMES) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place @@ -439,7 +402,7 @@ def app(): find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), + ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID, ) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 80d6d078..f8b25352 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1118,6 +1118,28 @@ async def model_engine(body: PlanTriggerRequest): # When the goal is Increasing EPC, we can run the funding optimiser if body.goal == "Increasing EPC": + solutions_no_budget = optimise_with_scenarios( + p=p, + input_measures=input_measures, + budget=None, + target_gain=gain, + enforce_heat_pump_insulation=True, + enforce_fabric_first=body.enforce_fabric_first, + already_installed_sap=already_installed_sap, # To be passed to output + ) + solutions_no_budget["total_cost"] + + solutions_with_budget = optimise_with_scenarios( + p=p, + input_measures=input_measures, + budget=5000, + target_gain=gain, + enforce_heat_pump_insulation=True, + enforce_fabric_first=body.enforce_fabric_first, + already_installed_sap=already_installed_sap, # To be passed to output + ) + solutions_with_budget["total_cost"] + solutions = optimise_with_scenarios( p=p, input_measures=input_measures, diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index 8f030123..32a869b2 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -12,13 +12,16 @@ class CostOptimiser: # We add an optional buffer to the minimum gain to allow for slack in the optimisation BUFFER = 0.2 - def __init__(self, components, min_gain, verbose=False): + def __init__( + self, components, min_gain, verbose=False, allow_slack=True + ): self.components = components self.min_gain = min_gain self.gain_constraint = None self.m = None self.variables = [] self.solution = [] + self.allow_slack = allow_slack self.solution_cost = None self.solution_gain = None @@ -81,6 +84,20 @@ class CostOptimiser: for group_vars in self.variables: self.m += xsum(var for var in group_vars) <= 1 + def add_budget_constraint(self, budget: int | float) -> None: + # Inject budget constraint, which ensures that sum of cost_ig * x_ig <= budget, where cost_ig represents the + # cost for the ith component in group g, and x_ig is the binary decision variable for the ith component in + # group g + + self.m += ( + xsum( + item["cost"] * var + for group, group_vars in zip(self.components, self.variables) + for item, var in zip(group, group_vars) + ) + <= budget + ) + def setup_slack(self): # Remove the original gain constraint @@ -109,10 +126,17 @@ class CostOptimiser: self.m.optimize() if self.m.status == OptimizationStatus.INFEASIBLE: - # Turn off logging - too noisy - # logger.info("We have an infeasible model, setting up slack model") - self.setup_slack() - self.m.optimize() + if self.allow_slack: + self.setup_slack() + self.m.optimize() + else: + # Explicity return an empty solution + self.solution = [] + self.solution_cost = 0 + self.solution_gain = 0 + return + + # If we still have an infeasible solution, we return an empty solution self.solution = [ item for group, group_vars in zip(self.components, self.variables) for item, var in zip(group, group_vars) diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 6b757bf1..5dbf1dc5 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -21,8 +21,8 @@ class GainOptimiser: :param components: List of components, where each component is a dictionary with keys "id", "cost" and "gain" :param max_cost: Maximum cost constraint :param max_gain: Maximum gain constraint - :param allow_slack: If True, allows the model to use slack variables to relax the cost constraint if the model - is infeasible. Defaults to True. + :param allow_slack: If True, and the solution is infeasible, allows the model to use slack variables to relax + the cost constraint if the model. Defaults to True. :param verbose: If True, enables verbose logging """ self.components = components @@ -86,6 +86,9 @@ class GainOptimiser: for group_vars in self.variables: self.m += xsum(var for var in group_vars) <= 1 + self.m.max_gap = 0 + self.m.integer_tol = 1e-9 + def setup_slack(self): # Remove the original cost constraint self.m.remove(self.cost_constraint) @@ -148,5 +151,5 @@ class GainOptimiser: self.solution = solution - self.solution_gain = self.m.objective.x + self.solution_gain = sum(component['gain'] for component in self.solution) self.solution_cost = sum([component['cost'] for component in self.solution]) diff --git a/recommendations/optimiser/StrategicOptimiser.py b/recommendations/optimiser/StrategicOptimiser.py new file mode 100644 index 00000000..b75268eb --- /dev/null +++ b/recommendations/optimiser/StrategicOptimiser.py @@ -0,0 +1,175 @@ +from enum import Enum +from mip import OptimizationStatus +from typing import Sequence, Optional, TypedDict, List +from recommendations.optimiser.CostOptimiser import CostOptimiser +from recommendations.optimiser.GainOptimiser import GainOptimiser + + +class Measure(TypedDict): + id: str + cost: float + gain: float + + +class Strategies(Enum): + CASE_1_TRY_MIN_COST_WITH_CONSTRAINTS = "case_1_try_min_cost_with_constraints" + CASE_1_SOLVE_MAX_GAIN_UNDER_BUDGET = "case_1_solve_max_gain_under_budget" + CASE_2_SOLVE_MAX_GAIN_UNDER_BUDGET = "case_2_solve_max_gain_under_budget" + CASE_3_SOLVE_MIN_COST_FOR_TARGET = "case_3_solve_min_cost_for_target" + + +class StrategicOptimiser: + """ + Domain-level optimiser implementing logical optimisation logic. + + Behaviour: + + 1) If both budget and target_gain are provided: + - Minimise cost subject to: + gain >= target_gain + cost <= budget + - If infeasible: + maximise gain subject to cost <= budget + + 2) If only budget is provided: + - Maximise gain under budget + + 3) If only target_gain is provided: + - Minimise cost to achieve gain + + """ + + def __init__( + self, + components: Sequence[Sequence[Measure]], + budget: Optional[float] = None, + target_gain: Optional[float] = None, + verbose: bool = False, + ) -> None: + + if not components: + raise ValueError("Components cannot be empty.") + + if budget is None and target_gain is None: + raise ValueError("At least one of budget or target_gain must be provided.") + + self.components = components + self.budget = budget + self.target_gain = target_gain + self.verbose = verbose + + self.solution: List[Measure] = [] + self.solution_cost: float = 0.0 + self.solution_gain: float = 0.0 + + # For debugging purposes, we keep a record of which option was selected + self.strategy_used: Optional[Strategies] = None + + def solve(self) -> None: + """ + Primary entry point for solving the optimisation problem based on the provided budget and target gain. + :return: + """ + + # Case 1: budget + target + if self.budget is not None and self.target_gain is not None: + # Given: + # Budget B + # Target gain G + # + # We want the solution to: + # + # Primary problem (P1) + # min cost + # subject to + # + # gain >= 𝐺 + # cost <= B + # multiple-choice constraints + # + # If (P1) is feasible → that solution is exactly what you want. + # If (P1) is infeasible → solve the following problem (P2): + # + # max gain + # subject to + # + # cost <= B + if self._try_min_cost_with_constraints(): + # Keep a record of the strategy used to solve the problem, for debugging purposes + self.strategy_used = Strategies.CASE_1_TRY_MIN_COST_WITH_CONSTRAINTS + return + self._solve_max_gain_under_budget() + self.strategy_used = Strategies.CASE_1_SOLVE_MAX_GAIN_UNDER_BUDGET + return + + # Case 2: budget only + if self.budget is not None: + self._solve_max_gain_under_budget() + self.strategy_used = Strategies.CASE_2_SOLVE_MAX_GAIN_UNDER_BUDGET + return + + # Case 3: target only + self._solve_min_cost_for_target() + self.strategy_used = Strategies.CASE_3_SOLVE_MIN_COST_FOR_TARGET + return + + # --------------------------------------------------------- + # Internal Functions + # --------------------------------------------------------- + + def _try_min_cost_with_constraints(self) -> bool: + """ + Try to minimise cost while satisfying: + gain >= target_gain + cost <= budget + """ + + opt = CostOptimiser( + self.components, + min_gain=self.target_gain, + verbose=self.verbose, + allow_slack=False + ) + + opt.setup() + opt.add_budget_constraint(self.budget) + opt.solve() + + if opt.m.status == OptimizationStatus.INFEASIBLE: + return False + + self._store_solution(opt.solution) + return True + + def _solve_max_gain_under_budget(self) -> None: + + opt = GainOptimiser( + self.components, + max_cost=self.budget, + max_gain=None, + allow_slack=False, + verbose=self.verbose + ) + + opt.setup() + opt.solve() + + self._store_solution(opt.solution) + + def _solve_min_cost_for_target(self) -> None: + + opt = CostOptimiser( + self.components, + min_gain=self.target_gain, + verbose=self.verbose + ) + + opt.setup() + opt.solve() + + self._store_solution(opt.solution) + + def _store_solution(self, solution: List[Measure]) -> None: + self.solution = solution + self.solution_cost = sum(m["cost"] for m in solution) + self.solution_gain = sum(m["gain"] for m in solution) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 6afe7d78..aaf97226 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -1119,6 +1119,7 @@ def run_optimizer( return None, 0.0, 0.0 if budget is not None: + opt = GainOptimiser( input_measures, max_cost=budget, max_gain=0 if sub_target_gain == 0 else (sub_target_gain or float("inf")), allow_slack=allow_slack diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index c2927790..ca2a0dcb 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -5,6 +5,7 @@ from recommendations.tests.test_data.measures_to_optimise import measures_to_opt from recommendations.optimiser import optimiser_functions from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.CostOptimiser import CostOptimiser +from recommendations.optimiser.StrategicOptimiser import StrategicOptimiser, Strategies class TestPrepareInputMeasures: @@ -287,3 +288,728 @@ class TestIncreasingEpcE2e: # We don't add ventilation as major insulation work isn't done ventilation_added = any(rec["recommendation_id"] == "3_phase=2" and rec["default"] for rec in flattened) assert not ventilation_added, "Ventilation should not be added without major insulation work" + + +class TestStrategicOptimiser: + def test_budget_and_target_gain(self): + components = [ + [ + {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, + 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', + 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} + ] + ] + budget = 5000 + target_gain = 11.5 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # check strategy used + assert opt.strategy_used.value == "case_1_try_min_cost_with_constraints" + # Check the solution values + assert opt.solution_cost == 4398.75 + assert opt.solution_gain == 12 + + def test_budget_and_target_gain_2(self): + components = [ + [ + {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, + 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', + 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} + ] + ] + budget = 4000 + target_gain = 11.5 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # We expect to use case 1, but we won't be able to meet the target gain, so we should get the best solution + # possible within the budget. We end up with an infeasible solution when we try + # case_1_try_min_cost_with_constraints + assert opt.strategy_used.value == "case_1_solve_max_gain_under_budget" + assert opt.solution_cost == 1477.0680000000002 + assert opt.solution_gain == 10.8 + + def test_just_gain(self): + components = [ + [ + {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, + 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', + 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} + ] + ] + budget = None + target_gain = 11.5 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # Should be case 3 - minimise cost for target gain + assert opt.strategy_used.value == "case_3_solve_min_cost_for_target" + assert opt.solution_cost == 4398.75 + assert opt.solution_gain == 12 + + def test_just_gain2(self): + components = [ + [ + {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, + 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', + 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} + ] + ] + budget = None + target_gain = 20 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # Should be case 3 - minimise cost for target gain + assert opt.strategy_used.value == "case_3_solve_min_cost_for_target" + assert opt.solution_cost == 5962.5 + assert opt.solution_gain == 20.2 + + def test_just_budget(self): + components = [ + [ + {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, + 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', + 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} + ] + ] + budget = 10000 + target_gain = None + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # Should be case 3 - minimise cost for target gain + assert opt.strategy_used.value == "case_2_solve_max_gain_under_budget" + assert opt.solution_cost == 7787.068 + assert opt.solution_gain == 28.8 From c08ab7a76765c476290fe7d70cd0d7d3bff07c28 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 12:11:14 +0000 Subject: [PATCH 206/340] passing around allow slack --- .../optimiser/StrategicOptimiser.py | 10 +++++---- .../optimiser/funding_optimiser.py | 22 ++++++++----------- 2 files changed, 15 insertions(+), 17 deletions(-) diff --git a/recommendations/optimiser/StrategicOptimiser.py b/recommendations/optimiser/StrategicOptimiser.py index b75268eb..8ffc307c 100644 --- a/recommendations/optimiser/StrategicOptimiser.py +++ b/recommendations/optimiser/StrategicOptimiser.py @@ -1,6 +1,6 @@ from enum import Enum from mip import OptimizationStatus -from typing import Sequence, Optional, TypedDict, List +from typing import Mapping, Optional, TypedDict, List from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser @@ -41,9 +41,10 @@ class StrategicOptimiser: def __init__( self, - components: Sequence[Sequence[Measure]], + components: list[list[Mapping[str, int | float | str]]], budget: Optional[float] = None, target_gain: Optional[float] = None, + allow_slack: bool = False, verbose: bool = False, ) -> None: @@ -57,6 +58,7 @@ class StrategicOptimiser: self.budget = budget self.target_gain = target_gain self.verbose = verbose + self.allow_slack = allow_slack self.solution: List[Measure] = [] self.solution_cost: float = 0.0 @@ -128,7 +130,7 @@ class StrategicOptimiser: self.components, min_gain=self.target_gain, verbose=self.verbose, - allow_slack=False + allow_slack=self.allow_slack ) opt.setup() @@ -147,7 +149,7 @@ class StrategicOptimiser: self.components, max_cost=self.budget, max_gain=None, - allow_slack=False, + allow_slack=self.allow_slack, verbose=self.verbose ) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index aaf97226..80ba02fd 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -18,6 +18,7 @@ from backend.app.plan.schemas import ( ) from recommendations.optimiser.CostOptimiser import CostOptimiser from recommendations.optimiser.GainOptimiser import GainOptimiser +from recommendations.optimiser.StrategicOptimiser import StrategicOptimiser from utils.logger import setup_logger from backend.Funding import Funding from backend.app.BatterySapScorer import BatterySAPScorer @@ -1118,22 +1119,17 @@ def run_optimizer( if not input_measures: return None, 0.0, 0.0 - if budget is not None: + opt = StrategicOptimiser( + components=input_measures, + budget=budget, + target_gain=sub_target_gain, + allow_slack=allow_slack, + verbose=False, + ) - opt = GainOptimiser( - input_measures, max_cost=budget, max_gain=0 if sub_target_gain == 0 else (sub_target_gain or float("inf")), - allow_slack=allow_slack - ) - else: - if sub_target_gain is None: - raise ValueError("Either budget or target_gain must be provided.") - opt = CostOptimiser(input_measures, min_gain=sub_target_gain) - - opt.setup() opt.solve() - cost = sum([x["cost"] for x in opt.solution]) - return opt.solution, cost, opt.solution_gain + return opt.solution, opt.solution_cost, opt.solution_gain # ---- Define optimisation paths ---------------------------------------------------------- From 546cc2a58f6596750ae5330aa5d088dc1ed5f690 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 18 Feb 2026 12:17:23 +0000 Subject: [PATCH 207/340] added lambda in parrael code --- asset_list/AssetList.py | 2728 +++++++++++------ asset_list/app.py | 64 +- asset_list/requirements.txt | 2 +- .../terraform/lambda/_template/main.tf | 2 + .../terraform/lambda/_template/variables.tf | 5 + .../terraform/lambda/address2UPRN/main.tf | 3 + .../lambda/address2UPRN/variables.tf | 5 + .../lambda/modules/lambda_with_sqs/main.tf | 3 +- .../modules/lambda_with_sqs/variables.tf | 6 + .../modules/lambda_sqs_trigger/main.tf | 7 + .../modules/lambda_sqs_trigger/variables.tf | 6 + 11 files changed, 1852 insertions(+), 979 deletions(-) diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 36b3d58e..28e17e2a 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -25,21 +25,25 @@ import asset_list.mappings.outcomes as outcomes_mappings from recommendations.recommendation_utils import ( estimate_perimeter, estimate_external_wall_area, - estimate_number_of_floors + estimate_number_of_floors, ) from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes +from dotenv import load_dotenv + logger = setup_logger() +load_dotenv(dotenv_path="../backend/.env") + # OpenAI API Key (set this in your environment variables for security) -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") - +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "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") class DataRemapper: def __init__(self, standard_values, standard_map=None, max_tokens=1000): + print(f"{OPENAI_API_KEY}") """ Initialize the remapper with standard values and a predefined mapping. @@ -61,7 +65,9 @@ class DataRemapper: self.max_tokens = max_tokens # Limit for OpenAI API # Memoization for AI calls - self.ai_cache = {} # {tuple(unmapped_values): {original_value: standardized_value}} + self.ai_cache = ( + {} + ) # {tuple(unmapped_values): {original_value: standardized_value}} # Capture the reponse for debugging self.ai_response = None @@ -79,14 +85,16 @@ class DataRemapper: if not isinstance(text, str): return None text = text.strip().lower() - text = re.sub(r'[^\w\s]', '', text) # Remove punctuation + text = re.sub(r"[^\w\s]", "", text) # Remove punctuation # Replace double strings - text = re.sub(r'\s+', ' ', text) + text = re.sub(r"\s+", " ", text) return text def fuzzy_match(self, text): """Use fuzzy matching to find the closest standard value.""" - match, score = process.extractOne(text, self.standard_values) if text else (None, 0) + match, score = ( + process.extractOne(text, self.standard_values) if text else (None, 0) + ) return match if score >= self.fuzzy_threshold else None def count_tokens(self, text): @@ -98,7 +106,9 @@ class DataRemapper: if not unmapped_values: return {} - unmapped_tuple = tuple(sorted(unmapped_values)) # Ensure consistency for memoization + unmapped_tuple = tuple( + sorted(unmapped_values) + ) # Ensure consistency for memoization if unmapped_tuple in self.ai_cache: return self.ai_cache[unmapped_tuple] # Return memoized result @@ -180,7 +190,9 @@ class DataRemapper: # Rule-Based Check (Predefined Mapping) if cleaned_value in self.standard_map or value in self.standard_map: self.remap_dict[value] = ( - self.standard_map[cleaned_value] if cleaned_value in self.standard_map else self.standard_map[value] + self.standard_map[cleaned_value] + if cleaned_value in self.standard_map + else self.standard_map[value] ) continue @@ -237,22 +249,22 @@ class AssetList: "roof-description": "epc_roof_construction", "floor-description": "epc_floor_construction", "mainheat-description": "epc_heating_type", - 'mainheatcont-description': "epc_heating_controls", + "mainheatcont-description": "epc_heating_controls", "secondheat-description": "epc_secondary_heating", "transaction-type": "epc_reason", "energy-consumption-current": "epc_heat_demand", "photo-supply": "epc_photo_supply", - "estimated": "estimated" + "estimated": "estimated", } FIND_EPC_DATA_NAMES = { "heating_text": "epc_estiamted_heating_kwh", "hot_water_text": "epc_estimated_hotwater_kwh", - 'Assessor’s name': "epc_assessor_name", + "Assessor’s name": "epc_assessor_name", "Assessor's Telephone": "epc_assessor_telephone", "Assessor's Email": "epc_assessor_email", "Accreditation scheme": "epc_assessor_accreditation", "Assessor’s ID": "epc_assessor_id", - "Solar photovoltaics": "epc_solar_pv" + "Solar photovoltaics": "epc_solar_pv", } DATETIME_REMAP = { @@ -286,44 +298,69 @@ class AssetList: DOMNA_PROPERTY_ID = "domna_property_id" # Regular expression for identifying if the address might point to multiple units - MULTI_UNIT_REGEX = re.compile(r'\b([A-Za-z0-9]+)-([A-Za-z0-9]+)\b') + MULTI_UNIT_REGEX = re.compile(r"\b([A-Za-z0-9]+)-([A-Za-z0-9]+)\b") # List of columns relating to the non-intrusive data NON_INTRUSIVES_COLNAMES = [ - "Archetype", "Construction", "Insulated", "Material", "CIGA Check Required", - "PV, ACCESS ISSUE, SEE NOTES", "OFF GAS - ROOF ORIENTATION", - "Any further surveyor notes", 'Surveyors Name' + "Archetype", + "Construction", + "Insulated", + "Material", + "CIGA Check Required", + "PV, ACCESS ISSUE, SEE NOTES", + "OFF GAS - ROOF ORIENTATION", + "Any further surveyor notes", + "Surveyors Name", ] NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [ - "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", - "Does the property have cladding?", "Gable Wall Obstructions", + "Has the property been re-walled?", + "Is the property tile hung?", + "Does the property have a render?", + "Does the property have cladding?", + "Gable Wall Obstructions", "Does the property have foliage that needs removal?", - "Potential unsafe environment", "Date of Inspection", "Borescoped?" + "Potential unsafe environment", + "Date of Inspection", + "Borescoped?", ] # Another version of non-intrusives: NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [ - 'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Borescoped?', - 'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED', - 'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE', - 'NAME OF SURVEYOR' + "Archetype", + "Archetype 2", + "Construction", + "Insulated", + "Material", + "Borescoped?", + "CIGA Check Required", + "ROOF ORIENTATION", + "TILE HUNG", + "RENDERED", + "CLADDING", + "ACCESS ISSUES", + "FURTHER SURVEYOR NOTES", + "DATE", + "NAME OF SURVEYOR", ] # Solar non-intrusive fields NON_INTRUSIVES_SOLAR_COLNAMES = [ - 'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION', - 'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING', - 'Roof Tiles - CONCRETE/SLATE/ROSEMARY', - 'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)', - 'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE', - 'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW', - 'DATE', 'NAME OF SURVEYOR' + "PV, ACCESS ISSUE, SEE NOTES", + "ROOF ORIENTATION", + "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ", + "SHADING", + "Roof Tiles - CONCRETE/SLATE/ROSEMARY", + "NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)", + "SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE", + "IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW", + "DATE", + "NAME OF SURVEYOR", ] NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" - OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] + OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ["WFT Findings", "ECO Eligibility"] # This SAP threshold is a key search criteria for properties that may be eligible for extraction FILLED_CAVITY_SAP_THRESHOLD = 75 @@ -341,7 +378,9 @@ class AssetList: ATTRIBUTE_ESTIMATED_PERIMETER = "attribute_est_perimter" ATTRIBUTE_HEAT_LOSS_AREA = "attribute_heat_loss_area" ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS = "attribute_epc_roof_insulation_thickness" - ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = f"sap_rating_{FILLED_CAVITY_SAP_THRESHOLD}_and_below" + ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = ( + f"sap_rating_{FILLED_CAVITY_SAP_THRESHOLD}_and_below" + ) ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"epc_is_pre_{EPC_YEAR_THRESHOLD}" # These are the descriptions that we look for in the EPC data that are indicative of no insulation @@ -354,12 +393,17 @@ class AssetList: # List of strings that we look for in the EPC data, where substrings indicate that the wall is insulated EPC_INSULATED_WALLS_SUBSTRINGS = [ - ", insulated", "with external insulation", "with internal insulation", "filled cavity" + ", insulated", + "with external insulation", + "with internal insulation", + "filled cavity", ] # List of strings that we look for in the EPC data, where substrings indicate that the roof is insulated EPC_INSULATED_ROOF_SUBSTRINGS = [ - "(another dwelling above)", ", insulated", ", insulated (assumed) ", + "(another dwelling above)", + ", insulated", + ", insulated (assumed) ", ", ceiling insulated", ] @@ -374,35 +418,69 @@ class AssetList: # Work type prefixes: # Empties EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" - EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 'Non-Intrusive Data Shows Empty Cavity, built after 2002' - EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" - EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" - EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" - EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build" + EMPTY_CAVITY_NON_INTRUSIVE_YEAR = ( + "Non-Intrusive Data Shows Empty Cavity, built after 2002" + ) + EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = ( + "EPC Shows Empty Cavity, inspections show retro drilled" + ) + EPC_EMPTY_INSPECTIONS_FILLED = ( + "EPC Shows Empty Cavity, inspections show filled or other" + ) + EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = ( + "EPC Shows Empty Cavity, inspections show filled at build" + ) + EPC_EMPTY_INSPECTIONS_NON_CAVITY = ( + "EPC Shows Empty Cavity, inspections show non-cavity build" + ) EPC_EMPTY = "EPC Shows Empty Cavity" - LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " - "Non-cavity") + LANDLORD_EMPTY_INSPECTIONS_OTHER = ( + "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " + "Non-cavity" + ) # Extraction EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction" # Solar SOLAR_ELIGIBLE = "Solar Eligible" - SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = ( + "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + ) SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade" CRM_HISTORICAL_CAVITY_PRODUCT = { - "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity" + "id": 156989182176, + "unit_price": 0, + "name": "Historical ECO Cavity", } CRM_PRODUCTS = { - "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"}, - "Extract & Fill - ECO4": {"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"}, - "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"}, - "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"}, - "Solar PV + Heating Upgrade - ECO4": { - "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4" + "Empty Cavity - ECO4": { + "id": 82733738177, + "unit_price": 1000, + "name": "Empty Cavity - ECO4", }, - "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT + "Extract & Fill - ECO4": { + "id": 100307905778, + "unit_price": 500, + "name": "Extract & Fill - ECO4", + }, + "Solar PV - ECO4": { + "id": 82623589564, + "unit_price": 1608, + "name": "Solar PV - ECO4", + }, + "Solar PV + HHRSH - ECO4": { + "id": 155529972924, + "unit_price": 1608, + "name": "Solar PV + HHRSH - ECO4", + }, + "Solar PV + Heating Upgrade - ECO4": { + "id": 109265426665, + "unit_price": 1608, + "name": "Solar PV + Heating Upgrade - ECO4", + }, + "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT, } def __init__( @@ -427,13 +505,15 @@ class AssetList: landlord_sap=None, landlord_block_reference=None, phase=False, - header=0 + header=0, ): self.local_filepath = local_filepath self.sheet_name = sheet_name # Read in the data if local_filepath.endswith(".xlsx"): - self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name) + self.raw_asset_list = pd.read_excel( + local_filepath, header=header, sheet_name=sheet_name + ) else: self.raw_asset_list = pd.read_csv(local_filepath) self.standardised_asset_list = self.raw_asset_list.copy() @@ -459,21 +539,31 @@ class AssetList: self.phase = phase # We detect the presence of the non-intrusive columns - self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns + self.non_intrusives_present = ( + "CIGA Check Required" in self.raw_asset_list.columns + ) # We detect if we have the old format of non-intruvies - self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns + self.old_format_non_intrusives_present = ( + "WFT Findings" in self.raw_asset_list.columns + ) if self.old_format_non_intrusives_present: self.non_intrusives_present = False - self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + self.non_intrusives_eligibility = ( + "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + ) self.new_format_non_insturives_present = ( "Has the property been re-walled?" in self.raw_asset_list.columns ) - self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + self.new_format_non_insturives_present_v2 = ( + "TILE HUNG" in self.raw_asset_list.columns + ) - self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + self.solar_non_intrusives_present = ( + "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + ) # Names of columns self.landlord_property_id = landlord_property_id @@ -500,7 +590,7 @@ class AssetList: "property_type": None, "wall_construction": None, "heating_system": None, - "existing_pv": None + "existing_pv": None, } self.variable_mappings = {} @@ -510,8 +600,12 @@ class AssetList: self.keep_variables = [] # Finally, we handle the case where the landlord's property ID is actually the OS UPRN - if (self.landlord_uprn == self.landlord_property_id) and (self.landlord_property_id is not None): - self.standardised_asset_list[self.STANDARD_UPRN] = self.standardised_asset_list[self.landlord_uprn].copy() + if (self.landlord_uprn == self.landlord_property_id) and ( + self.landlord_property_id is not None + ): + self.standardised_asset_list[self.STANDARD_UPRN] = ( + self.standardised_asset_list[self.landlord_uprn].copy() + ) # Update the reference to landlord UPRn self.landlord_uprn = self.STANDARD_UPRN @@ -558,41 +652,63 @@ class AssetList: self.prefixes_to_products = { # Empty self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], + self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], # Extraction self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], # Solar self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], - self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], - self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], + self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS[ + "Solar PV - ECO4" + ], + self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS[ + "Solar PV + Heating Upgrade - ECO4" + ], } - def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): + def _extract_address1( + self, asset_list, full_address_col, postcode_col, method="first_two_words" + ): if method not in self.ADDRESS_1_CLEANING_METHODS: raise ValueError(f"Method {method} for producing address1 not recognized") if method == "first_two_words": - asset_list[self.address1_colname] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") + asset_list[self.address1_colname] = ( + asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") + ) return asset_list if method == "first_word": - asset_list[self.address1_colname] = asset_list[full_address_col].str.split(" ").str[0] + asset_list[self.address1_colname] = ( + asset_list[full_address_col].str.split(" ").str[0] + ) return asset_list if method == "house_number_extraction": asset_list[self.address1_colname] = asset_list.apply( - lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]), - axis=1 + lambda x: SearchEpc.get_house_number( + address=x[full_address_col], postcode=x[postcode_col] + ), + axis=1, ) for _, x in asset_list.iterrows(): - SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]) + SearchEpc.get_house_number( + address=x[full_address_col], postcode=x[postcode_col] + ) return asset_list raise ValueError(f"Method {method} not recognized") @@ -622,9 +738,16 @@ class AssetList: # Apply transformation self.standardised_asset_list[self.DOMNA_PROPERTY_ID] = ( - self.standardised_asset_list[self.full_address_colname] + - self.standardised_asset_list[self.postcode_colname] - ).str.strip().str.replace(r"[^\w\s]", "", regex=True).str.replace(" ", "").str.lower().apply(_make_hash) + ( + self.standardised_asset_list[self.full_address_colname] + + self.standardised_asset_list[self.postcode_colname] + ) + .str.strip() + .str.replace(r"[^\w\s]", "", regex=True) + .str.replace(" ", "") + .str.lower() + .apply(_make_hash) + ) @staticmethod def _strip_postcode_from_full_address(full_address, postcode): @@ -666,9 +789,7 @@ class AssetList: postcode = postcode.replace(" ", " ") if " " not in postcode: # Restructure it - return " ".join( - [postcode[:-3], postcode[-3:]] - ) + return " ".join([postcode[:-3], postcode[-3:]]) return postcode @@ -680,52 +801,72 @@ class AssetList: # Remove rows without a postcode if self.postcode_colname is not None: - self.standardised_asset_list = self.standardised_asset_list.dropna(subset=[self.postcode_colname]) + self.standardised_asset_list = self.standardised_asset_list.dropna( + subset=[self.postcode_colname] + ) # We also clean postcode columns where if there is not space, we create one - self.standardised_asset_list[self.postcode_colname] = self.standardised_asset_list[ - self.postcode_colname - ].apply(self._clean_postcode) + self.standardised_asset_list[self.postcode_colname] = ( + self.standardised_asset_list[self.postcode_colname].apply( + self._clean_postcode + ) + ) # We clean up portential non-breaking spaces, and double spaces for col in [ - c for c in [self.postcode_colname, self.full_address_colname, self.address1_colname] if - c is not None + c + for c in [ + self.postcode_colname, + self.full_address_colname, + self.address1_colname, + ] + if c is not None ]: - self.standardised_asset_list[col] = self.standardised_asset_list[col].astype(str) - self.standardised_asset_list[col] = self.standardised_asset_list[col].str.replace('\xa0', ' ', regex=False) - self.standardised_asset_list[col] = self.standardised_asset_list[col].str.replace(' ', ' ', regex=False) + self.standardised_asset_list[col] = self.standardised_asset_list[ + col + ].astype(str) + self.standardised_asset_list[col] = self.standardised_asset_list[ + col + ].str.replace("\xa0", " ", regex=False) + self.standardised_asset_list[col] = self.standardised_asset_list[ + col + ].str.replace(" ", " ", regex=False) if self.address1_colname is None: if self.address1_extraction_method is None: - raise ValueError("Missing address 1 - please specify an extraction method") + raise ValueError( + "Missing address 1 - please specify an extraction method" + ) self.address1_colname = self.STANDARD_ADDRESS_1 # If we do not have this, we produce it self.standardised_asset_list = self._extract_address1( asset_list=self.standardised_asset_list, full_address_col=self.full_address_colname, postcode_col=self.postcode_colname, - method=self.address1_extraction_method + method=self.address1_extraction_method, ) if self.full_address_colname is None: if not self.full_address_cols_to_concat: - raise ValueError("Missing full address - please specify columns to concatenate") + raise ValueError( + "Missing full address - please specify columns to concatenate" + ) self.full_address_colname = self.STANDARD_FULL_ADDRESS self.standardised_asset_list[self.full_address_colname] = ( self.standardised_asset_list[self.full_address_cols_to_concat].apply( - lambda x: ", ".join([y for y in x if not pd.isnull(y)]), - axis=1 + lambda x: ", ".join([y for y in x if not pd.isnull(y)]), axis=1 ) ) else: # Make sure to strip the postcode out of the full address - self.standardised_asset_list[self.full_address_colname] = self.standardised_asset_list.apply( - lambda x: self._strip_postcode_from_full_address( - full_address=x[self.full_address_colname], - postcode=x[self.postcode_colname] - ), - axis=1 + self.standardised_asset_list[self.full_address_colname] = ( + self.standardised_asset_list.apply( + lambda x: self._strip_postcode_from_full_address( + full_address=x[self.full_address_colname], + postcode=x[self.postcode_colname], + ), + axis=1, + ) ) # We create the domna property id @@ -734,7 +875,9 @@ class AssetList: # Clean up the UPRN column, if the landlord has provided them if self.landlord_uprn is not None: self.standardised_asset_list[self.landlord_uprn] = ( - self.standardised_asset_list[self.landlord_uprn].apply(self._convert_uprn) + self.standardised_asset_list[self.landlord_uprn].apply( + self._convert_uprn + ) ) # We keep just the columns we care about and will work through the various columns and standardise @@ -771,12 +914,15 @@ class AssetList: self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, self.landlord_existing_pv: self.STANDARD_EXISTING_PV, self.landlord_sap: self.STANDARD_SAP, - self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE + self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE, } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} non_intrusive_columns = [] - if self.non_intrusives_present and not self.new_format_non_insturives_present_v2: + if ( + self.non_intrusives_present + and not self.new_format_non_insturives_present_v2 + ): non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES if self.non_intrusives_eligibility: @@ -794,7 +940,9 @@ class AssetList: if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ - c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns + c + for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES + if c in self.standardised_asset_list.columns ] if "Warmfront Finding" in self.standardised_asset_list.columns: @@ -805,8 +953,11 @@ class AssetList: self.rename_map = { **self.rename_map, **dict( - zip(non_intrusive_columns, ["non-intrusives: " + c for c in non_intrusive_columns]) - ) + zip( + non_intrusive_columns, + ["non-intrusives: " + c for c in non_intrusive_columns], + ) + ), } # We idenfiy addresses which are likely to be multi-addresses (i.g are rooms x-y) @@ -818,11 +969,12 @@ class AssetList: # we see instances of "average thermal transmittance" in the description if self.landlord_wall_construction is not None: self.standardised_asset_list[self.landlord_wall_construction] = np.where( - self.standardised_asset_list[self.landlord_wall_construction].str.lower().str.contains( - "average thermal transmittance" - ) == True, - "new build - average thermal transmittance", self.standardised_asset_list[self.landlord_wall_construction] + .str.lower() + .str.contains("average thermal transmittance") + == True, + "new build - average thermal transmittance", + self.standardised_asset_list[self.landlord_wall_construction], ) else: # We want to make sure that we have a column for wall construction @@ -837,15 +989,21 @@ class AssetList: # We attempt to process the year built column if self.landlord_year_built is not None: # We check if we have a datetime - year built has not been renamed - if isinstance(self.standardised_asset_list[self.landlord_year_built].iloc[0], datetime): + if isinstance( + self.standardised_asset_list[self.landlord_year_built].iloc[0], datetime + ): # We treat any string columns - with common values we see self.standardised_asset_list[self.landlord_year_built] = ( - self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP) + self.standardised_asset_list[self.landlord_year_built].replace( + self.DATETIME_REMAP + ) ) no_data_codes = {"No Data": None} self.standardised_asset_list[self.landlord_year_built] = ( - self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes) + self.standardised_asset_list[self.landlord_year_built].replace( + no_data_codes + ) ) self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime( @@ -866,7 +1024,7 @@ class AssetList: "UNKNOWN", "This cell has an external reference that can't be shown or edited. Editing this cell will " "remove the external reference.", - 0 + 0, } if pd.isnull(date_str) or date_str in known_errors: @@ -889,7 +1047,9 @@ class AssetList: return int(match.group(1)) # Find all 4-digit years in string - years = [int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str)] + years = [ + int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str) + ] if years: return max(years) # Return most recent year @@ -898,38 +1058,42 @@ class AssetList: if len(numeric_str) == 4 and numeric_str.isdigit(): return int(numeric_str) - raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me") + raise NotImplementedError( + f"Unhandled format for year built, value is {date_str} - implement me" + ) - self.standardised_asset_list[self.landlord_year_built] = self.standardised_asset_list[ - self.landlord_year_built - ].apply(extract_year) + self.standardised_asset_list[self.landlord_year_built] = ( + self.standardised_asset_list[self.landlord_year_built].apply( + extract_year + ) + ) # We now create standard lookups to_remap = { self.landlord_property_type: { "standard_values": property_type_mappings.STANDARD_PROPERTY_TYPES, - "standard_map": property_type_mappings.PROPERTY_MAPPING + "standard_map": property_type_mappings.PROPERTY_MAPPING, }, self.landlord_built_form: { "standard_values": built_form_mappings.STANDARD_BUILT_FORMS, - "standard_map": built_form_mappings.BUILT_FORM_MAPPINGS + "standard_map": built_form_mappings.BUILT_FORM_MAPPINGS, }, self.landlord_wall_construction: { "standard_values": walls_mappings.STANDARD_WALL_CONSTRUCTIONS, - "standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS + "standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS, }, self.landlord_heating_system: { "standard_values": heating_mappings.STANDARD_HEATING_SYSTEMS, - "standard_map": heating_mappings.HEATING_MAPPINGS + "standard_map": heating_mappings.HEATING_MAPPINGS, }, self.landlord_existing_pv: { "standard_values": existing_pv_mappings.STANDARD_EXISTING_PV, - "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS + "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS, }, self.landlord_roof_construction: { "standard_values": roof_mappings.STANDARD_ROOF_CONSTRUCTIONS, - "standard_map": roof_mappings.ROOF_CONSTRUCTION_MAPPINGS - } + "standard_map": roof_mappings.ROOF_CONSTRUCTION_MAPPINGS, + }, } # Keep just entries where the key is not None to_remap = {k: v for k, v in to_remap.items() if k is not None} @@ -937,11 +1101,18 @@ class AssetList: for variable, config in to_remap.items(): logger.info("Standardising variable: %s", variable) # Strip each of these columns - self.standardised_asset_list[variable] = self.standardised_asset_list[variable].str.strip() + self.standardised_asset_list[variable] = self.standardised_asset_list[ + variable + ].str.strip() values_to_remap = self.standardised_asset_list[variable].unique() # We want to map this to our standardised list of property types we're interested in - remapper = DataRemapper(standard_values=config["standard_values"], standard_map=config["standard_map"]) - remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist()) + remapper = DataRemapper( + standard_values=config["standard_values"], + standard_map=config["standard_map"], + ) + remap_dictionary = remapper.standardize_list( + values_to_remap=values_to_remap.tolist() + ) self.variable_mappings[variable] = remap_dictionary # We now print out the variable mappings, which can be reviewed by the user, before the final standardised @@ -963,9 +1134,12 @@ class AssetList: if self.phase: # We filter on just the properties that have had an inspection - if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: + if ( + self.new_format_non_insturives_present_v2 + or self.solar_non_intrusives_present + ): self.standardised_asset_list = self.standardised_asset_list[ - ~self.standardised_asset_list['NAME OF SURVEYOR'].isin( + ~self.standardised_asset_list["NAME OF SURVEYOR"].isin( ["YET TO BE SURVEYED", "", None] ) ] @@ -974,7 +1148,9 @@ class AssetList: ] else: self.standardised_asset_list = self.standardised_asset_list[ - ~self.standardised_asset_list['Surveyors Name'].isin(["YET TO BE SURVEYED"]) + ~self.standardised_asset_list["Surveyors Name"].isin( + ["YET TO BE SURVEYED"] + ) ] if not self.variable_mappings and not override_empty_mappings: @@ -986,7 +1162,9 @@ class AssetList: self.standardised_asset_list[variable + "_original_from_landlord"] = ( self.standardised_asset_list[variable].copy() ) - self.standardised_asset_list[variable] = self.standardised_asset_list[variable].map(mapping) + self.standardised_asset_list[variable] = self.standardised_asset_list[ + variable + ].map(mapping) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): # Drop the dupes @@ -998,13 +1176,28 @@ class AssetList: # Keep a record of duplicates self.duplicated_addresses = self.standardised_asset_list[ self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() - ][[self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, self.postcode_colname]].copy() + ][ + [ + self.DOMNA_PROPERTY_ID, + self.full_address_colname, + self.address1_colname, + self.postcode_colname, + ] + ].copy() df = self.standardised_asset_list[ self.standardised_asset_list[self.DOMNA_PROPERTY_ID].isin( - self.duplicated_addresses[self.DOMNA_PROPERTY_ID]) - ][[self.landlord_property_id, self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, - self.postcode_colname]].copy() + self.duplicated_addresses[self.DOMNA_PROPERTY_ID] + ) + ][ + [ + self.landlord_property_id, + self.DOMNA_PROPERTY_ID, + self.full_address_colname, + self.address1_colname, + self.postcode_colname, + ] + ].copy() df = df.sort_values(by=[self.DOMNA_PROPERTY_ID]) @@ -1020,13 +1213,14 @@ class AssetList: k + "_original_from_landlord" for k in self.variable_mappings.keys() ] - self.standardised_asset_list = self.standardised_asset_list[self.keep_variables].rename( - columns=self.rename_map - ) + self.standardised_asset_list = self.standardised_asset_list[ + self.keep_variables + ].rename(columns=self.rename_map) # We fill any standard columns that are not in the data because they were not provided by the landlord missing_variables = [ - v for v in [ + v + for v in [ self.STANDARD_EXISTING_PV, self.STANDARD_HEATING_SYSTEM, self.STANDARD_UPRN, @@ -1035,7 +1229,8 @@ class AssetList: self.STANDARD_WALL_CONSTRUCTION, self.STANDARD_HEATING_SYSTEM, self.STANDARD_BLOCK_REFERENCE, - ] if v not in self.standardised_asset_list.columns + ] + if v not in self.standardised_asset_list.columns ] for v in missing_variables: self.standardised_asset_list[v] = None @@ -1050,13 +1245,13 @@ class AssetList: self.standardised_asset_list[self.STANDARD_SAP] = ( self.standardised_asset_list[self.STANDARD_SAP] .astype(str) - .str.replace('\xa0', ' ', regex=False) + .str.replace("\xa0", " ", regex=False) .str.strip() ) self.standardised_asset_list[self.STANDARD_SAP] = np.where( self.standardised_asset_list[self.STANDARD_SAP] == "", None, - self.standardised_asset_list[self.STANDARD_SAP] + self.standardised_asset_list[self.STANDARD_SAP], ) self.standardised_asset_list[self.STANDARD_SAP] = ( self.standardised_asset_list[self.STANDARD_SAP].astype(float) @@ -1065,10 +1260,13 @@ class AssetList: self.standardised_asset_list[self.STANDARD_SAP] = np.where( self.standardised_asset_list[self.STANDARD_SAP] == 0, None, - self.standardised_asset_list[self.STANDARD_SAP] + self.standardised_asset_list[self.STANDARD_SAP], ) - has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum() + has_blocks_of_flats = ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" + ).sum() # Perform block splitting, ahead of fetching the EPC data # If we blocks of flats, without a landlord block reference, we create this @@ -1083,13 +1281,12 @@ class AssetList: :return: """ if self.DOMNA_PROPERTY_ID not in df.columns: - raise ValueError(f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}") + raise ValueError( + f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}" + ) if df[self.DOMNA_PROPERTY_ID].duplicated().sum(): - df = df.drop_duplicates( - subset=[self.DOMNA_PROPERTY_ID], - keep="first" - ) + df = df.drop_duplicates(subset=[self.DOMNA_PROPERTY_ID], keep="first") self.standardised_asset_list = self.standardised_asset_list.merge( df, how="left", on=self.DOMNA_PROPERTY_ID @@ -1098,9 +1295,14 @@ class AssetList: def extract_attributes(self, pull_epc=True): # Used to extracty the typical attributes that we use to identify viable work - self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR] = ( - self.standardised_asset_list[self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]] | - ~self.standardised_asset_list[self.EPC_API_DATA_NAMES["photo-supply"]].isin(["0.0", 0, None, "", np.nan]) + self.standardised_asset_list[ + self.ATTRIBUTE_HAS_SOLAR + ] = self.standardised_asset_list[ + self.FIND_EPC_DATA_NAMES["Solar photovoltaics"] + ] | ~self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["photo-supply"] + ].isin( + ["0.0", 0, None, "", np.nan] ) accepted_epc_property_types = ["House", "Flat", "Bungalow", "Maisonette"] @@ -1109,83 +1311,127 @@ class AssetList: # 1) Take the property type provided by the HA themselves # 2) In absence of that, take the EPC property type # 3) Otherwise use None - self.standardised_asset_list[self.ATTRIBUTE_NUMBER_OF_FLOORS] = self.standardised_asset_list.apply( - lambda x: estimate_number_of_floors( - property_type=( - str(x[self.STANDARD_PROPERTY_TYPE]).title() if - str(x[self.STANDARD_PROPERTY_TYPE]).title() in accepted_epc_property_types else ( - x[self.EPC_API_DATA_NAMES["property-type"]] if not - pd.isnull(x[self.EPC_API_DATA_NAMES["property-type"]]) else None + self.standardised_asset_list[self.ATTRIBUTE_NUMBER_OF_FLOORS] = ( + self.standardised_asset_list.apply( + lambda x: estimate_number_of_floors( + property_type=( + str(x[self.STANDARD_PROPERTY_TYPE]).title() + if str(x[self.STANDARD_PROPERTY_TYPE]).title() + in accepted_epc_property_types + else ( + x[self.EPC_API_DATA_NAMES["property-type"]] + if not pd.isnull( + x[self.EPC_API_DATA_NAMES["property-type"]] + ) + else None + ) ) - ) - ), - axis=1 + ), + axis=1, + ) ) self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]].astype(float) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ].astype(float) ) # Replace "" value with None - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]].replace("", None) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ].replace( + "", None ) - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]].astype(float) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ].astype( + float ) # Estimate the perimeter # Handle funky edge case - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = np.where( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] == 0), - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]].mean(), - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] - ) - - self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = self.standardised_asset_list.apply( - lambda x: estimate_perimeter( - floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], - num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], - ), axis=1 - ) - - self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = self.standardised_asset_list.apply( - lambda x: estimate_external_wall_area( - num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS], - floor_height=( - float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if - not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) else 2.5 + self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = ( + np.where( + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ] + == 0 ), - perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER], - built_form=x[self.EPC_API_DATA_NAMES["built-form"]] - ), - axis=1 + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ].mean(), + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ], + ) ) - + + self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = ( + self.standardised_asset_list.apply( + lambda x: estimate_perimeter( + floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + ), + axis=1, + ) + ) + + self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = ( + self.standardised_asset_list.apply( + lambda x: estimate_external_wall_area( + num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + floor_height=( + float(x[self.EPC_API_DATA_NAMES["floor-height"]]) + if not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) + else 2.5 + ), + perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER], + built_form=x[self.EPC_API_DATA_NAMES["built-form"]], + ), + axis=1, + ) + ) + col = self.EPC_API_DATA_NAMES["roof-description"] - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply( - lambda x: RoofAttributes(description=x[col]).process()[ - "insulation_thickness"] if not pd.isnull( - x[col]) else None, - axis=1 + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = ( + self.standardised_asset_list.apply( + lambda x: ( + RoofAttributes(description=x[col]).process()["insulation_thickness"] + if not pd.isnull(x[col]) + else None + ), + axis=1, + ) ) - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "") + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].str.replace("+", "") ) # We produce some additional fields # 1) Is the SAP rating below C75 self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]].astype(float) <= - self.FILLED_CAVITY_SAP_THRESHOLD + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ].astype(float) + <= self.FILLED_CAVITY_SAP_THRESHOLD ) # 2) Flag anything where the EPC is older than 5 years self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] = ( pd.to_datetime( self.standardised_asset_list[self.EPC_API_DATA_NAMES["inspection-date"]] - ).dt.year < self.EPC_YEAR_THRESHOLD + ).dt.year + < self.EPC_YEAR_THRESHOLD ) self.process_age_band() @@ -1195,30 +1441,37 @@ class AssetList: for _, x in self.standardised_asset_list.iterrows(): if pd.isnull(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) or ( - x[self.EPC_API_DATA_NAMES["construction-age-band"]] in Definitions.DATA_ANOMALY_MATCHES + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + in Definitions.DATA_ANOMALY_MATCHES ): processed_age_band.append( { self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": None, "epc_year_upper_bound": None, - "does_age_band_match_epc_age_band": "No EPC Age Band" + "does_age_band_match_epc_age_band": "No EPC Age Band", } ) continue # We exatract the upper and lower bounds if x[self.EPC_API_DATA_NAMES["construction-age-band"]] in [ - "England and Wales: 2007 onwards", "England and Wales: 2012 onwards" + "England and Wales: 2007 onwards", + "England and Wales: 2012 onwards", ]: - year_lower_bound = 2007 if x[self.EPC_API_DATA_NAMES[ - "construction-age-band"]] == "England and Wales: 2007 onwards" else 2012 + year_lower_bound = ( + 2007 + if x[self.EPC_API_DATA_NAMES["construction-age-band"]] + == "England and Wales: 2007 onwards" + else 2012 + ) if pd.isnull(x[self.STANDARD_YEAR_BUILT]): age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] >= year_lower_bound + "EPC Age Band Matches Year Built" + if x[self.STANDARD_YEAR_BUILT] >= year_lower_bound else "EPC Age Band is older than Year Built" ) @@ -1227,18 +1480,22 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": year_lower_bound, "epc_year_upper_bound": None, - "does_age_band_match_epc_age_band": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches, } ) continue - if x[self.EPC_API_DATA_NAMES["construction-age-band"]] == "England and Wales: before 1900": + if ( + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + == "England and Wales: before 1900" + ): if pd.isnull(x[self.STANDARD_YEAR_BUILT]): age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] < 1900 + "EPC Age Band Matches Year Built" + if x[self.STANDARD_YEAR_BUILT] < 1900 else "EPC Age Band is newer than Year Built" ) @@ -1247,7 +1504,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": None, "epc_year_upper_bound": 1899, - "does_age_band_match_epc_age_band": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches, } ) continue @@ -1258,35 +1515,44 @@ class AssetList: age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] == int( - x[self.EPC_API_DATA_NAMES["construction-age-band"]] - ) + "EPC Age Band Matches Year Built" + if x[self.STANDARD_YEAR_BUILT] + == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) else "EPC Age Band is different from Year Built" ) processed_age_band.append( { self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], - "epc_year_lower_bound": int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]), - "epc_year_upper_bound": int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]), - "does_age_band_match_epc_age_band": age_band_matches + "epc_year_lower_bound": int( + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + ), + "epc_year_upper_bound": int( + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + ), + "does_age_band_match_epc_age_band": age_band_matches, } ) continue # Oherwise, we extract the upper and lower bounds - age_band = x[self.EPC_API_DATA_NAMES["construction-age-band"]].split(": ")[1] + age_band = x[self.EPC_API_DATA_NAMES["construction-age-band"]].split(": ")[ + 1 + ] lower_date, upper_date = age_band.split("-") if not x[self.STANDARD_YEAR_BUILT]: age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) and ( - x[self.STANDARD_YEAR_BUILT] <= float(upper_date) + "EPC Age Band Matches Year Built" + if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) + and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date)) + else ( + "EPC Age Band is older than Year Built" + if x[self.STANDARD_YEAR_BUILT] > float(upper_date) + else "EPC Age Band is newer than Year Built" ) - else "EPC Age Band is older than Year Built" if x[self.STANDARD_YEAR_BUILT] > float(upper_date) - else "EPC Age Band is newer than Year Built" ) processed_age_band.append( @@ -1294,7 +1560,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": int(lower_date), "epc_year_upper_bound": int(upper_date), - "does_age_band_match_epc_age_band": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches, } ) @@ -1310,34 +1576,54 @@ class AssetList: # We add a SAP category for all work type identification self.standardised_asset_list["SAP Category"] = np.where( ( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54) | - (self.standardised_asset_list[self.STANDARD_SAP] <= 54) + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 54 + ) + | (self.standardised_asset_list[self.STANDARD_SAP] <= 54) ), "SAP Rating 54 or less", np.where( ( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) | - (self.standardised_asset_list[self.STANDARD_SAP] <= 68) + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 68 + ) + | (self.standardised_asset_list[self.STANDARD_SAP] <= 68) ), "SAP Rating 55-68", np.where( ( ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= - self.EMPTY_CAVITY_SAP_THRESHOLD - ) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) + | ( + self.standardised_asset_list[self.STANDARD_SAP] + <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) ), f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", - f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more", ), - ) + ), ) self.standardised_asset_list["SAP Category"] = np.where( - pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) & - pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) + & pd.isnull( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + ), "SAP Unknown", - self.standardised_asset_list["SAP Category"] + self.standardised_asset_list["SAP Category"], ) else: @@ -1345,55 +1631,81 @@ class AssetList: # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more) self.standardised_asset_list["SAP Category"] = np.where( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54), + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 54 + ), "SAP Rating 54 or less", np.where( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68), + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 68 + ), "SAP Rating 55-68", np.where( ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= - self.EMPTY_CAVITY_SAP_THRESHOLD + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= self.EMPTY_CAVITY_SAP_THRESHOLD ), f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", - f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more", ), - ) + ), ) self.standardised_asset_list["SAP Category"] = np.where( - pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + pd.isnull( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + ), "SAP Unknown", - self.standardised_asset_list["SAP Category"] + self.standardised_asset_list["SAP Category"], ) # Before we being, we identify if a property has solar already as we use this # for identifying cavity jobs if self.non_intrusives_present and not self.old_format_non_intrusives_present: - if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: + if ( + self.new_format_non_insturives_present_v2 + or self.solar_non_intrusives_present + ): existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin( - ["ALREADY HAS SOLAR PV", "ALREADY HAS PV"] - ) + self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] + .str.strip() + .isin(["ALREADY HAS SOLAR PV", "ALREADY HAS PV"]) ) else: existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" + self.standardised_asset_list[ + "non-intrusives: PV, ACCESS ISSUE, SEE NOTES" + ] + == "SOLAR PV ON ROOF" ) elif self.old_format_non_intrusives_present: existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( - ["solar pv on roof"] - ) + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .isin(["solar pv on roof"]) ) else: # We don't have an indication existing_solar_non_intrusives_check = False self.standardised_asset_list["property_has_solar"] = ( - (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") | - existing_solar_non_intrusives_check | - (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) + ( + self.standardised_asset_list[self.STANDARD_EXISTING_PV] + == "already has PV" + ) + | existing_solar_non_intrusives_check + | (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) ) # If we have non-intrusives completed, we can use this to identify work types @@ -1407,25 +1719,41 @@ class AssetList: if self.non_intrusives_present: if self.new_format_non_insturives_present_v2: non_intrusives_wall_filter = ( - (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL", "EMPTY CAVITY"]) + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + ["EMPTY", "PARTIAL", "EMPTY CAVITY"] ) else: non_intrusives_wall_filter = ( - (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + ["EMPTY", "PARTIAL"] ) elif self.old_format_non_intrusives_present: - non_intrusives_wall_filter = ( - self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( - ["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial", "empty cav"] - ) | ( - ( - self.standardised_asset_list['non-intrusives: WFT Findings'] - .str.lower().str.strip().str.contains("empty cavity|partial fill") & - ~self.standardised_asset_list['non-intrusives: WFT Findings'] - .astype(str).str.lower().str.strip().str.contains("major access issues") - ) + non_intrusives_wall_filter = self.standardised_asset_list[ + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( + [ + "empty cavity", + "partial fill", + "empty", + "EMPTY CAVITY 70MM", + "partial", + "empty cav", + ] + ) | ( + ( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .str.contains("empty cavity|partial fill") + & ~self.standardised_asset_list["non-intrusives: WFT Findings"] + .astype(str) + .str.lower() + .str.strip() + .str.contains("major access issues") ) ) else: @@ -1433,11 +1761,17 @@ class AssetList: non_intrusives_wall_filter = False if self.landlord_year_built is None: - year_built_filter = self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD + year_built_filter = ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) else: year_built_filter = ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | - (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) + self.standardised_asset_list[self.STANDARD_YEAR_BUILT] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) | ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD ) # Criteria: @@ -1446,74 +1780,118 @@ class AssetList: # The EPC year is before 2002 # We also flag where the property has solar on the roof, because this is a signal of a high EPC rating self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - year_built_filter & ( - ~self.standardised_asset_list["property_has_solar"] + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) + & non_intrusives_wall_filter + & year_built_filter + & (~self.standardised_asset_list["property_has_solar"]) ) - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - year_built_filter & - ( + self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity_has_solar" + ] = ( + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) + ) + & non_intrusives_wall_filter + & year_built_filter + & ( # If the property has solar, there's a chance it won't qualify self.standardised_asset_list["property_has_solar"] ) ) # We also add a filter on anything that was generally identified by the non-intrusives - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_year_filter"] = ( - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] & - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter + self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity_no_year_filter" + ] = ( + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity_has_solar" + ] + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) + ) + & non_intrusives_wall_filter ) - if (not self.non_intrusives_eligibility) and (not self.old_format_non_intrusives_present): + if (not self.non_intrusives_eligibility) and ( + not self.old_format_non_intrusives_present + ): # If we have NO inspections data, we capture all of the wall types and don't filter on age of the EPC self.standardised_asset_list["epc_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ] + .str.lower() + .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) + & ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) ) else: self.standardised_asset_list["epc_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) & ( - ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ] + .str.lower() + .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) + & ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + & (~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]) + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) ) self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & - ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | - (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + ["uninsulated cavity"] + ) + & ( + ( + self.standardised_asset_list[self.STANDARD_YEAR_BUILT] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + | ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + ) + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) ) # Finally, we create a flag to indicate that the cavity is empty, based on the criteria above self.standardised_asset_list["cavity_is_empty"] = ( - non_intrusives_wall_filter | - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) | - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) + non_intrusives_wall_filter + | self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]] + .str.lower() + .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) + | self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + ["uninsulated cavity"] + ) ) ###################################################### @@ -1524,127 +1902,211 @@ class AssetList: if self.non_intrusives_present: extraction_wall_filter = ( - (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") & - (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) & - (~self.standardised_asset_list['non-intrusives: Material'].isin( - ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"] - )) + ( + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) + & ( + self.standardised_asset_list["non-intrusives: Insulated"].isin( + ["RETRO DRILLED", "FILLED AT BUILD"] + ) + ) + & ( + ~self.standardised_asset_list["non-intrusives: Material"].isin( + [ + "GREY LOOSE BEAD", + "COMPACTED BEAD", + "FIBRE BATT NO CAVITY", + "EMPTY NARROW BELOW 30mm", + ] + ) + ) ) if self.non_intrusives_eligibility: # If we have the eligibility column, we check if the wall is eligible extraction_wall_filter = ( - extraction_wall_filter & - ~self.standardised_asset_list["non-intrusives: Eligibility (Red/Yellow/Green)"].isin( - ["RED"] - ) + extraction_wall_filter + & ~self.standardised_asset_list[ + "non-intrusives: Eligibility (Red/Yellow/Green)" + ].isin(["RED"]) ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - extraction_wall_filter & year_built_filter - ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = ( - extraction_wall_filter & ~year_built_filter - ) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] = (extraction_wall_filter & year_built_filter) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] = (extraction_wall_filter & ~year_built_filter) elif self.old_format_non_intrusives_present: print("Review these categories!!!!") extraction_wall_filter = ( - self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .isin( [ - 'blown in yellow wool', 'retro drilled & filled', 'white fibre from build', - 'foam filled from build', 'retro drilled gas in block', 'block in rock wool', 'rdf / tilehung', - 'fibre from build', 'blown in rock wool', 'rdf / tile hung', 'retro drilled', - 'rock wool from build', 'part rendered retro drilled', 'white fibtr from build.', - 'retro drilled and filled', 'blown in white wool', 'blown in yellow fibre from build', 'rdf', - 'polybead', 'foam filled', 'blown in white bead from build', 'blown in yellow fibre', - 'retro drilled det', 'blown in rockwool', 'retro drilled det empty cav', 'retro drilled end', - 'retro filled extension', 'retro filled', 'foam' + "blown in yellow wool", + "retro drilled & filled", + "white fibre from build", + "foam filled from build", + "retro drilled gas in block", + "block in rock wool", + "rdf / tilehung", + "fibre from build", + "blown in rock wool", + "rdf / tile hung", + "retro drilled", + "rock wool from build", + "part rendered retro drilled", + "white fibtr from build.", + "retro drilled and filled", + "blown in white wool", + "blown in yellow fibre from build", + "rdf", + "polybead", + "foam filled", + "blown in white bead from build", + "blown in yellow fibre", + "retro drilled det", + "blown in rockwool", + "retro drilled det empty cav", + "retro drilled end", + "retro filled extension", + "retro filled", + "foam", ] ) ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - extraction_wall_filter - ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] = extraction_wall_filter + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] = False else: - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = False - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] = False + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] = False ###################################################### # Solar ###################################################### # Criteria: # Check 1: Does the property have a valid heating system? - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] = ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - [ - "air source heat pump", - "ground source heat pump", - "high heat retention storage heaters", - "electric boiler" - ] - ) + self.standardised_asset_list[ + "solar_landlord_data_indicates_correct_heating_system" + ] = self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + [ + "air source heat pump", + "ground source heat pump", + "high heat retention storage heaters", + "electric boiler", + ] ) - self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["electric storage heaters", "room heaters", "electric radiators", "no heating", "electric fuel"] - ) + self.standardised_asset_list[ + "solar_landlord_data_indicates_needs_heating_upgrade" + ] = self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + [ + "electric storage heaters", + "room heaters", + "electric radiators", + "no heating", + "electric fuel", + ] ) - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] - .str.lower().str.contains("air source heat pump|ground source heat pump|boiler and radiators, electric") - ) | ( + self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] = ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains( + "air source heat pump|ground source heat pump|boiler and radiators, electric" + ) + ) | ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains("electric storage heaters") + & ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters" - ) & ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES[ - "mainheatcont-description"]] == "Controls for high heat retention storage heaters" - ) + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + == "Controls for high heat retention storage heaters" ) ) # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the # landlord data suggests otherwise (e.g. there's a gas boiler), we default to what the landlord has told us - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters|room heaters" - ) & ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] - ] != "Controls for high heat retention storage heaters" - ) - ) & ( - ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["district heating", "communal heating", "communal gas boiler"] - ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ") + self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] = ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains("electric storage heaters|room heaters") + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + != "Controls for high heat retention storage heaters" ) + ) & ( + ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["district heating", "communal heating", "communal gas boiler"] + ) + & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] + .astype(str) + .str.contains("gas ") ) # Basic check - both of the previous two shouldn't be true simultaneously if ( - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] & - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] + & self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] ).sum(): - logger.info("We have an example of both heating system checks being true - checking known cases") - known_edge_cases = ['Ground source heat pump, radiators, electric, Electric storage heaters'] + logger.info( + "We have an example of both heating system checks being true - checking known cases" + ) + known_edge_cases = [ + "Ground source heat pump, radiators, electric, Electric storage heaters" + ] error_cases = self.standardised_asset_list[ ( - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] & - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] + & self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] ) ] - if all(error_cases[self.EPC_API_DATA_NAMES["mainheat-description"]].isin(known_edge_cases)): + if all( + error_cases[self.EPC_API_DATA_NAMES["mainheat-description"]].isin( + known_edge_cases + ) + ): logger.info("Within known edge cases") else: - raise ValueError("Both heating system checks are true - this should not be possible") + raise ValueError( + "Both heating system checks are true - this should not be possible" + ) # Check 3: Does the property meet the fabric condition # Solar PV installs are subject to the minimum insulation requirements which means: @@ -1663,19 +2125,19 @@ class AssetList: # With this in mind, we look for 2 clases # 1) The property is fully insulated apart from the loft (<200mm insulation) # 2) THe property is fully insulated - self.standardised_asset_list["solar_landlord_walls_insulated"] = ( - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( - [ - "filled cavity", - "insulated solid brick", - "insulated timber frame", - "uninsulated cavity", - "insulated system built", - "insulated granite or whinstone", - "insulated sandstone or limestone", - "new build - average thermal transmittance" - ] - ) + self.standardised_asset_list[ + "solar_landlord_walls_insulated" + ] = self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + [ + "filled cavity", + "insulated solid brick", + "insulated timber frame", + "uninsulated cavity", + "insulated system built", + "insulated granite or whinstone", + "insulated sandstone or limestone", + "new build - average thermal transmittance", + ] ) if self.non_intrusives_present: @@ -1685,31 +2147,43 @@ class AssetList: ) ) elif self.old_format_non_intrusives_present: - self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = ( - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( - [ - "retro drilled", "retro filled", "ewi", "retro drilled/ solid", "retro drilled and filled", - ] - ) | - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().str.contains( - "retro drilled" - ) + self.standardised_asset_list[ + "solar_non_intrusives_walls_insulated" + ] = self.standardised_asset_list[ + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( + [ + "retro drilled", + "retro filled", + "ewi", + "retro drilled/ solid", + "retro drilled and filled", + ] + ) | self.standardised_asset_list[ + "non-intrusives: WFT Findings" + ].str.lower().str.strip().str.contains( + "retro drilled" ) else: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[ self.EPC_API_DATA_NAMES["walls-description"] - ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None) + ].apply( + lambda x: ( + WallAttributes(x).process()["thermal_transmittance"] + if not pd.isnull(x) + else None + ) + ) self.standardised_asset_list["solar_epc_walls_insulated"] = ( - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["walls-description"]].str.lower().str.contains( - "|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS) - ) - ) | ( - self.standardised_asset_list["walls_u_value"].apply(lambda x: x <= 0.7 if not pd.isnull(x) else False) + self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]] + .str.lower() + .str.contains("|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS)) + ) | ( + self.standardised_asset_list["walls_u_value"].apply( + lambda x: x <= 0.7 if not pd.isnull(x) else False ) ) @@ -1722,7 +2196,7 @@ class AssetList: roof_data.append( { self.EPC_API_DATA_NAMES["roof-description"]: desc, - **RoofAttributes(desc).process() + **RoofAttributes(desc).process(), } ) roof_data = pd.DataFrame(roof_data) @@ -1733,31 +2207,38 @@ class AssetList: # If the u-value of a roof is less than 0.7 we consider it insulated self.standardised_asset_list["solar_epc_roof_insulated"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]] + .str.lower() + .str.contains( "|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), - ) | ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( - lambda x: int(x) >= 200 if str(x).isdigit() else False - ) - ) | ( + ) + | ( + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].apply(lambda x: int(x) >= 200 if str(x).isdigit() else False) + ) + | ( self.standardised_asset_list["roof_u_value"].apply( lambda x: x <= 0.7 if not pd.isnull(x) else False ) ) ) - self.standardised_asset_list["solar_epc_loft_needs_topup"] = ( - self.standardised_asset_list[ - self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( - lambda x: int(x) < 200 if str(x).isdigit() else False - ) | ( - ( - self.standardised_asset_list["is_loft"] | self.standardised_asset_list["is_pitched"] - ) & ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].isin( - ["below average", "none"] - ) - ) + self.standardised_asset_list[ + "solar_epc_loft_needs_topup" + ] = self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].apply( + lambda x: int(x) < 200 if str(x).isdigit() else False + ) | ( + ( + self.standardised_asset_list["is_loft"] + | self.standardised_asset_list["is_pitched"] + ) + & ( + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].isin(["below average", "none"]) ) ) @@ -1768,13 +2249,14 @@ class AssetList: # Check if the boiler is electric # We check if it contains both the terms boiler & electric self.standardised_asset_list["has_electric_boiler"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] - .str.lower().isin( - ["boiler and radiators, electric"]) - ) | ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] == "electric boiler" - ) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .isin(["boiler and radiators, electric"]) + ) | ( + self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] + == "electric boiler" ) #################################### @@ -1783,14 +2265,22 @@ class AssetList: # Set up the filters to stop repetition correct_heating_system = ( - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] | - self.standardised_asset_list["has_electric_boiler"] + self.standardised_asset_list[ + "solar_landlord_data_indicates_correct_heating_system" + ] + | self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] + | self.standardised_asset_list["has_electric_boiler"] ) needs_heating_upgrade = ( - self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] | - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + self.standardised_asset_list[ + "solar_landlord_data_indicates_needs_heating_upgrade" + ] + | self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] ) # The requirements for walls are: @@ -1799,13 +2289,17 @@ class AssetList: walls_meet_solar_requirements = ( # The landlord is saying the walls are insulated - self.standardised_asset_list["solar_landlord_walls_insulated"] | + self.standardised_asset_list["solar_landlord_walls_insulated"] + | # EPC data is saying the walls are insulated - self.standardised_asset_list["solar_epc_walls_insulated"] | + self.standardised_asset_list["solar_epc_walls_insulated"] + | # Non-intrusives are saying the walls are insulated - self.standardised_asset_list["solar_non_intrusives_walls_insulated"] | + self.standardised_asset_list["solar_non_intrusives_walls_insulated"] + | # It's empty cavity - self.standardised_asset_list["cavity_is_empty"] | + self.standardised_asset_list["cavity_is_empty"] + | # It's a cavity wall self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( ["filled cavity", "partial insulated cavity"] @@ -1816,7 +2310,8 @@ class AssetList: if all(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "unknown"): # Use EPC not_a_flat = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] != "Flat" + self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] + != "Flat" ) else: not_a_flat = ( @@ -1824,32 +2319,40 @@ class AssetList: ) solar_roof_meets_criteria = ( - self.standardised_asset_list["solar_epc_roof_insulated"] | - self.standardised_asset_list["solar_epc_loft_needs_topup"] + self.standardised_asset_list["solar_epc_roof_insulated"] + | self.standardised_asset_list["solar_epc_loft_needs_topup"] ) self.standardised_asset_list["solar_eligible"] = ( # Property isn't a flag - not_a_flat & + not_a_flat + & # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & + correct_heating_system + & # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & + ~self.standardised_asset_list["property_has_solar"] + & # The walls are insulated - walls_meet_solar_requirements & + walls_meet_solar_requirements + & # Roof meets criteria solar_roof_meets_criteria ) # With heating upgrade self.standardised_asset_list["solar_eligible_needs_heating_upgrade"] = ( - not_a_flat & + not_a_flat + & # Needs heating upgrade - needs_heating_upgrade & + needs_heating_upgrade + & # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & + ~self.standardised_asset_list["property_has_solar"] + & # The walls are insulated - walls_meet_solar_requirements & + walls_meet_solar_requirements + & # Roof meets criteria solar_roof_meets_criteria ) @@ -1857,15 +2360,23 @@ class AssetList: # We check for a specific sub-set of properties which are uninsulated solid wall properties that are EPC E # or below (we'll use 57 as a threshold) - These are for a pilot with Net Zero Renewables self.standardised_asset_list["solar_eligible_solid_wall_uninsulated"] = ( - not_a_flat & + not_a_flat + & # Landlord data or EPC data indicates the heating system is appropriate - in this case, we can also take # electric boilers - correct_heating_system & + correct_heating_system + & # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & + ~self.standardised_asset_list["property_has_solar"] + & # The walls are uninsulated solid - ~walls_meet_solar_requirements & - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 57) + ~walls_meet_solar_requirements + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 57 + ) ) # Drop anything we don't need @@ -1875,100 +2386,128 @@ class AssetList: # Adjust flagged extraction jobs to remove anything for solar self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - ~self.standardised_asset_list["solar_eligible"] + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] + & ~self.standardised_asset_list["solar_eligible"] ) # Finally, we note why each property has been flagged self.standardised_asset_list["cavity_reason"] = None empty_cavity_map = { - "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ", + "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + + ": ", "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " - "already has solar: ", + "already has solar: ", "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " - f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", - + f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", } for variable, description in empty_cavity_map.items(): self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list[variable] & - pd.isnull(self.standardised_asset_list["cavity_reason"]), + self.standardised_asset_list[variable] + & pd.isnull(self.standardised_asset_list["cavity_reason"]), description + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) # We break the cavity reason into a few different categories, when the EPC is different from inspections if self.old_format_non_intrusives_present: self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( - [ - "retro drilled and filled", "retro drilled", "retro filled", "retro drilled & filled", - ] - )) & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & ( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .isin( + [ + "retro drilled and filled", + "retro drilled", + "retro filled", + "retro drilled & filled", + ] + ) + ) + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ - "SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[ - "SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) elif self.non_intrusives_present: self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & ( + self.standardised_asset_list["non-intrusives: Insulated"] + == "RETRO DRILLED" + ) + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ - "SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & ( + self.standardised_asset_list["non-intrusives: Insulated"] + == "FILLED AT BUILD" + ) + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) else: self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) # Work type prefixes @@ -1977,34 +2516,39 @@ class AssetList: # inspections show filled self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["landlord_data_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - ~self.standardised_asset_list["epc_indicates_empty_cavity"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["landlord_data_indicates_empty_cavity"] + & ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & ~self.standardised_asset_list["epc_indicates_empty_cavity"] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + - self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) # Flag extraction self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EXTRACTION_NON_INTRUSIVE}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + - self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) ###################################################### @@ -2017,76 +2561,82 @@ class AssetList: solar_reason_map = { "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ", "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ", - "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: " + "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: ", } for variable, reason in solar_reason_map.items(): self.standardised_asset_list["solar_reason"] = np.where( - self.standardised_asset_list[variable] & pd.isnull(self.standardised_asset_list["solar_reason"]), + self.standardised_asset_list[variable] + & pd.isnull(self.standardised_asset_list["solar_reason"]), reason + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["solar_reason"] + self.standardised_asset_list["solar_reason"], ) # Finally, anything flagged for solar should not be flagged for cavity - make them None self.standardised_asset_list["cavity_reason"] = np.where( ( - ~pd.isnull(self.standardised_asset_list["solar_reason"]) & - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) + ~pd.isnull(self.standardised_asset_list["solar_reason"]) + & ~pd.isnull(self.standardised_asset_list["cavity_reason"]) ), None, - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) # Flag anything that has existing outcomes - if (self.outcomes is not None) and ("surveyed" in self.standardised_asset_list.columns): + if (self.outcomes is not None) and ( + "surveyed" in self.standardised_asset_list.columns + ): if "installer refusal" not in self.standardised_asset_list.columns: self.standardised_asset_list["cavity_reason"] = np.where( - ( - (self.standardised_asset_list["surveyed"] > 0) - ), + ((self.standardised_asset_list["surveyed"] > 0)), None, - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) else: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (self.standardised_asset_list["surveyed"] > 0) | - (self.standardised_asset_list["installer refusal"] > 0) + (self.standardised_asset_list["surveyed"] > 0) + | (self.standardised_asset_list["installer refusal"] > 0) ), None, - self.standardised_asset_list[col] + self.standardised_asset_list[col], ) if self.master_surveyed is not None: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( - ( - (~pd.isnull(self.standardised_asset_list["submission_status"])) - ), + ((~pd.isnull(self.standardised_asset_list["submission_status"]))), None, - self.standardised_asset_list[col] + self.standardised_asset_list[col], ) - if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns: + if ( + self.ecosurv is not None + and "ecosurv_install_status" in self.standardised_asset_list.columns + ): # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"])) + ( + ~pd.isnull( + self.standardised_asset_list["ecosurv_install_status"] + ) + ) ), None, - self.standardised_asset_list[col] + self.standardised_asset_list[col], ) # We prepare outcomes for output if self.outcomes is not None: logger.info("Preparing outcomes for output") identified_work = self.standardised_asset_list[ - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | - ~pd.isnull(self.standardised_asset_list["solar_reason"]) - ][self.DOMNA_PROPERTY_ID].values + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) + | ~pd.isnull(self.standardised_asset_list["solar_reason"]) + ][self.DOMNA_PROPERTY_ID].values if self.DOMNA_PROPERTY_ID in self.outcomes.columns: self.outcomes_for_output = self.outcomes[ @@ -2096,37 +2646,49 @@ class AssetList: # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of # 76 or above, we should exclude it because it's likely not going to be eligible for anyting self.standardised_asset_list["cavity_reason"] = np.where( - (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & - (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") + & (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)", - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) # Split cavity_reason on the colon and check if the first part is equal to one of the two options above # that indicates empties self.standardised_asset_list["identified_empty_cavity"] = ( - self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( - [self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY] + self.standardised_asset_list["cavity_reason"] + .str.split(":") + .str[0] + .isin( + [ + self.EMPTY_CAVITY_NON_INTRUSIVE, + self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, + self.EPC_EMPTY, + ] ) ) def get_work_figures(self): blocks_of_flats = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ] + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" + ] non_blocks_of_flats = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + != "block of flats" + ] # Produce some aggregate figures self.work_type_figures = { **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(), **{ - k + " (Block of flats)": v for k, v in - blocks_of_flats["solar_reason"].value_counts().to_dict().items() + k + " (Block of flats)": v + for k, v in blocks_of_flats["solar_reason"] + .value_counts() + .to_dict() + .items() }, - **self.standardised_asset_list["solar_reason"].value_counts().to_dict() + **self.standardised_asset_list["solar_reason"].value_counts().to_dict(), } pprint(self.work_type_figures) @@ -2136,12 +2698,15 @@ class AssetList: # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( - (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & ( - pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) - ), - self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " + - self.standardised_asset_list[self.STANDARD_POSTCODE], - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" + ) + & (pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])), + self.standardised_asset_list[self.STANDARD_ADDRESS_1] + + " " + + self.standardised_asset_list[self.STANDARD_POSTCODE], + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE], ) def split_blocks(self): @@ -2152,16 +2717,21 @@ class AssetList: """ blocks = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ].copy() + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" + ].copy() if blocks.empty: return - RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') - NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. - TO_RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s+(?:to|To|TO)\s+(\d+[A-Za-z]?)\b') # captures "13 to 15" - LETTER_RANGE_RE = re.compile(r'\b(\d+)([A-Za-z]?)\s*[-–]\s*(\d+)([A-Za-z]?)\b') # captures "1A-3B" + RANGE_RE = re.compile(r"\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b") + NUM_RE = re.compile(r"\b\d+[A-Za-z]?\b") # captures 12, 12A, etc. + TO_RANGE_RE = re.compile( + r"\b(\d+[A-Za-z]?)\s+(?:to|To|TO)\s+(\d+[A-Za-z]?)\b" + ) # captures "13 to 15" + LETTER_RANGE_RE = re.compile( + r"\b(\d+)([A-Za-z]?)\s*[-–]\s*(\d+)([A-Za-z]?)\b" + ) # captures "1A-3B" expanded_rows = [] @@ -2172,16 +2742,16 @@ class AssetList: # We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just # the odds, evens or all of the numbers has_odd = ( - "(odd)" in addr.lower() or - "(odd)" in full_addr.lower() or - "(odds)" in addr.lower() or - "(odds)" in full_addr.lower() + "(odd)" in addr.lower() + or "(odd)" in full_addr.lower() + or "(odds)" in addr.lower() + or "(odds)" in full_addr.lower() ) has_even = ( - "(even)" in addr.lower() or - "(even)" in full_addr.lower() or - "(evens)" in addr.lower() or - "(evens)" in full_addr.lower() + "(even)" in addr.lower() + or "(even)" in full_addr.lower() + or "(evens)" in addr.lower() + or "(evens)" in full_addr.lower() ) # 1 ─ Range (e.g. 1-7) @@ -2190,7 +2760,9 @@ class AssetList: if m_range or to_range: start, end = m_range.groups() if m_range else to_range.groups() - start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) + start, end = int(re.match(r"\d+", start)[0]), int( + re.match(r"\d+", end)[0] + ) if start > end or (end - start) > 200: raise ValueError(f"Suspicious range '{addr}'") @@ -2217,18 +2789,26 @@ class AssetList: new["is_expended_block"] = True # We update the full address - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + new[self.DOMNA_PROPERTY_ID] = ( + f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + ) expanded_rows.append(new.to_dict()) continue # 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block) nums = NUM_RE.findall(addr) - if len(nums) > 1 and (',' in addr or '&' in addr or ' and ' in addr.lower()): + if len(nums) > 1 and ( + "," in addr or "&" in addr or " and " in addr.lower() + ): for n in nums: new = row.copy() - new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only + new_addr = re.sub( + NUM_RE, n, addr, count=1 + ) # replace the first number only new[self.STANDARD_ADDRESS_1] = new_addr - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + new[self.DOMNA_PROPERTY_ID] = ( + f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + ) expanded_rows.append(new.to_dict()) continue @@ -2252,7 +2832,9 @@ class AssetList: new = row.copy() new_addr = f"{n}{chr(letter)}" new[self.STANDARD_ADDRESS_1] = new_addr - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + new[self.DOMNA_PROPERTY_ID] = ( + f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + ) expanded_rows.append(new.to_dict()) continue @@ -2272,18 +2854,19 @@ class AssetList: # We drop the blocks from the standardised asset list and append on the expanded blocks self.standardised_asset_list = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + != "block of flats" + ] self.standardised_asset_list = pd.concat( - [self.standardised_asset_list, expanded_blocks], - ignore_index=True + [self.standardised_asset_list, expanded_blocks], ignore_index=True ) # As a final clean up, for any blocks that are size 1, we don't includr a project code sizes = ( - expanded_blocks - .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID] + expanded_blocks.groupby(self.STANDARD_BLOCK_REFERENCE)[ + self.DOMNA_PROPERTY_ID + ] .nunique() .reset_index() ) @@ -2294,7 +2877,7 @@ class AssetList: size_1[self.STANDARD_BLOCK_REFERENCE].values ), None, - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE], ) def label_property_status(self): @@ -2307,10 +2890,10 @@ class AssetList: # For anything that is ready to go, that gets set to ready to be scheduled self.standardised_asset_list["hubspot_status"] = np.where( - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | - ~pd.isnull(self.standardised_asset_list["solar_reason"]), + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) + | ~pd.isnull(self.standardised_asset_list["solar_reason"]), hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label, - None + None, ) # we step through the process of flagging completed surveys @@ -2321,43 +2904,56 @@ class AssetList: def get_max_status_from_columns(row): status_candidates = [] - for col in ["submission_status", "ecosurv_install_status", "outcome_status"]: + for col in [ + "submission_status", + "ecosurv_install_status", + "outcome_status", + ]: label = row.get(col) if label in label_to_enum: status_candidates.append(label_to_enum[label]) if not status_candidates: - return row["hubspot_status"] # fallback to existing status if no updates + return row[ + "hubspot_status" + ] # fallback to existing status if no updates return max(status_candidates).label - self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply( - get_max_status_from_columns, axis=1 + self.standardised_asset_list["hubspot_status"] = ( + self.standardised_asset_list.apply(get_max_status_from_columns, axis=1) ) self.standardised_asset_list["project_code"] = None # if we have any blocks, where work is eligible, we flag them now # These blocks may be refecence via the landlord_block_reference field, or by property types being # blocks of flats - has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])) + has_landlord_block_reference = sum( + ~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) + ) if has_landlord_block_reference: # For blocks that have a 50% allocation, we create project codes self.block_analysis() # find any block refs with more than 50% emptires viable_empty_blocks = self.block_analysis_df[ - self.block_analysis_df['Percentage of Empties'] >= 0.50 - ] + self.block_analysis_df["Percentage of Empties"] >= 0.50 + ] if not viable_empty_blocks.empty: project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() self.standardised_asset_list = self.standardised_asset_list.merge( - project_code_lookup, how="left", left_on=self.STANDARD_BLOCK_REFERENCE, right_on="Block Reference" + project_code_lookup, + how="left", + left_on=self.STANDARD_BLOCK_REFERENCE, + right_on="Block Reference", ) self.standardised_asset_list["project_code"] = np.where( ~pd.isnull(self.standardised_asset_list["Block Reference"]), self.standardised_asset_list["Block Reference"], - self.standardised_asset_list["project_code"] + self.standardised_asset_list["project_code"], + ) + self.standardised_asset_list = self.standardised_asset_list.drop( + columns=["Block Reference"] ) - self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"]) def analyse_geographies(self): cavity_programme = ( @@ -2379,13 +2975,15 @@ class AssetList: .reset_index() .rename(columns={"landlord_property_id": "n_properties"}) ) - geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge( - solar_programme, how="left", on="domna_postcode" - ).fillna(0) + geographical_areas = ( + postcodes.merge(cavity_programme, how="left", on="domna_postcode") + .merge(solar_programme, how="left", on="domna_postcode") + .fillna(0) + ) geographical_areas["coverage"] = ( - ( - geographical_areas["solar_reason"] + geographical_areas["cavity_reason"] - ) / geographical_areas["n_properties"] * 100 + (geographical_areas["solar_reason"] + geographical_areas["cavity_reason"]) + / geographical_areas["n_properties"] + * 100 ) geographical_areas = geographical_areas.sort_values("coverage", ascending=False) @@ -2397,34 +2995,55 @@ class AssetList: LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} # Threshold status - anything that is at this stage or beyond is considered surveyed - threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value + threshold = ( + hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value + ) block_analysis = [] - for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + for block_reference, group in self.standardised_asset_list.groupby( + self.STANDARD_BLOCK_REFERENCE + ): - cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 + cavity_breakdown = ( + group["cavity_reason"] + .fillna("No Eligibility") + .value_counts(normalize=True) + * 100 + ) if all(cavity_breakdown.index == "No Eligibility"): continue # We check the % of empty vs not empty as right now, we're focused on empty n_empties = ( - (group["identified_empty_cavity"] == True) & - (~pd.isnull(group["cavity_reason"])) & - (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + (group["identified_empty_cavity"] == True) + & (~pd.isnull(group["cavity_reason"])) + & ( + ~group["cavity_reason"].str.contains( + "(unlikely to quality)", case=False, na=False, regex=False + ) + ) ).sum() n_empties_high_confidence = ( - (group["identified_empty_cavity"] == True) & - (~group["SAP Category"].isin(["SAP Rating 69-75", "SAP Rating 76 or more"])) & - (~pd.isnull(group["cavity_reason"])) & - (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + (group["identified_empty_cavity"] == True) + & ( + ~group["SAP Category"].isin( + ["SAP Rating 69-75", "SAP Rating 76 or more"] + ) + ) + & (~pd.isnull(group["cavity_reason"])) + & ( + ~group["cavity_reason"].str.contains( + "(unlikely to quality)", case=False, na=False, regex=False + ) + ) ).sum() # Average age of the EPCs group["time_since_epc"] = ( - pd.to_datetime("now") - pd.to_datetime( - group[self.EPC_API_DATA_NAMES["inspection-date"]]) + pd.to_datetime("now") + - pd.to_datetime(group[self.EPC_API_DATA_NAMES["inspection-date"]]) ).dt.days average_age_of_epc = group["time_since_epc"].mean() @@ -2456,21 +3075,26 @@ class AssetList: block_analysis["Eligible for Works"] = ( block_analysis["Percentage of Empties"] >= 0.50 ) - block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False) + block_analysis = block_analysis.sort_values( + "Percentage of Empties", ascending=False + ) # For properties that are NOT eligible, we should update the cavity reason - ineligible_blocks = block_analysis[ - ~block_analysis["Eligible for Works"] - ]["Block Reference"].values + ineligible_blocks = block_analysis[~block_analysis["Eligible for Works"]][ + "Block Reference" + ].values - eligible_blocks = block_analysis[ - block_analysis["Eligible for Works"] - ]["Block Reference"].values + eligible_blocks = block_analysis[block_analysis["Eligible for Works"]][ + "Block Reference" + ].values self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks), - self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + ineligible_blocks + ), self.standardised_asset_list["cavity_reason"] + + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list["cavity_reason"], ) # if the property is in a block of flats that eligible, but the property itself is not eligible, we flag this @@ -2478,10 +3102,13 @@ class AssetList: # =The property should be in a block of flats self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), - self.standardised_asset_list["cavity_reason"] - + " " + "(Flat in block with more than 50% eligible)", + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + eligible_blocks + ), self.standardised_asset_list["cavity_reason"] + + " " + + "(Flat in block with more than 50% eligible)", + self.standardised_asset_list["cavity_reason"], ) self.block_analysis_df = block_analysis @@ -2513,7 +3140,7 @@ class AssetList: email_column=None, fullname_column=None, firstname_column=None, - lastname_column=None + lastname_column=None, ): self.contact_detail_fields = { @@ -2524,12 +3151,16 @@ class AssetList: "email": email_column, "fullname": fullname_column, "firstname": firstname_column, - "lastname": lastname_column + "lastname": lastname_column, } details_colnames = [ - phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column, - lastname_column + phone_number_column, + secondary_phone_number_column, + email_column, + fullname_column, + firstname_column, + lastname_column, ] # We'll fill them none_details = [x for x in details_colnames if x is None] @@ -2537,23 +3168,29 @@ class AssetList: if local_filepath is None: # Create an empty DataFrame based on the fields in self.contact_detail_fields - self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys())) + self.contact_details = pd.DataFrame( + columns=list(self.contact_detail_fields.keys()) + ) return - contact_details = pd.read_excel( - local_filepath, sheet_name=sheet_name - )[[self.contact_detail_fields["landlord_property_id"]] + details_colnames] + contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[ + [self.contact_detail_fields["landlord_property_id"]] + details_colnames + ] contact_details = contact_details[ - ~pd.isnull(contact_details[self.contact_detail_fields["landlord_property_id"]]) + ~pd.isnull( + contact_details[self.contact_detail_fields["landlord_property_id"]] + ) ] # Fill anything we don't have for detail in none_details: contact_details[detail] = None if fullname_column and not (firstname_column and lastname_column): - contact_details["title"], contact_details["first_name"], contact_details["last_name"] = zip( - *contact_details[fullname_column].apply(self.split_full_name) - ) + ( + contact_details["title"], + contact_details["first_name"], + contact_details["last_name"], + ) = zip(*contact_details[fullname_column].apply(self.split_full_name)) else: contact_details["title"] = None @@ -2588,11 +3225,13 @@ class AssetList: landlord_sap=cls.STANDARD_SAP, landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE, phase=False, - header=header + header=header, ) return instance - def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False): + def prepare_for_crm( + self, company_domain, installer_name, reconcile_programme=False + ): """ This function prepares the data for upload into Hubspot :param company_domain: The company domain name to be used in the CRM @@ -2603,10 +3242,14 @@ class AssetList: """ # This maps the opportunities as we reference them, to the product data as stored in Hubspot if not hubspot_config.Installer.is_valid_value(installer_name): - raise ValueError(f"Installer name {installer_name} is not valid. Please check the installer name.") + raise ValueError( + f"Installer name {installer_name} is not valid. Please check the installer name." + ) # We check if all products are covered in the lookup table - cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() + cavity_products = ( + self.standardised_asset_list["cavity_reason"].unique().tolist() + ) cavity_products = [x for x in cavity_products if not pd.isnull(x)] solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() solar_products = [x for x in solar_products if not pd.isnull(x)] @@ -2627,20 +3270,25 @@ class AssetList: programme_data = self.standardised_asset_list.copy() programme_data["domna_full_address"] = ( - programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace(" ", "") + programme_data["domna_full_address"] + .str.replace(";", ", ", regex=False) + .str.replace(" ", "") ) # Format the two date columns - programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce") + programme_data["survey_date"] = pd.to_datetime( + programme_data["survey_date"], errors="coerce" + ) programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( - programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], - errors="coerce" + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], errors="coerce" ) # Convert to dd/mm/yyyy format - programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y") - programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( - programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") + programme_data["survey_date"] = programme_data["survey_date"].dt.strftime( + "%d/%m/%Y" ) + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = programme_data[ + self.EPC_API_DATA_NAMES["inspection-date"] + ].dt.strftime("%d/%m/%Y") # We take rows that have a survyor and a date for the survey # We include properties under 2 circumstances: @@ -2653,12 +3301,13 @@ class AssetList: else: if programme_data["hubspot_status"].nunique() > 1: - logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?") + logger.info( + "Multiple hubspot_status found - are you sure you don't want to reconcile the programme?" + ) ready_to_be_scheduled = ( - ( - programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) + programme_data["hubspot_status"] + == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label ) # completed_works = ( # (programme_data["hubspot_status"] != @@ -2685,8 +3334,14 @@ class AssetList: ) # We check if we have any missings - cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum() - solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum() + cavity_missing = pd.isnull( + programme_data[~pd.isnull(programme_data["cavity_reason"])][ + "cavity_product" + ] + ).sum() + solar_missing = pd.isnull( + programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"] + ).sum() if cavity_missing > 0 or solar_missing > 0: raise ValueError( @@ -2698,7 +3353,7 @@ class AssetList: programme_data["domna_product"] = np.where( pd.isnull(programme_data["domna_product"]), programme_data["cavity_product"], - programme_data["domna_product"] + programme_data["domna_product"], ) # We filter just on rows where we have a product if reconcile_programme: @@ -2715,33 +3370,41 @@ class AssetList: if pd.isnull(programme_data["domna_product"]).sum(): raise ValueError("Missing products") - programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) + programme_data = programme_data.drop( + columns=["solar_product", "cavity_product"] + ) product_df = ( - pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] + pd.DataFrame(self.CRM_PRODUCTS) + .T[["name", "id", "unit_price"]] .reset_index() .rename( columns={ "name": "Name ", - "id": 'Product ID ', - "unit_price": 'Unit price ', - "index": "domna_product" + "id": "Product ID ", + "unit_price": "Unit price ", + "index": "domna_product", } ) ) - product_df['Quantity '] = 1 + product_df["Quantity "] = 1 # Append on the product data - programme_data = programme_data.merge(product_df, how="left", on="domna_product") + programme_data = programme_data.merge( + product_df, how="left", on="domna_product" + ) # Add in deal and pipeline information programme_data["dealname"] = ( - programme_data[self.STANDARD_FULL_ADDRESS] + ", " + - programme_data[self.STANDARD_POSTCODE] + " : " + programme_data["domna_product"] + programme_data[self.STANDARD_FULL_ADDRESS] + + ", " + + programme_data[self.STANDARD_POSTCODE] + + " : " + + programme_data["domna_product"] ) - programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME - programme_data['Associations: Listing'] = "Property Owner" + programme_data["Pipeline "] = hubspot_config.CRM_PIPELINE_NAME + programme_data["Associations: Listing"] = "Property Owner" # We determine which column we should use for the UPRN if self.STANDARD_UPRN not in programme_data.columns: @@ -2761,20 +3424,25 @@ class AssetList: programme_data[uprn_column] = np.where( programme_data["estimated"].isin([1, True]), None, - programme_data[uprn_column] + programme_data[uprn_column], ) # Add in some columns if we have them date_of_inspections = ( - "Non-Intrusives: Date of Inspection" if - "Non-Intrusives: Date of Inspection" in programme_data.columns else None + "Non-Intrusives: Date of Inspection" + if "Non-Intrusives: Date of Inspection" in programme_data.columns + else None ) # Ammend the property type and built form columns - programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy() + programme_data["hubspot_property_type"] = programme_data[ + self.STANDARD_PROPERTY_TYPE + ].copy() # We don't already have this if self.STANDARD_BUILT_FORM in programme_data.columns: - programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy() + programme_data["hubspot_built_form"] = programme_data[ + self.STANDARD_BUILT_FORM + ].copy() else: programme_data["hubspot_built_form"] = None @@ -2787,23 +3455,30 @@ class AssetList: valid_values = ["house", "bungalow", "flat", "maisonette"] epc_fill_col = "property-type" elif column_name == "hubspot_built_form": - valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"] + valid_values = [ + "detached", + "semi-detached", + "mid-terrace", + "end-terrace", + ] epc_fill_col = "built-form" else: - raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " - f"'hubspot_built_form'.") + raise ValueError( + f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " + f"'hubspot_built_form'." + ) # Any vakue that is not house, bungalow, flat or maisonette is set to None programme_data[column_name] = np.where( ~programme_data[column_name].isin(valid_values), None, - programme_data[column_name] + programme_data[column_name], ) # We fill with the EPC property type programme_data[column_name] = np.where( pd.isnull(programme_data[column_name]), programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]], - programme_data[column_name] + programme_data[column_name], ) programme_data[column_name] = programme_data[column_name].fillna("unknown") @@ -2811,8 +3486,12 @@ class AssetList: return programme_data # Clean up the property type and built form columns - programme_data = _replace_property_description_data(programme_data, "hubspot_property_type") - programme_data = _replace_property_description_data(programme_data, "hubspot_built_form") + programme_data = _replace_property_description_data( + programme_data, "hubspot_property_type" + ) + programme_data = _replace_property_description_data( + programme_data, "hubspot_built_form" + ) # We accomodate the old vs new inspections format if "non-intrusives: WFT Findings" in programme_data.columns: @@ -2826,97 +3505,133 @@ class AssetList: non_intrusives_roof_orientation = None non_intrusives_surveyor_name = None else: - non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes' + non_intrusives_surveyor_notes = "non-intrusives: Any further surveyor notes" non_intrusives_construction = "non-intrusives: Construction" non_intrusives_insulated = "non-intrusives: Insulated" non_intrusives_insulation_material = "non-intrusives: Material" - non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required' - non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' - non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION' - non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name' + non_intrusives_ciga_check_required = "non-intrusives: CIGA Check Required" + non_intrusives_pv_access = "non-intrusives: PV, ACCESS ISSUE, SEE NOTES" + non_intrusives_roof_orientation = ( + "non-intrusives: OFF GAS - ROOF ORIENTATION" + ) + non_intrusives_surveyor_name = "non-intrusives: Surveyors Name" # This maps the hubspot schema to the template. Anything that is not covered in this will be flagged schema_mappings = { - 'Company Domain Name ': 'Company Domain Name ', - 'Email ': ( - self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None + "Company Domain Name ": "Company Domain Name ", + "Email ": ( + self.contact_detail_fields["email"] + if self.contact_detail_fields["email"] + else None ), # TODO: Review - 'First Name ': ( - self.contact_detail_fields["firstname"] if self.contact_detail_fields["firstname"] else None + "First Name ": ( + self.contact_detail_fields["firstname"] + if self.contact_detail_fields["firstname"] + else None ), # TODO: Review - 'Last Name ': ( - self.contact_detail_fields["lastname"] if self.contact_detail_fields["lastname"] else None + "Last Name ": ( + self.contact_detail_fields["lastname"] + if self.contact_detail_fields["lastname"] + else None ), # TODO: Review - 'Phone ': ( - self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None + "Phone ": ( + self.contact_detail_fields["phone_number"] + if self.contact_detail_fields["phone_number"] + else None ), # TODO: Review - 'Secondary Phone ': ( - self.contact_detail_fields["secondary_phone_number"] if - self.contact_detail_fields["secondary_phone_number"] else None + "Secondary Phone ": ( + self.contact_detail_fields["secondary_phone_number"] + if self.contact_detail_fields["secondary_phone_number"] + else None ), "Secondary Contact Full Name ": ( - self.contact_detail_fields["secondary_contact_full_name"] if - self.contact_detail_fields["secondary_contact_full_name"] else None + self.contact_detail_fields["secondary_contact_full_name"] + if self.contact_detail_fields["secondary_contact_full_name"] + else None ), - 'Full Address ': self.STANDARD_FULL_ADDRESS, - 'Address 1 ': self.STANDARD_ADDRESS_1, - 'Address 2 ': None, # TODO: Don't have this for the moment - 'Postcode ': self.STANDARD_POSTCODE, - 'Property Type ': "hubspot_property_type", - 'Property Sub Type ': "hubspot_built_form", - 'Bedroom(s) ': None, # TODO: Don't have this for the moment - 'Domna Property ID ': self.DOMNA_PROPERTY_ID, + "Full Address ": self.STANDARD_FULL_ADDRESS, + "Address 1 ": self.STANDARD_ADDRESS_1, + "Address 2 ": None, # TODO: Don't have this for the moment + "Postcode ": self.STANDARD_POSTCODE, + "Property Type ": "hubspot_property_type", + "Property Sub Type ": "hubspot_built_form", + "Bedroom(s) ": None, # TODO: Don't have this for the moment + "Domna Property ID ": self.DOMNA_PROPERTY_ID, # We populate this with the column that we have - 'National UPRN ': uprn_column, - 'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID, - 'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION, - 'Heating System ': self.STANDARD_HEATING_SYSTEM, - 'Year Built ': self.STANDARD_YEAR_BUILT, - 'Boiler Make ': None, # TODO: Don't have this for the moment - 'Boiler Model ': None, # TODO: Don't have this for the moment - 'Non-Intrusives: Date Checked ': date_of_inspections, - 'Non-Intrusives: Wall Type ': non_intrusives_construction, - 'Non-intrusives: Insulation ': non_intrusives_insulated, - 'Non-intrusives: Insulation Material ': - non_intrusives_insulation_material, - 'Non-Intrusives: CIGA Check Required ': - non_intrusives_ciga_check_required, - 'Non-Intrusives: PV Access Issues ': non_intrusives_pv_access, - 'Non-Intrusives: Roof Orientation ': - non_intrusives_roof_orientation, - 'Non-Intrusives: Surveyor Notes ': non_intrusives_surveyor_notes, - 'Non-Intrusives: Surveyor Name ': non_intrusives_surveyor_name, - 'CIGA: Date Requested ': None, # TODO: Don't have this for the moment - 'CIGA: Cavity Guarantee Found ': None, - 'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"], - 'Last EPC: EPC Rating ': self.EPC_API_DATA_NAMES["current-energy-rating"], - 'Last EPC: SAP Rating ': self.EPC_API_DATA_NAMES["current-energy-efficiency"], - 'Last EPC: Main Heating Description ': self.EPC_API_DATA_NAMES[ - "mainheat-description"], - 'Last EPC: Heating Controls ': self.EPC_API_DATA_NAMES[ - "mainheatcont-description"], - 'Last EPC: Lodgement Date ': self.EPC_API_DATA_NAMES["inspection-date"], - 'Last EPC: Floor Area ': self.EPC_API_DATA_NAMES["total-floor-area"], - 'Last EPC: Wall ': self.EPC_API_DATA_NAMES["walls-description"], - 'Last EPC: Roof ': self.EPC_API_DATA_NAMES["roof-description"], - 'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"], - 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], - 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], - 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': "survey_date", - 'Deal Name ': "dealname", # Need to create this, - 'Product ID ': 'Product ID ', - 'Name ': 'Name ', - 'Unit price ': 'Unit price ', - 'Quantity ': 'Quantity ', - 'Deal Owner': 'surveyor', - 'Project Code ': 'project_code', - 'Associations: Listing': 'Associations: Listing', - 'Deal Stage ': "hubspot_status", + "National UPRN ": uprn_column, + "Owner Property ID ": self.STANDARD_LANDLORD_PROPERTY_ID, + "Wall Construction ": self.STANDARD_WALL_CONSTRUCTION, + "Heating System ": self.STANDARD_HEATING_SYSTEM, + "Year Built ": self.STANDARD_YEAR_BUILT, + "Boiler Make ": None, # TODO: Don't have this for the moment + "Boiler Model ": None, # TODO: Don't have this for the moment + "Non-Intrusives: Date Checked ": date_of_inspections, + "Non-Intrusives: Wall Type ": non_intrusives_construction, + "Non-intrusives: Insulation ": non_intrusives_insulated, + "Non-intrusives: Insulation Material ": non_intrusives_insulation_material, + "Non-Intrusives: CIGA Check Required ": non_intrusives_ciga_check_required, + "Non-Intrusives: PV Access Issues ": non_intrusives_pv_access, + "Non-Intrusives: Roof Orientation ": non_intrusives_roof_orientation, + "Non-Intrusives: Surveyor Notes ": non_intrusives_surveyor_notes, + "Non-Intrusives: Surveyor Name ": non_intrusives_surveyor_name, + "CIGA: Date Requested ": None, # TODO: Don't have this for the moment + "CIGA: Cavity Guarantee Found ": None, + "Last EPC: Is Estimated ": self.EPC_API_DATA_NAMES[ + "estimated" + ], + "Last EPC: EPC Rating ": self.EPC_API_DATA_NAMES[ + "current-energy-rating" + ], + "Last EPC: SAP Rating ": self.EPC_API_DATA_NAMES[ + "current-energy-efficiency" + ], + "Last EPC: Main Heating Description ": self.EPC_API_DATA_NAMES[ + "mainheat-description" + ], + "Last EPC: Heating Controls ": self.EPC_API_DATA_NAMES[ + "mainheatcont-description" + ], + "Last EPC: Lodgement Date ": self.EPC_API_DATA_NAMES[ + "inspection-date" + ], + "Last EPC: Floor Area ": self.EPC_API_DATA_NAMES[ + "total-floor-area" + ], + "Last EPC: Wall ": self.EPC_API_DATA_NAMES[ + "walls-description" + ], + "Last EPC: Roof ": self.EPC_API_DATA_NAMES[ + "roof-description" + ], + "Last EPC: Floor ": self.EPC_API_DATA_NAMES[ + "floor-description" + ], + "Last EPC: Room Height ": self.EPC_API_DATA_NAMES[ + "floor-height" + ], + "Last EPC: Age Band ": self.EPC_API_DATA_NAMES[ + "construction-age-band" + ], + "Pipeline ": "Pipeline ", + "Expected Commencement Date ": "survey_date", + "Deal Name ": "dealname", # Need to create this, + "Product ID ": "Product ID ", + "Name ": "Name ", + "Unit price ": "Unit price ", + "Quantity ": "Quantity ", + "Deal Owner": "surveyor", + "Project Code ": "project_code", + "Associations: Listing": "Associations: Listing", + "Deal Stage ": "hubspot_status", } # We sometimes columns if the landlord never provided them - missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None] + missed_mapping_cols = [ + c + for c in schema_mappings.values() + if c not in programme_data.columns + if c is not None + ] for c in missed_mapping_cols: programme_data[c] = None @@ -2934,22 +3649,32 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) - programme_data['Postcode '] = programme_data['Postcode '].copy() - programme_data['Installer '] = installer_name - programme_data['Name '] = ( - programme_data['Full Address '] + " ," + programme_data['Postcode '] + programme_data["Postcode "] = programme_data[ + "Postcode " + ].copy() + programme_data["Installer "] = installer_name + programme_data["Name "] = ( + programme_data["Full Address "] + + " ," + + programme_data["Postcode "] ) # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing - programme_data['Listing Owner Email '] = programme_data['Deal Owner'] - programme_data['Amount '] = 0 + programme_data["Listing Owner Email "] = ( + programme_data["Deal Owner"] + ) + programme_data["Amount "] = 0 programme_data["Deal Owner"] = np.where( ~pd.isnull(programme_data["Deal Owner"]), programme_data["Deal Owner"].astype(str).str.lower(), - programme_data["Deal Owner"] + programme_data["Deal Owner"], ) # We make sure we have all of the columns that we need - missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] + missed_columns = [ + c + for c in hubspot_config.CRM_UPLOAD_COLUMNS + if c not in programme_data.columns + ] if missed_columns: raise ValueError( f"We have the following columns that are not in the programme data: {missed_columns}. " @@ -2959,7 +3684,6 @@ class AssetList: self.hubspot_data = programme_data def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None): - """ This class will match ecosurv data to the asset list :return: @@ -2968,7 +3692,9 @@ class AssetList: return # TODO: Fetch from Sharepoint - ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/07.05.2025.csv" + ecosurv_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/07.05.2025.csv" + ) logger.info("Getting Ecosurv data from %s", ecosurv_filepath) self.ecosurv = pd.read_csv(ecosurv_filepath, encoding="cp437") @@ -2989,12 +3715,16 @@ class AssetList: # Try and match to asset list matched = [] unmatched = [] - for _, row in tqdm(landlord_ecosurv_data.iterrows(), total=landlord_ecosurv_data.shape[0]): + for _, row in tqdm( + landlord_ecosurv_data.iterrows(), total=landlord_ecosurv_data.shape[0] + ): postcode = row["Postcode"].lower() df = self.standardised_asset_list[ ( - self.standardised_asset_list[self.STANDARD_POSTCODE].str.replace(" ", "").str.lower() == - postcode + self.standardised_asset_list[self.STANDARD_POSTCODE] + .str.replace(" ", "") + .str.lower() + == postcode ) ].copy() @@ -3003,25 +3733,28 @@ class AssetList: continue if df.shape[0] > 1: - house_no = SearchEpc.get_house_number(row["Address Line 1"], row["Postcode"]) + house_no = SearchEpc.get_house_number( + row["Address Line 1"], row["Postcode"] + ) df["house_no"] = df.apply( lambda x: SearchEpc.get_house_number( str(x[self.STANDARD_ADDRESS_1]), x[self.STANDARD_POSTCODE] ), - axis=1 + axis=1, ) df = df[df["house_no"] == house_no] if df.shape[0] > 1: # We compare address line 1 to full address if any( - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - row["Address Line 1"].lower(), na=False) + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains(row["Address Line 1"].lower(), na=False) ): df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - row["Address Line 1"].lower(), na=False - ) + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains(row["Address Line 1"].lower(), na=False) ] if df.shape[0] > 1: @@ -3030,7 +3763,9 @@ class AssetList: if df.shape[0] == 1: matched.append( { - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], "ecosurv_reference": row["Reference"], "ecosurv_address1": row["Address Line 1"], "ecosurv_postcode": row["Postcode"], @@ -3053,7 +3788,9 @@ class AssetList: # We'll possibly have duplicates here, where properties have been sold twice. Ww de-dupe if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum(): # It doesn't matter too much which record we take - matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + matched = matched.drop_duplicates( + subset=[self.STANDARD_LANDLORD_PROPERTY_ID] + ) # We merge on the status of the property matched = matched.merge( @@ -3063,12 +3800,16 @@ class AssetList: "Status": "ecosurv_status", "Lead Status": "ecosurv_lead_status", "Tags": "ecosurv_tags", - "Installer": "ecosurv_installer" + "Installer": "ecosurv_installer", } - ), how="left", on="ecosurv_reference" + ), + how="left", + on="ecosurv_reference", ) - matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + matched["ecosurv_install_status"] = ( + hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + ) # This mapping is ordered by process order, where lodgment is the final step so if we have an indication # that the property is ready for lodgement, we set the status to that. We then proceed through the other @@ -3086,7 +3827,7 @@ class AssetList: "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, - "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, } def get_max_status(tag_str): @@ -3100,7 +3841,9 @@ class AssetList: return None return max(matched_statuses).label - matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status) + matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply( + get_max_status + ) self.standardised_asset_list = self.standardised_asset_list.merge( matched, @@ -3120,7 +3863,7 @@ class AssetList: outcomes_address, outcomes_postcode, outcomes_houseno, - outcomes_id + outcomes_id, ): if not outcomes_filepaths: return @@ -3129,7 +3872,9 @@ class AssetList: outcomes_no_match = [] lookup = [] for idx, outcomes_filepath in enumerate(outcomes_filepaths): - outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname[idx]) + outcomes = pd.read_excel( + outcomes_filepath, sheet_name=outcomes_sheetname[idx] + ) outcomes["row_id"] = outcomes.index if outcomes_houseno[idx] is None: @@ -3139,15 +3884,21 @@ class AssetList: ) # We handle an edge case that occured for LHP - if "Notes / Outcomes" in outcomes.columns and "Outcome" not in outcomes.columns: + if ( + "Notes / Outcomes" in outcomes.columns + and "Outcome" not in outcomes.columns + ): # We use the re-mapper to handle this: outcomes["Notes / Outcomes"] = outcomes["Notes / Outcomes"].str.strip() values_to_remap = outcomes["Notes / Outcomes"].unique() # We want to map this to our standardised list of property types we're interested in remapper = DataRemapper( - standard_values=outcomes_mappings.outcomes_values, standard_map=outcomes_mappings.outcomes_map + standard_values=outcomes_mappings.outcomes_values, + standard_map=outcomes_mappings.outcomes_map, + ) + remap_dictionary = remapper.standardize_list( + values_to_remap=values_to_remap.tolist() ) - remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist()) # Perform the remap outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary) @@ -3167,80 +3918,109 @@ class AssetList: if oid is not None: matched = self.standardised_asset_list[ - (self.standardised_asset_list[ - self.STANDARD_LANDLORD_PROPERTY_ID - ].str.strip() == oid) + ( + self.standardised_asset_list[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].str.strip() + == oid + ) ] if matched.shape[0] == 1: lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue - address_clean = x[outcomes_address[idx]].lower().replace(",", "").replace(" ", " ") + address_clean = ( + x[outcomes_address[idx]].lower().replace(",", "").replace(" ", " ") + ) matched = self.standardised_asset_list[ - (self.standardised_asset_list[ - self.STANDARD_FULL_ADDRESS - ].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean) + ( + self.standardised_asset_list[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.replace(",", "") + .str.replace(" ", " ") + == address_clean + ) ] if matched.shape[0] == 1: lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue matched = self.standardised_asset_list[ - (self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() == x[outcomes_postcode[idx]]) + ( + self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() + == x[outcomes_postcode[idx]] + ) ].copy() if not matched.empty: matched["houseno"] = matched.apply( lambda x: SearchEpc.get_house_number( - str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) + str(x[self.STANDARD_ADDRESS_1]), + str(x[self.STANDARD_POSTCODE]), ), - axis=1 + axis=1, ) if pd.isnull(x[outcomes_houseno[idx]]): house_no_to_match = SearchEpc.get_house_number( - str(x[outcomes_address[idx]]), str(x[outcomes_postcode[idx]]) + str(x[outcomes_address[idx]]), + str(x[outcomes_postcode[idx]]), ) if isinstance(house_no_to_match, str): house_no_to_match = house_no_to_match.lower() else: house_no_to_match = str(x[outcomes_houseno[idx]]).strip() - matched = matched[matched["houseno"].astype(str) == house_no_to_match] + matched = matched[ + matched["houseno"].astype(str) == house_no_to_match + ] if matched.shape[0] == 1: lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue elif not matched.empty: # Use levenstein distance to match matched["address"] = ( - matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE] + matched[self.STANDARD_ADDRESS_1] + + " " + + matched[self.STANDARD_POSTCODE] ) best_match = process.extractOne( - x[outcomes_address[idx]], matched[self.STANDARD_FULL_ADDRESS].values + x[outcomes_address[idx]], + matched[self.STANDARD_FULL_ADDRESS].values, )[0] - matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match] + matched = matched[ + matched[self.STANDARD_FULL_ADDRESS] == best_match + ] lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue @@ -3290,7 +4070,9 @@ class AssetList: raise NotImplementedError("Invalid notes in outcomes - implement me") lookup = lookup.merge( - self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id" + self.outcomes[["row_id", "Outcome", notes_col, date_col]], + how="left", + on="row_id", ) visit_counts = ( @@ -3305,28 +4087,35 @@ class AssetList: if isinstance(s, str): match = re.search(r"(\d{2}\.\d{2}\.\d{4})", s) if match: - return pd.to_datetime(match.group(1), format="%d.%m.%Y", errors="coerce") + return pd.to_datetime( + match.group(1), format="%d.%m.%Y", errors="coerce" + ) return pd.NaT - lookup['parsed_date'] = lookup[date_col].apply(extract_date) + lookup["parsed_date"] = lookup[date_col].apply(extract_date) def get_latest_note(group): - surveyed = group[group['Outcome'] == 'surveyed'] + surveyed = group[group["Outcome"] == "surveyed"] if not surveyed.empty: - return surveyed.sort_values('parsed_date', ascending=False).iloc[0] + return surveyed.sort_values("parsed_date", ascending=False).iloc[0] else: - return group.sort_values('parsed_date', ascending=False).iloc[0] + return group.sort_values("parsed_date", ascending=False).iloc[0] latest_note = ( - lookup.groupby('domna_property_id', group_keys=False). - apply(get_latest_note). - reset_index(drop=True) + lookup.groupby("domna_property_id", group_keys=False) + .apply(get_latest_note) + .reset_index(drop=True) ) latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename( columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"} ) - pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index() + pivot_df = ( + lookup.groupby(["domna_property_id", "Outcome"]) + .size() + .unstack(fill_value=0) + .reset_index() + ) pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id") pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id") @@ -3336,34 +4125,46 @@ class AssetList: raise Exception("We have duplicated property IDs in the outcomes data") # We merge this data onto outcomes - self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) - self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id") + self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin( + lookup["row_id"].values + ) + self.outcomes = self.outcomes.merge( + lookup[["row_id", "domna_property_id"]], how="left", on="row_id" + ) # We flag the outcome status, based on the outcome pivot_df["outcome_status"] = None if "surveyed" in pivot_df.columns: pivot_df["outcome_status"] = np.where( - pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, - pivot_df["outcome_status"] + pivot_df["surveyed"] > 0, + hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + pivot_df["outcome_status"], ) if "installer refusal" in pivot_df.columns: pivot_df["outcome_status"] = np.where( - pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, - pivot_df["outcome_status"] + pivot_df["installer refusal"] > 0, + hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, + pivot_df["outcome_status"], ) pivot_df["outcome_status"] = np.where( - pivot_df["latest_outcome"].isin(["see notes"]) & - (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label), + pivot_df["latest_outcome"].isin(["see notes"]) + & ( + pivot_df["outcome_status"] + != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label + ), hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label, - pivot_df["outcome_status"] + pivot_df["outcome_status"], ) # We merge out pivoted outcomes onto the asset list self.standardised_asset_list = self.standardised_asset_list.merge( - pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" + pivot_df, + how="left", + left_on=self.DOMNA_PROPERTY_ID, + right_on="domna_property_id", ) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): @@ -3372,10 +4173,7 @@ class AssetList: self.outcomes = self.outcomes.sort_values("domna_property_id", ascending=False) def flag_survey_master( - self, - master_filepaths, - master_id_colnames, - master_to_asset_list_filepath=None + self, master_filepaths, master_id_colnames, master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion @@ -3394,26 +4192,26 @@ class AssetList: master_data = pd.read_csv(filepath) # Strip columns master_data.columns = [c.strip() for c in master_data.columns] - master_data.columns = [re.sub(r'\s+', ' ', c) for c in master_data.columns] + master_data.columns = [re.sub(r"\s+", " ", c) for c in master_data.columns] # Drop any unnamed columns unnamed_columns = [c for c in master_data.columns if "Unnamed:" in c] master_data = master_data.drop(columns=unnamed_columns) if not id_map.empty: master_data = master_data.merge( - id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code'] + id_map, how="left", on=["NO.", "Street / Block Name", "Post Code"] ) if "INSTALLED OR CANCELLED" in master_data.columns: install_col = "INSTALLED OR CANCELLED" elif "INSTALL / CANCELLATION DATE" in master_data.columns: install_col = "INSTALL / CANCELLATION DATE" - elif 'INSTALL/ CANCELLATION DATE' in master_data.columns: - install_col = 'INSTALL/ CANCELLATION DATE' + elif "INSTALL/ CANCELLATION DATE" in master_data.columns: + install_col = "INSTALL/ CANCELLATION DATE" elif "INSTALL/CANCELLATION DATE" in master_data.columns: install_col = "INSTALL/CANCELLATION DATE" - elif 'Measure 1 Install Date' in master_data.columns: - install_col = 'Measure 1 Install Date' + elif "Measure 1 Install Date" in master_data.columns: + install_col = "Measure 1 Install Date" else: raise ValueError("No install or cancellation date") @@ -3428,14 +4226,19 @@ class AssetList: master_data["row_id"] = master_data.index - self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply( - lambda x: SearchEpc.get_house_number( - str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) - ), - axis=1 + self.standardised_asset_list["house_no"] = ( + self.standardised_asset_list.apply( + lambda x: SearchEpc.get_house_number( + str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) + ), + axis=1, + ) ) - if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns: + if ( + "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" + in master_data.columns + ): scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" elif "AFFORDABLE WARMTH" in master_data.columns: scheme_col = "AFFORDABLE WARMTH" @@ -3446,11 +4249,13 @@ class AssetList: else: scheme_col = "OFFICE USE ONLY" - postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" - if 'NO.' in master_data.columns: - house_no_col = 'NO.' + postcode_col = ( + "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" + ) + if "NO." in master_data.columns: + house_no_col = "NO." elif "NO" in master_data.columns: - house_no_col = 'NO' + house_no_col = "NO" else: house_no_col = "NUMBER" @@ -3460,8 +4265,8 @@ class AssetList: property_type_col = "PROPERTY TYPE As per table emailed" elif "PROPERTY TYPE" in master_data.columns: property_type_col = "PROPERTY TYPE" - elif 'Property Type' in master_data.columns: - property_type_col = 'Property Type' + elif "Property Type" in master_data.columns: + property_type_col = "Property Type" else: property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 3W_Flat_1 (As per Matrix)" @@ -3469,14 +4274,21 @@ class AssetList: installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" elif "INSTALLERS NOTES" in master_data.columns: installer_notes_col = "INSTALLERS NOTES" - elif 'Installers Notes' in master_data.columns: - installer_notes_col = 'Installers Notes' - elif 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' in master_data.columns: - installer_notes_col = 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' - elif ('INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED FROM' in - master_data.columns): - installer_notes_col = ('INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED ' - 'FROM') + elif "Installers Notes" in master_data.columns: + installer_notes_col = "Installers Notes" + elif ( + "NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM" + in master_data.columns + ): + installer_notes_col = "NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM" + elif ( + "INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED FROM" + in master_data.columns + ): + installer_notes_col = ( + "INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED " + "FROM" + ) else: raise ValueError("No installer notes column found in master data") @@ -3491,8 +4303,8 @@ class AssetList: if "TOWN" in master_data.columns: town_colname = "TOWN" - elif 'Town/Area' in master_data.columns: - town_colname = 'Town/Area' + elif "Town/Area" in master_data.columns: + town_colname = "Town/Area" else: town_colname = "Town/City" @@ -3511,8 +4323,9 @@ class AssetList: if master_id_colnames[idx] is not None: # Filter the standardised asset list on this df = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] - ] + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] + == row[master_id_colnames[idx]] + ] if df.shape[0] == 1: matched.append( { @@ -3520,7 +4333,9 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], } ) continue @@ -3530,7 +4345,10 @@ class AssetList: df = self.standardised_asset_list[ ( self.standardised_asset_list[self.STANDARD_POSTCODE] - .str.strip().str.lower().str.replace(" ", "") == postcode_no_space + .str.strip() + .str.lower() + .str.replace(" ", "") + == postcode_no_space ) ] @@ -3548,7 +4366,9 @@ class AssetList: df = self.standardised_asset_list[ ( self.standardised_asset_list[self.STANDARD_POSTCODE] - .str.strip().str.lower().str.startswith(postal_region) + .str.strip() + .str.lower() + .str.startswith(postal_region) ) ] @@ -3558,7 +4378,9 @@ class AssetList: df = df[df["house_no"] == house_no] if df.shape[0] > 1: df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(row["Street / Block Name"].lower()) + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains(row["Street / Block Name"].lower()) ] if df.shape[0] == 0: unmatched.append(row["row_id"]) @@ -3569,7 +4391,9 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], } ) continue @@ -3579,44 +4403,71 @@ class AssetList: if df.shape[0] != 1: # Levenstein distance - if any(df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])): + if any( + df[self.STANDARD_FULL_ADDRESS].str.contains( + row["Street / Block Name"] + ) + ): df = df[ - df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"]) + df[self.STANDARD_FULL_ADDRESS].str.contains( + row["Street / Block Name"] + ) ] else: # Levenstein distance df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().apply( + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .apply( lambda x: process.extractOne( " ".join( - [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(), - x + [ + row[house_no_col], + row["Street / Block Name"], + row[town_colname], + ] + ).lower(), + x, )[1] - ) > 90 - ] + ) + > 90 + ] if df.shape[0] == 0: unmatched.append(row["row_id"]) continue - if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - " ".join([row[house_no_col], row["Street / Block Name"]]).lower() - )): + if any( + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains( + " ".join( + [row[house_no_col], row["Street / Block Name"]] + ).lower() + ) + ): df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - " ".join([row[house_no_col], row["Street / Block Name"]]).lower() + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains( + " ".join( + [row[house_no_col], row["Street / Block Name"]] + ).lower() ) ] if any( - df[self.STANDARD_PROPERTY_TYPE].str.contains(row[property_type_col].split(" ")[-1].lower()) + df[self.STANDARD_PROPERTY_TYPE].str.contains( + row[property_type_col].split(" ")[-1].lower() + ) ): # We ignore "block of flats" entries df = df[ df[self.STANDARD_PROPERTY_TYPE].str.contains( row[property_type_col].split(" ")[-1].lower() - ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") - ] + ) + & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") + ] if df.shape[0] != 1: # We have multiple matches - it's likely because the landlord has a duplicate @@ -3628,7 +4479,9 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: x[self.STANDARD_LANDLORD_PROPERTY_ID], + self.STANDARD_LANDLORD_PROPERTY_ID: x[ + self.STANDARD_LANDLORD_PROPERTY_ID + ], } ) continue @@ -3639,11 +4492,15 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], } ) - self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") + self.standardised_asset_list = self.standardised_asset_list.drop( + columns="house_no" + ) # We match the "UPRN" which is the landlords ID, onto the master sheet @@ -3654,19 +4511,29 @@ class AssetList: if matched.empty: continue - master_to_append = master_data[ - [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] - ].merge( - matched, how="left", on="row_id" - ).rename( - columns={ - scheme_col: "funding_scheme", - measure_mix_col: "measure_mix", - install_col: "survey_status", - submission_col: "submission_date", - installer_notes_col: "submission_installer_notes", - installer_col: "submission_installer" - } + master_to_append = ( + master_data[ + [ + scheme_col, + "row_id", + install_col, + submission_col, + measure_mix_col, + installer_notes_col, + installer_col, + ] + ] + .merge(matched, how="left", on="row_id") + .rename( + columns={ + scheme_col: "funding_scheme", + measure_mix_col: "measure_mix", + install_col: "survey_status", + submission_col: "submission_date", + installer_notes_col: "submission_installer_notes", + installer_col: "submission_installer", + } + ) ) master_to_append["submission_cancelled"] = ( master_to_append["survey_status"].str.lower().str.contains("cancel") @@ -3675,14 +4542,17 @@ class AssetList: master_to_append["survey_status"].str.lower().str.contains("installed") ) master_surveyed.append(master_to_append) - unmatched_df = master_data[ - master_data["row_id"].isin(unmatched) - ] + unmatched_df = master_data[master_data["row_id"].isin(unmatched)] # The columns are massively different - we take just a few unmatched_df = unmatched_df[ [ - scheme_col, house_no_col, "Street / Block Name", postcode_col, install_col, submission_col + scheme_col, + house_no_col, + "Street / Block Name", + postcode_col, + install_col, + submission_col, ] ].rename( columns={ @@ -3690,14 +4560,16 @@ class AssetList: house_no_col: "House Number", postcode_col: "Postcode", install_col: "survey_status", - submission_col: "submission_date" + submission_col: "submission_date", } ) unmatched_submissions.append(unmatched_df) master_surveyed = pd.concat(master_surveyed) - master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])] + master_surveyed = master_surveyed[ + ~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID]) + ] master_surveyed = master_surveyed[ ~master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID].isin( ["NOT ON ASSET LIST", "Missing From Asset List"] @@ -3709,20 +4581,24 @@ class AssetList: ].astype(str) # We de-dupe crudely on landlord property id - self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy() + self.master_surveyed = master_surveyed.drop_duplicates( + subset=[self.STANDARD_LANDLORD_PROPERTY_ID] + ).copy() # We now add the submission status, based on the hubspot stages - self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + self.master_surveyed["submission_status"] = ( + hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + ) self.master_surveyed["submission_status"] = np.where( self.master_surveyed["submission_cancelled"] == True, hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label, - self.master_surveyed["submission_status"] + self.master_surveyed["submission_status"], ) self.master_surveyed["submission_status"] = np.where( self.master_surveyed["submission_installed"] == True, hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label, - self.master_surveyed["submission_status"] + self.master_surveyed["submission_status"], ) self.standardised_asset_list = self.standardised_asset_list.merge( @@ -3735,6 +4611,4 @@ class AssetList: # Finally, we keep a record of the unmatched if unmatched_submissions: - self.unmatched_submissions = pd.concat( - unmatched_submissions - ) + self.unmatched_submissions = pd.concat(unmatched_submissions) diff --git a/asset_list/app.py b/asset_list/app.py index b9c6bcf0..3e492118 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -18,6 +18,7 @@ EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) + OPENAI_API_KEY = os.getenv( "OPENAI_API_KEY", ) @@ -73,61 +74,24 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/West Kent" - data_filename = "West Kent Asset List.xlsx" + data_folder = "/workspaces/model/asset_list" + data_filename = "assests.xlsx" sheet_name = "Sheet1" - postcode_column = "POSTCODE" - address1_column = None + postcode_column = "Postcode" + address1_column = "Address" address1_method = "house_number_extraction" - fulladdress_column = "ADDRESS" - address_cols_to_concat = [] + fulladdress_column = None + address_cols_to_concat = ["Address"] missing_postcodes_method = None landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE" - landlord_built_form = None - landlord_wall_construction = "wall combined" - landlord_roof_construction = "HEATING SYSTEM" - landlord_heating_system = None + landlord_os_uprn = "UPRN" + landlord_property_type = "Archetype" + landlord_built_form = "Bedroom Count" + landlord_wall_construction = "Wall Insulation Type" + landlord_roof_construction = "Roof Type" + landlord_heating_system = "Boiler Type" landlord_existing_pv = None - landlord_property_id = "UPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - - # Peabody data for cleaning - data_folder = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/data_validation" - ) - data_filename = "to_standardise_uprns.xlsx" - sheet_name = "Sheet1" - postcode_column = "POSTCODE" - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "ADDRESS" - address_cols_to_concat = [] - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = None - landlord_property_type = "PROPERTY TYPE" - landlord_built_form = None # Skipped as empty - landlord_wall_construction = "wall combined" # combin F + G - landlord_roof_construction = "HEATING SYSTEM" # Combine I + J - landlord_heating_system = None # Check with Khalim - landlord_existing_pv = None - landlord_property_id = "UPRN" + landlord_property_id = "Tab" landlord_sap = None outcomes_filename = None outcomes_sheetname = None diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt index dc7e572e..56469fc0 100644 --- a/asset_list/requirements.txt +++ b/asset_list/requirements.txt @@ -5,7 +5,7 @@ epc-api-python==1.0.2 thefuzz boto3 openpyxl -openai>=1.3.5 +openai==1.93.0 tiktoken msgpack beautifulsoup4 diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf index 7f60d684..89e6c4c1 100644 --- a/infrastructure/terraform/lambda/_template/main.tf +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -33,6 +33,8 @@ module "lambda" { image_uri = local.image_uri + # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) + maximum_concurrency = var.maximum_concurrency environment = { STAGE = var.stage diff --git a/infrastructure/terraform/lambda/_template/variables.tf b/infrastructure/terraform/lambda/_template/variables.tf index e4bab243..e0061321 100644 --- a/infrastructure/terraform/lambda/_template/variables.tf +++ b/infrastructure/terraform/lambda/_template/variables.tf @@ -17,6 +17,11 @@ variable "image_digest" { description = "Image digest (sha256:...)" } +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." +} locals { image_uri = "${var.ecr_repo_url}@${var.image_digest}" diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index f53d55c8..2d185497 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -24,6 +24,9 @@ module "address2uprn" { timeout = 900 + # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) + maximum_concurrency = var.maximum_concurrency + environment = merge( { STAGE = var.stage diff --git a/infrastructure/terraform/lambda/address2UPRN/variables.tf b/infrastructure/terraform/lambda/address2UPRN/variables.tf index e4bab243..e0061321 100644 --- a/infrastructure/terraform/lambda/address2UPRN/variables.tf +++ b/infrastructure/terraform/lambda/address2UPRN/variables.tf @@ -17,6 +17,11 @@ variable "image_digest" { description = "Image digest (sha256:...)" } +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." +} locals { image_uri = "${var.ecr_repo_url}@${var.image_digest}" diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf index 065fb790..74345d24 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf @@ -44,5 +44,6 @@ module "sqs_trigger" { lambda_role_name = module.role.role_name queue_arn = module.queue.queue_arn - batch_size = var.batch_size + batch_size = var.batch_size + maximum_concurrency = var.maximum_concurrency } diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf index b20ab2a8..7c2832d2 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf @@ -34,3 +34,9 @@ variable "batch_size" { type = number default = 10 } + +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS. null = no limit." +} diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf index 5919e10f..4afaf773 100644 --- a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf @@ -3,6 +3,13 @@ resource "aws_lambda_event_source_mapping" "this" { function_name = var.lambda_arn batch_size = var.batch_size enabled = true + + dynamic "scaling_config" { + for_each = var.maximum_concurrency != null ? [1] : [] + content { + maximum_concurrency = var.maximum_concurrency + } + } } resource "aws_iam_role_policy" "allow_sqs" { diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf index 0e50cd54..c3127c74 100644 --- a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf @@ -6,3 +6,9 @@ variable "batch_size" { type = number default = 10 } + +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS. null = no limit." +} From c2196b6e0d6b0ddaabf6a0d3cf973614439c3476 Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 18 Feb 2026 12:23:29 +0000 Subject: [PATCH 208/340] 10 address 2uprn --- infrastructure/terraform/lambda/address2UPRN/variables.tf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/infrastructure/terraform/lambda/address2UPRN/variables.tf b/infrastructure/terraform/lambda/address2UPRN/variables.tf index e0061321..347964de 100644 --- a/infrastructure/terraform/lambda/address2UPRN/variables.tf +++ b/infrastructure/terraform/lambda/address2UPRN/variables.tf @@ -19,7 +19,7 @@ variable "image_digest" { variable "maximum_concurrency" { type = number - default = null + default = 10 # null if you don't want to set it for this handler description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." } From f4db5389f5f226e5610fbd91c53596f3a8944984 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 12:30:20 +0000 Subject: [PATCH 209/340] getting rid of test code --- backend/engine/engine.py | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index f8b25352..80d6d078 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1118,28 +1118,6 @@ async def model_engine(body: PlanTriggerRequest): # When the goal is Increasing EPC, we can run the funding optimiser if body.goal == "Increasing EPC": - solutions_no_budget = optimise_with_scenarios( - p=p, - input_measures=input_measures, - budget=None, - target_gain=gain, - enforce_heat_pump_insulation=True, - enforce_fabric_first=body.enforce_fabric_first, - already_installed_sap=already_installed_sap, # To be passed to output - ) - solutions_no_budget["total_cost"] - - solutions_with_budget = optimise_with_scenarios( - p=p, - input_measures=input_measures, - budget=5000, - target_gain=gain, - enforce_heat_pump_insulation=True, - enforce_fabric_first=body.enforce_fabric_first, - already_installed_sap=already_installed_sap, # To be passed to output - ) - solutions_with_budget["total_cost"] - solutions = optimise_with_scenarios( p=p, input_measures=input_measures, From ecaf742a18b07cab6899988a22cbef4e1a437bfe Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 12:38:45 +0000 Subject: [PATCH 210/340] added catch if budget is not set --- recommendations/optimiser/funding_optimiser.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 80ba02fd..787af8e0 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -714,7 +714,9 @@ def optimise_with_scenarios( remaining_measures.append(kept) remaining_budget = budget - fabric_cost if budget is not None else None - remaining_budget = 0 if remaining_budget < 0 else remaining_budget + + if remaining_budget is not None: + remaining_budget = 0 if remaining_budget < 0 else remaining_budget picked_extra, extra_cost, extra_gain = run_optimizer( remaining_measures, From e0f897bf4466bb476b99cef34972c17eefae4be8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 12:46:25 +0000 Subject: [PATCH 211/340] minor stying and typing --- recommendations/optimiser/CostOptimiser.py | 7 ++++++- recommendations/optimiser/GainOptimiser.py | 10 +++++++++- recommendations/optimiser/StrategicOptimiser.py | 2 +- 3 files changed, 16 insertions(+), 3 deletions(-) diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index 32a869b2..43e303a7 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -1,4 +1,5 @@ from mip import Model, xsum, minimize, BINARY, OptimizationStatus +from typing import Mapping from utils.logger import setup_logger logger = setup_logger() @@ -13,7 +14,11 @@ class CostOptimiser: BUFFER = 0.2 def __init__( - self, components, min_gain, verbose=False, allow_slack=True + self, + components: list[list[Mapping[str, int | float | str]]], + min_gain: float | int, + verbose: bool = False, + allow_slack: bool = True ): self.components = components self.min_gain = min_gain diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 5dbf1dc5..94e022da 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -1,5 +1,6 @@ from mip import Model, xsum, maximize, BINARY, OptimizationStatus from utils.logger import setup_logger +from typing import Mapping logger = setup_logger() @@ -9,7 +10,14 @@ class GainOptimiser: This class is used to maximise gain, given a constrained cost """ - def __init__(self, components, max_cost, max_gain, allow_slack=True, verbose=False): + def __init__( + self, + components: list[list[Mapping[str, int | float | str]]], + max_cost: float | int, + max_gain: float | int, + allow_slack: bool = True, + verbose: bool = False + ): """ This function will try and maximise the gain, given a constrained cost. If we specific a max_gain, then the optimisation routine is constained to try not to exceed a maximum increase diff --git a/recommendations/optimiser/StrategicOptimiser.py b/recommendations/optimiser/StrategicOptimiser.py index 8ffc307c..69de4085 100644 --- a/recommendations/optimiser/StrategicOptimiser.py +++ b/recommendations/optimiser/StrategicOptimiser.py @@ -85,7 +85,7 @@ class StrategicOptimiser: # min cost # subject to # - # gain >= 𝐺 + # gain >= G # cost <= B # multiple-choice constraints # From 3dbe118b38064b6e93884b2e80ba6a92ab082365 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 13:29:16 +0000 Subject: [PATCH 212/340] additional logs in handler for local testing --- backend/categorisation/handler/handler.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index 449c5ccf..ee0e7a7d 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -1,5 +1,6 @@ import json from typing import Any, Mapping + from backend.categorisation.categorisation_trigger_request import ( CategorisationTriggerRequest, ) @@ -12,6 +13,10 @@ logger = setup_logger() def handler(event: Mapping[str, Any], context: Any) -> None: + logger.info("Received message") + + logger.info(f"Number of events: {len(event.get('Records', []))}") + for record in event.get("Records", []): try: body_dict = json.loads(record["body"]) @@ -27,4 +32,5 @@ def handler(event: Mapping[str, Any], context: Any) -> None: ) except Exception as e: + logger.info("Handler exception") logger.error(f"Failed to process record: {e}") From f7fe7132c7a1f2a86eed8b239070584723ce0a88 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 15:12:24 +0000 Subject: [PATCH 213/340] docker compose for running lambdas locally --- backend/docker-compose-local-lambdas.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 backend/docker-compose-local-lambdas.yml diff --git a/backend/docker-compose-local-lambdas.yml b/backend/docker-compose-local-lambdas.yml new file mode 100644 index 00000000..781f4955 --- /dev/null +++ b/backend/docker-compose-local-lambdas.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + lambda: + build: + context: ../ + dockerfile: backend/categorisation/handler/Dockerfile + ports: + - "9000:8080" + env_file: + - ../.env \ No newline at end of file From 3f022ba5488499fe630739c52a71cb5a572d908d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 15:13:24 +0000 Subject: [PATCH 214/340] rename service in docker compose --- backend/docker-compose-local-lambdas.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/docker-compose-local-lambdas.yml b/backend/docker-compose-local-lambdas.yml index 781f4955..50e9193b 100644 --- a/backend/docker-compose-local-lambdas.yml +++ b/backend/docker-compose-local-lambdas.yml @@ -1,7 +1,7 @@ version: "3.9" services: - lambda: + categorisation-lambda: build: context: ../ dockerfile: backend/categorisation/handler/Dockerfile From b4e3dc9f42a8d24674ef19a72f8efbe75e178610 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 15:19:36 +0000 Subject: [PATCH 215/340] add example request body --- backend/categorisation/categorisation_trigger_request.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 46ce6f1c..9bd7d7c8 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -7,3 +7,6 @@ class CategorisationTriggerRequest(BaseModel): plans_to_consider: Optional[List[int]] = None plan_priority_order: Optional[List[int]] = None + + +# {"portfolio_id": 556, "plans_to_consider": [1589319,1589320], "plan_priority_order": [1589319,1589320]} From 478947b8da91320f55b5e452e9803c90dec7cc6d Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 16:12:23 +0000 Subject: [PATCH 216/340] move docker compose back to categorisation directory and define simple invoke script --- .../local_handler/docker-compose.yml | 11 ++++++++ .../local_handler/invoke_local_lambda.py | 25 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 backend/categorisation/local_handler/docker-compose.yml create mode 100644 backend/categorisation/local_handler/invoke_local_lambda.py diff --git a/backend/categorisation/local_handler/docker-compose.yml b/backend/categorisation/local_handler/docker-compose.yml new file mode 100644 index 00000000..9529fdb2 --- /dev/null +++ b/backend/categorisation/local_handler/docker-compose.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + categorisation-lambda: + build: + context: ../../../ + dockerfile: backend/categorisation/handler/Dockerfile + ports: + - "9000:8080" + env_file: + - ../../../.env \ No newline at end of file diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py new file mode 100644 index 00000000..9eb5adda --- /dev/null +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -0,0 +1,25 @@ +#!/usr/bin/env python3 +import json +import requests + +LAMBDA_URL = "http://localhost:9000/2015-03-31/functions/function/invocations" + +payload = { + "Records": [ + { + "body": json.dumps( + { + "portfolio_id": 556, + "plans_to_consider": [], + "plan_priority_order": [], + } + ) + } + ] +} + +response = requests.post(LAMBDA_URL, json=payload) + +print("Status code:", response.status_code) +print("Response:") +print(response.text) From 490c8946d721725b16a90d40903ff667757b86cb Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 17:11:00 +0000 Subject: [PATCH 217/340] Unset existing default before setting new one --- .../db/functions/recommendations_functions.py | 34 +++++++++++++++++++ .../local_handler/invoke_local_lambda.py | 2 +- backend/categorisation/processor.py | 16 +++++++-- 3 files changed, 48 insertions(+), 4 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index d4c3fcb9..3af9fd29 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -646,6 +646,40 @@ def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: return session_any.exec(stmt).scalar_one_or_none() +def get_default_plan_ids_for_property(property_id: int) -> List[int]: + # This should in reality always return exactly 1 ID, but there's currently + # no database constraint to enforce that, so account for 0 or >1 + stmt = select(PlanModel.id).where( + PlanModel.property_id == property_id and PlanModel.is_default + ) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + +def set_plan_and_scenario_default(plan_id: int, default: bool) -> bool: + with db_session() as session: + plan: PlanModel = session.get(PlanModel, plan_id) + if not plan: + return False + + scenario_id = plan.scenario_id + + plan_mapper: Mapper[Any] = inspect(PlanModel) + scenario_mapper: Mapper[Any] = inspect(ScenarioModel) + + plan_mappings: List[Dict[str, Any]] = [{"id": plan.id, "is_default": default}] + scenario_mappings: List[Dict[str, Any]] = [ + {"id": scenario_id, "is_default": default} + ] + + session.bulk_update_mappings(plan_mapper, plan_mappings) + session.bulk_update_mappings(scenario_mapper, scenario_mappings) + session.commit() + + return True + + def bulk_update_plans( plan_models: List[PlanModel], scenario_models: List[ScenarioModel], diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 9eb5adda..23e5fda2 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -10,7 +10,7 @@ payload = { "body": json.dumps( { "portfolio_id": 556, - "plans_to_consider": [], + "plans_to_consider": [1589319, 1589320], "plan_priority_order": [], } ) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index b7ddfc62..b07f1c3b 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -3,9 +3,11 @@ from typing import Dict, List, Optional from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, + get_default_plan_ids_for_property, get_plans_by_ids, get_plans_by_portfolio_id, get_scenarios_by_portfolio_id, + set_plan_and_scenario_default, ) from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan @@ -26,15 +28,23 @@ def process_portfolio( plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) - for uprn, property_plans in plans_by_property.items(): + for property_id, property_plans in plans_by_property.items(): if not property_plans: - raise ValueError(f"No plans for property {uprn}") + raise ValueError(f"No plans for property {property_id}") cheapest_plan = choose_cheapest_relevant_plan( property_plans, plan_priority_order ) - _update_default_flags(property_plans, cheapest_plan) + + # Unset existing default(s) in case they are outside the plans to consider + default_plan_ids: List[int] = get_default_plan_ids_for_property(property_id) + for id in default_plan_ids: + set_plan_and_scenario_default(id, False) + + _update_default_flags( + property_plans, cheapest_plan + ) # TODO: we have already unset existing default(s), so this method can probably be a bit simpler now def choose_cheapest_relevant_plan( From ffbfe4992aea2c1a6a7bebb18c197f47c2a57f81 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 17:17:52 +0000 Subject: [PATCH 218/340] fixed tests --- recommendations/optimiser/GainOptimiser.py | 2 +- recommendations/tests/test_optimisers.py | 242 ++++++++++----------- 2 files changed, 113 insertions(+), 131 deletions(-) diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 94e022da..9c291313 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -14,7 +14,7 @@ class GainOptimiser: self, components: list[list[Mapping[str, int | float | str]]], max_cost: float | int, - max_gain: float | int, + max_gain: float | int | None, allow_slack: bool = True, verbose: bool = False ): diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index 0c794119..5a4df160 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -1,76 +1,34 @@ import pytest -from recommendations.optimiser.funding_optimiser import build_heat_pump_paths -from recommendations.optimiser.funding_optimiser import run_optimizer +from recommendations.optimiser.funding_optimiser import ( + build_heat_pump_paths, + run_optimizer, +) -class DummyProp: - """Minimal property stub exposing just what your code reads.""" - - def __init__(self): - self.data = { - "current-energy-rating": "E", # or "D" for the special Social+D path - "current-energy-efficiency": 55, # numeric SAP points used in eligibility calc - "mainheat-energy-eff": "Very Good", - } - self.has_ventilation = False - self.floor_area = 70.0 - self.main_heating_controls = {"clean_description": "time and temperature zone control"} - self.walls = {'original_description': 'Solid brick, as built, no insulation (assumed)', - 'thermal_transmittance': None, - 'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, - 'is_solid_brick': True, - 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, - 'is_as_built': True, - 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, - 'insulation_thickness': 'none', - 'external_insulation': False, 'internal_insulation': False} - - self.main_heating = { - 'original_description': 'Boiler and radiators, mains gas', - 'clean_description': 'Boiler and radiators, mains gas', - 'has_radiators': True, 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, - 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True, - 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, - 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, - 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False, - 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': - False, - 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': - False, - 'has_community_heat_pump': False, 'has_hot-water-only': False, 'has_electric': False, 'has_mains_gas': - True, - 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, - 'has_anthracite': False, - 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, - 'has_mineral_and_wood': False, 'has_dual_fuel_appliance': False, 'has_assumed': False, - 'has_electricaire': False, - 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False - } - - self.main_fuel = { - 'original_description': 'mains gas (not community)', 'clean_description': 'Mains gas not community', - 'fuel_type': 'mains gas', 'tariff_type': None, 'is_community': False, - 'no_individual_heating_or_community_network': False, 'complex_fuel_type': None - } - - -@pytest.fixture -def p(): - return DummyProp() - +# --------------------------------------------------------------------- +# Heat pump path tests (unchanged – these are fine) +# --------------------------------------------------------------------- def test_build_heat_pump_paths(): eg1 = build_heat_pump_paths([], ["loft_insulation"]) - assert eg1 == [{'AND': ['loft_insulation', 'air_source_heat_pump']}] - eg2 = build_heat_pump_paths(["internal_wall_insulation", "external_wall_insulation"], ["loft_insulation"]) + eg2 = build_heat_pump_paths( + ["internal_wall_insulation", "external_wall_insulation"], + ["loft_insulation"], + ) - assert eg2 == [{'AND': ['internal_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, - {'AND': ['external_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}] + assert eg2 == [ + {'AND': ['internal_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, + {'AND': ['external_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, + ] +# --------------------------------------------------------------------- +# run_optimizer tests +# --------------------------------------------------------------------- + def test_run_optimizer_empty_input(): solution, cost, gain = run_optimizer([]) assert solution is None @@ -78,134 +36,158 @@ def test_run_optimizer_empty_input(): assert gain == 0.0 -def test_uses_gain_optimiser_when_budget_provided(monkeypatch): - captured_args = {} +# --------------------------------------------------------------------- +# StrategicOptimiser mocking boundary +# --------------------------------------------------------------------- - class FakeGainOptimiser: - def __init__(self, measures, max_cost, max_gain, allow_slack): - captured_args["measures"] = measures - captured_args["max_cost"] = max_cost - captured_args["max_gain"] = max_gain - captured_args["allow_slack"] = allow_slack - self.solution = [{"cost": 100}] +def test_budget_and_target_are_passed_correctly(monkeypatch): + captured = {} + + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["components"] = components + captured["budget"] = budget + captured["target_gain"] = target_gain + captured["allow_slack"] = allow_slack + + self.solution = [{"cost": 100, "gain": 5}] + self.solution_cost = 100 self.solution_gain = 5 - def setup(self): - pass - def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.GainOptimiser", - FakeGainOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 100, "gain": 5}]] - solution, cost, gain = run_optimizer( - measures, + [[{"cost": 100, "gain": 5}]], budget=500, sub_target_gain=10, - allow_slack=True + allow_slack=True, ) - assert captured_args["max_cost"] == 500 - assert captured_args["max_gain"] == 10 - assert captured_args["allow_slack"] is True + assert captured["budget"] == 500 + assert captured["target_gain"] == 10 + assert captured["allow_slack"] is True + assert cost == 100 assert gain == 5 + assert solution == [{"cost": 100, "gain": 5}] -def test_sub_target_gain_zero_sets_max_gain_zero(monkeypatch): - captured_args = {} +def test_sub_target_gain_zero_is_passed_as_zero(monkeypatch): + captured = {} - class FakeGainOptimiser: - def __init__(self, measures, max_cost, max_gain, allow_slack): - captured_args["max_gain"] = max_gain + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["target_gain"] = target_gain self.solution = [] - self.solution_gain = 0 - - def setup(self): - pass + self.solution_cost = 0.0 + self.solution_gain = 0.0 def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.GainOptimiser", - FakeGainOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 100, "gain": 5}]] - run_optimizer( - measures, + [[{"cost": 100, "gain": 5}]], budget=500, - sub_target_gain=0 + sub_target_gain=0, ) - assert captured_args["max_gain"] == 0 + assert captured["target_gain"] == 0 -def test_sub_target_gain_none_sets_max_gain_infinity(monkeypatch): - captured_args = {} +def test_sub_target_gain_none_becomes_infinity(monkeypatch): + captured = {} - class FakeGainOptimiser: - def __init__(self, measures, max_cost, max_gain, allow_slack): - captured_args["max_gain"] = max_gain + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["target_gain"] = target_gain self.solution = [] - self.solution_gain = 0 - - def setup(self): - pass + self.solution_cost = 0.0 + self.solution_gain = 0.0 def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.GainOptimiser", - FakeGainOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 100, "gain": 5}]] - run_optimizer( - measures, + [[{"cost": 100, "gain": 5}]], budget=500, - sub_target_gain=None + sub_target_gain=None, ) - assert captured_args["max_gain"] == float("inf") + assert captured["target_gain"] == None -def test_uses_cost_optimiser_when_no_budget(monkeypatch): - captured_args = {} +def test_target_only_case(monkeypatch): + captured = {} - class FakeCostOptimiser: - def __init__(self, measures, min_gain): - captured_args["min_gain"] = min_gain - self.solution = [{"cost": 50}] + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["budget"] = budget + captured["target_gain"] = target_gain + + self.solution = [{"cost": 50, "gain": 10}] + self.solution_cost = 50 self.solution_gain = 10 - def setup(self): - pass - def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.CostOptimiser", - FakeCostOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 50, "gain": 10}]] - solution, cost, gain = run_optimizer( - measures, - sub_target_gain=10 + [[{"cost": 50, "gain": 10}]], + sub_target_gain=10, ) - assert captured_args["min_gain"] == 10 + assert captured["budget"] is None + assert captured["target_gain"] == 10 + assert cost == 50 assert gain == 10 + assert solution == [{"cost": 50, "gain": 10}] From 378eb055089e4a88b6c2b0ae6f4746cd9f4069e3 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 17:18:29 +0000 Subject: [PATCH 219/340] aesthetics --- recommendations/tests/test_optimisers.py | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index 5a4df160..63280907 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -6,10 +6,6 @@ from recommendations.optimiser.funding_optimiser import ( ) -# --------------------------------------------------------------------- -# Heat pump path tests (unchanged – these are fine) -# --------------------------------------------------------------------- - def test_build_heat_pump_paths(): eg1 = build_heat_pump_paths([], ["loft_insulation"]) assert eg1 == [{'AND': ['loft_insulation', 'air_source_heat_pump']}] @@ -25,10 +21,6 @@ def test_build_heat_pump_paths(): ] -# --------------------------------------------------------------------- -# run_optimizer tests -# --------------------------------------------------------------------- - def test_run_optimizer_empty_input(): solution, cost, gain = run_optimizer([]) assert solution is None @@ -36,10 +28,6 @@ def test_run_optimizer_empty_input(): assert gain == 0.0 -# --------------------------------------------------------------------- -# StrategicOptimiser mocking boundary -# --------------------------------------------------------------------- - def test_budget_and_target_are_passed_correctly(monkeypatch): captured = {} From 475b3d0e1305a801857bf13dde0915d80482894e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 17:19:38 +0000 Subject: [PATCH 220/340] input is list of scenarios to consider not list of plans --- .../db/functions/recommendations_functions.py | 7 +++++++ .../categorisation_trigger_request.py | 2 +- backend/categorisation/handler/handler.py | 2 +- .../local_handler/invoke_local_lambda.py | 2 +- backend/categorisation/processor.py | 19 +++++++++++-------- 5 files changed, 21 insertions(+), 11 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 3af9fd29..6f7dd41f 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -632,6 +632,13 @@ def get_plans_by_ids(ids: List[int]) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() +def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: + stmt = select(PlanModel).where(PlanModel.scenario_id.in_(ids)) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: stmt = select(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) with db_read_session() as session: diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 9bd7d7c8..4b35f75c 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -5,7 +5,7 @@ from pydantic import BaseModel class CategorisationTriggerRequest(BaseModel): portfolio_id: int - plans_to_consider: Optional[List[int]] = None + scenarios_to_consider: Optional[List[int]] = None plan_priority_order: Optional[List[int]] = None diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index ee0e7a7d..dc10fa4e 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -27,7 +27,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: process_portfolio( payload.portfolio_id, - payload.plans_to_consider, + payload.scenarios_to_consider, payload.plan_priority_order, ) diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 23e5fda2..ce599ca9 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -10,7 +10,7 @@ payload = { "body": json.dumps( { "portfolio_id": 556, - "plans_to_consider": [1589319, 1589320], + "scenarios_to_consider": [1040, 1041], "plan_priority_order": [], } ) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index b07f1c3b..e017c069 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -6,6 +6,7 @@ from backend.app.db.functions.recommendations_functions import ( get_default_plan_ids_for_property, get_plans_by_ids, get_plans_by_portfolio_id, + get_plans_by_scenario_ids, get_scenarios_by_portfolio_id, set_plan_and_scenario_default, ) @@ -19,12 +20,12 @@ logger = setup_logger() def process_portfolio( portfolio_id: int, - plans_to_consider: Optional[List[int]] = None, + scenarios_to_consider: Optional[List[int]] = None, plan_priority_order: Optional[List[int]] = None, ) -> None: logger.info(f"Processing portfolio {portfolio_id}") - plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, plans_to_consider) + plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, scenarios_to_consider) plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) @@ -88,15 +89,17 @@ def choose_cheapest_relevant_plan( def _load_plans_for_portfolio( - portfolio_id: int, plans_to_consider: Optional[List[int]] = None + portfolio_id: int, scenarios_to_consider: Optional[List[int]] = None ) -> List[Plan]: - if plans_to_consider: - if len(plans_to_consider) < 2: - raise ValueError("Cannot run auto categorisation for fewer than 2 plans") + if scenarios_to_consider: + if len(scenarios_to_consider) < 2: + raise ValueError( + "Cannot run auto categorisation for fewer than 2 scenarios" + ) - logger.info(f"Getting {len(plans_to_consider)} Plans") - plan_models: List[PlanModel] = get_plans_by_ids(plans_to_consider) + logger.info(f"Getting {len(scenarios_to_consider)} plans") + plan_models: List[PlanModel] = get_plans_by_scenario_ids(scenarios_to_consider) else: logger.info( From ce8c1d23e6c8081b85f5dc2d59f71ca62df6a14f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 17:24:17 +0000 Subject: [PATCH 221/340] =?UTF-8?q?priority=20list=20is=20scenarios=20not?= =?UTF-8?q?=20plans=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../categorisation/categorisation_trigger_request.py | 2 +- backend/categorisation/handler/handler.py | 2 +- backend/categorisation/processor.py | 11 +++++------ .../tests/test_prioritised_plan_selected.py | 12 ++++++------ 4 files changed, 13 insertions(+), 14 deletions(-) diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 4b35f75c..fbc2328b 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -6,7 +6,7 @@ class CategorisationTriggerRequest(BaseModel): portfolio_id: int scenarios_to_consider: Optional[List[int]] = None - plan_priority_order: Optional[List[int]] = None + scenario_priority_order: Optional[List[int]] = None # {"portfolio_id": 556, "plans_to_consider": [1589319,1589320], "plan_priority_order": [1589319,1589320]} diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index dc10fa4e..9fb235d5 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -28,7 +28,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: process_portfolio( payload.portfolio_id, payload.scenarios_to_consider, - payload.plan_priority_order, + payload.scenario_priority_order, ) except Exception as e: diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index e017c069..fd0d9c89 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -4,7 +4,6 @@ from typing import Dict, List, Optional from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, get_default_plan_ids_for_property, - get_plans_by_ids, get_plans_by_portfolio_id, get_plans_by_scenario_ids, get_scenarios_by_portfolio_id, @@ -21,7 +20,7 @@ logger = setup_logger() def process_portfolio( portfolio_id: int, scenarios_to_consider: Optional[List[int]] = None, - plan_priority_order: Optional[List[int]] = None, + scenario_priority_order: Optional[List[int]] = None, ) -> None: logger.info(f"Processing portfolio {portfolio_id}") @@ -35,7 +34,7 @@ def process_portfolio( raise ValueError(f"No plans for property {property_id}") cheapest_plan = choose_cheapest_relevant_plan( - property_plans, plan_priority_order + property_plans, scenario_priority_order ) # Unset existing default(s) in case they are outside the plans to consider @@ -49,9 +48,9 @@ def process_portfolio( def choose_cheapest_relevant_plan( - plans: List[Plan], plan_priority_order: Optional[List[int]] = None + plans: List[Plan], scenario_priority_order: Optional[List[int]] = None ) -> Plan: - plan_priority_order = plan_priority_order or [] + scenario_priority_order = scenario_priority_order or [] eligible_plans: List[Plan] = [plan for plan in plans if plan.is_compliant] or plans if not eligible_plans: @@ -80,7 +79,7 @@ def choose_cheapest_relevant_plan( if (plan.record.cost_of_works or float("inf")) == min_cost ] - for priority_plan_id in plan_priority_order: + for priority_plan_id in scenario_priority_order: for plan in cheapest_plans: if plan.id == priority_plan_id: return plan diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index 5424dd5e..74eb8c69 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -39,7 +39,7 @@ def make_scenario(name: str, created_at: datetime, is_default: bool) -> Scenario multi_plan=False, is_default=is_default, ) - return Scenario(record=record, id=1 if is_default else 2) + return Scenario(record=record, id=3 if is_default else 4) def make_plan( @@ -54,17 +54,17 @@ def make_plan( ) -def test_prioritised_plan_selected(created_at_datetime: datetime) -> None: +def test_prioritised_scenario_selected(created_at_datetime: datetime) -> None: # arrange epc_c_plan = make_plan(created_at_datetime, True, name="EPC C") minor_works_plan = make_plan(created_at_datetime, False, name="EPC C - Minor Works") - plan_priority_order: List[int] = [2, 1] + scenario_priority_order: List[int] = [4, 3] expected_default_plan_id = 2 # act actual_default_plan = choose_cheapest_relevant_plan( plans=[epc_c_plan, minor_works_plan], - plan_priority_order=plan_priority_order, + scenario_priority_order=scenario_priority_order, ) # assert @@ -81,13 +81,13 @@ def test_cheapest_plan_returned_if_not_in_priority_list( minor_works_plan = make_plan( created_at_datetime, False, cost_of_works=100.0, name="EPC C - Minor Works" ) - plan_priority_order: List[int] = [1, 3] + scenario_priority_order: List[int] = [3, 5] expected_default_plan_id = 2 # act actual_default_plan = choose_cheapest_relevant_plan( plans=[epc_c_plan, minor_works_plan], - plan_priority_order=plan_priority_order, + scenario_priority_order=scenario_priority_order, ) # assert From c1aa5716beed1fd9de3f4266918cf977c712e957 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 17:26:22 +0000 Subject: [PATCH 222/340] =?UTF-8?q?priority=20list=20is=20scenarios=20not?= =?UTF-8?q?=20plans=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/categorisation/processor.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index fd0d9c89..ca58fb9d 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -79,9 +79,9 @@ def choose_cheapest_relevant_plan( if (plan.record.cost_of_works or float("inf")) == min_cost ] - for priority_plan_id in scenario_priority_order: + for priority_scenario_id in scenario_priority_order: for plan in cheapest_plans: - if plan.id == priority_plan_id: + if plan.scenario.id == priority_scenario_id: return plan return cheapest_plans[0] From 5ffa7290782ee5c2fb904b1c81453de6fc6074a8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 18 Feb 2026 17:35:31 +0000 Subject: [PATCH 223/340] rename variable in invoke local lambda --- backend/categorisation/local_handler/invoke_local_lambda.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index ce599ca9..a53e0d8e 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -11,7 +11,7 @@ payload = { { "portfolio_id": 556, "scenarios_to_consider": [1040, 1041], - "plan_priority_order": [], + "scenarios_priority_order": [], } ) } From 0e10923353e1f9175bda5b9c9833d99a722bb727 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 18 Feb 2026 17:39:40 +0000 Subject: [PATCH 224/340] addressing feedback --- recommendations/optimiser/GainOptimiser.py | 3 - .../optimiser/funding_optimiser.py | 10 +- .../tests/test_optimiser_functions.py | 525 +----------------- 3 files changed, 19 insertions(+), 519 deletions(-) diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 9c291313..bd907b4d 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -94,9 +94,6 @@ class GainOptimiser: for group_vars in self.variables: self.m += xsum(var for var in group_vars) <= 1 - self.m.max_gap = 0 - self.m.integer_tol = 1e-9 - def setup_slack(self): # Remove the original cost constraint self.m.remove(self.cost_constraint) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 787af8e0..324e2c74 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -1114,8 +1114,14 @@ def run_optimizer( allow_slack: bool = False ): """ - Thin wrapper over your optimisers. - Returns: list[dict] selected_options + Thin wrapper around the StrategicOptimiser to run it on a subset of measures with an optional budget and target + gain. Handles the cases of no input measures, and extracts the outputs for ease of use. + :param input_measures: list of groups of measures (each group is a list of measure dicts) + :param budget: optional budget to constrain the optimisation + :param sub_target_gain: optional target gain to achieve from this optimisation run + :param allow_slack: whether to allow solutions that exceed the target gain (True) or only solutions that meet it + exactly (False) + :return: tuple of (picked measures, total cost, total gain) where picked measures is a list of measure dicts """ if not input_measures: diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index ca2a0dcb..f0ca6dac 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -291,7 +291,9 @@ class TestIncreasingEpcE2e: class TestStrategicOptimiser: - def test_budget_and_target_gain(self): + + @pytest.fixture + def components(self): components = [ [ {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, @@ -419,6 +421,9 @@ class TestStrategicOptimiser: 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} ] ] + return components + + def test_budget_and_target_gain_strategy_case_1_try_min_cost_with_constraints(self, components): budget = 5000 target_gain = 11.5 @@ -436,134 +441,7 @@ class TestStrategicOptimiser: assert opt.solution_cost == 4398.75 assert opt.solution_gain == 12 - def test_budget_and_target_gain_2(self): - components = [ - [ - {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}], - [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', - 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, - 'array_size': 0}], - [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, - 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, - 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, - {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', - 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, - 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, - 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, - {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, - {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, - {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, - {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, - {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, - {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, - {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, - {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, - {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, - {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, - {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, - {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, - {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, - {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} - ] - ] + def test_budget_and_target_gain_expecting_case_1_solve_max_gain_under_budget_strategy(self, components): budget = 4000 target_gain = 11.5 @@ -582,134 +460,7 @@ class TestStrategicOptimiser: assert opt.solution_cost == 1477.0680000000002 assert opt.solution_gain == 10.8 - def test_just_gain(self): - components = [ - [ - {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}], - [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', - 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, - 'array_size': 0}], - [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, - 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, - 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, - {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', - 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, - 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, - 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, - {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, - {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, - {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, - {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, - {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, - {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, - {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, - {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, - {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, - {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, - {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, - {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, - {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, - {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} - ] - ] + def test_just_gain_expecting_case_3_solve_min_cost_for_target_strategy(self, components): budget = None target_gain = 11.5 @@ -726,134 +477,7 @@ class TestStrategicOptimiser: assert opt.solution_cost == 4398.75 assert opt.solution_gain == 12 - def test_just_gain2(self): - components = [ - [ - {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}], - [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', - 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, - 'array_size': 0}], - [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, - 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, - 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, - {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', - 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, - 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, - 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, - {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, - {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, - {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, - {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, - {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, - {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, - {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, - {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, - {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, - {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, - {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, - {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, - {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, - {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} - ] - ] + def test_just_gain_of_20_expecting_case_3_solve_min_cost_for_target_strategy(self, components): budget = None target_gain = 20 @@ -870,134 +494,7 @@ class TestStrategicOptimiser: assert opt.solution_cost == 5962.5 assert opt.solution_gain == 20.2 - def test_just_budget(self): - components = [ - [ - {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}, - {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, - 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, - 'has_battery': False, 'array_size': 0}], - [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', - 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, - 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, - 'array_size': 0}], - [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, - 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, - 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, - {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', - 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, - 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, - 'already_installed': False, 'has_battery': False, 'array_size': 0}], - [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, - {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, - 'battery_gain': 3}, - {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, - {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, - {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, - 'battery_gain': 3}, - {'id': '16_phase=6', 'cost': 5520.0, 'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, - {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, - 'battery_gain': 3}, - {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, - {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, - 'battery_gain': 2}, - {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, - {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, - 'battery_gain': 3}, - {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, - {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, - {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, - {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, - {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, - {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, - {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, - {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, - {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, - {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, - {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, - 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, - 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} - ] - ] + def test_just_budget_expecting_case_2_solve_max_gain_under_budget_strategy(self, components): budget = 10000 target_gain = None @@ -1009,7 +506,7 @@ class TestStrategicOptimiser: opt.solve() - # Should be case 3 - minimise cost for target gain + # Should be case 2 - minimise cost for target gain assert opt.strategy_used.value == "case_2_solve_max_gain_under_budget" assert opt.solution_cost == 7787.068 assert opt.solution_gain == 28.8 From b715a60d4a8f6e21aef7043385c44d805c810f2e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Feb 2026 09:25:55 +0000 Subject: [PATCH 225/340] typo in log --- backend/categorisation/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index ca58fb9d..e1c0c6ff 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -165,4 +165,4 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: logger.info("Successfully updated Plan default values") else: - logger.info("All plan default values already correct. Not udpating") + logger.info("All plan default values already correct. Not updating") From dae74d2f8b9ec93b7859a2bff4207055d374d295 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Feb 2026 12:18:22 +0000 Subject: [PATCH 226/340] Fix bug where default plans were not being unset if they weren't included in scenarios to be considered --- .../db/functions/recommendations_functions.py | 21 ++++++++- .../local_handler/invoke_local_lambda.py | 2 +- backend/categorisation/processor.py | 46 ++++++++++++++----- 3 files changed, 56 insertions(+), 13 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 6f7dd41f..ff9b9dc8 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -639,6 +639,13 @@ def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() +def get_plan_ids_by_scenario_ids(scenario_ids: List[int]) -> List[int]: + stmt = select(PlanModel.id).where(PlanModel.scenario_id.in_(scenario_ids)) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: stmt = select(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) with db_read_session() as session: @@ -657,7 +664,19 @@ def get_default_plan_ids_for_property(property_id: int) -> List[int]: # This should in reality always return exactly 1 ID, but there's currently # no database constraint to enforce that, so account for 0 or >1 stmt = select(PlanModel.id).where( - PlanModel.property_id == property_id and PlanModel.is_default + (PlanModel.property_id == property_id) & (PlanModel.is_default == True) + ) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + +def get_default_scenario_ids_for_portfolio(portfolio_id: int) -> List[int]: + # This should in reality always return exactly 1 ID, but there's currently + # no database constraint to enforce that, so account for 0 or >1 + stmt = select(ScenarioModel.id).where( + (ScenarioModel.portfolio_id == portfolio_id) + & (ScenarioModel.is_default == True) ) with db_read_session() as session: session_any: Any = session # Typehint as Any to satisfy Pylance... diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index a53e0d8e..7d092d67 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -10,7 +10,7 @@ payload = { "body": json.dumps( { "portfolio_id": 556, - "scenarios_to_consider": [1040, 1041], + "scenarios_to_consider": [], "scenarios_priority_order": [], } ) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index e1c0c6ff..f619d5fd 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -3,7 +3,8 @@ from typing import Dict, List, Optional from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, - get_default_plan_ids_for_property, + get_default_scenario_ids_for_portfolio, + get_plan_ids_by_scenario_ids, get_plans_by_portfolio_id, get_plans_by_scenario_ids, get_scenarios_by_portfolio_id, @@ -24,6 +25,17 @@ def process_portfolio( ) -> None: logger.info(f"Processing portfolio {portfolio_id}") + if scenarios_to_consider: + if len(scenarios_to_consider) < 2: + raise ValueError( + "Cannot run auto categorisation for fewer than 2 scenarios" + ) + + if scenarios_to_consider is not None: + _unset_defaults_for_scenarios_not_being_considered( + portfolio_id, scenarios_to_consider + ) + plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, scenarios_to_consider) plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) @@ -37,11 +49,6 @@ def process_portfolio( property_plans, scenario_priority_order ) - # Unset existing default(s) in case they are outside the plans to consider - default_plan_ids: List[int] = get_default_plan_ids_for_property(property_id) - for id in default_plan_ids: - set_plan_and_scenario_default(id, False) - _update_default_flags( property_plans, cheapest_plan ) # TODO: we have already unset existing default(s), so this method can probably be a bit simpler now @@ -87,16 +94,33 @@ def choose_cheapest_relevant_plan( return cheapest_plans[0] +def _unset_defaults_for_scenarios_not_being_considered( + portfolio_id: int, scenarios_to_consider: List[int] +) -> None: + default_scenario_ids: List[int] = get_default_scenario_ids_for_portfolio( + portfolio_id + ) + scenarios_to_unset_default: List[int] = [] + + for id in default_scenario_ids: + if id not in scenarios_to_consider: + scenarios_to_unset_default.append(id) + + logger.info(f"Scenarios to unset defaults: {scenarios_to_unset_default}") + + if len(scenarios_to_unset_default) > 0: + plans_to_unset_default: List[int] = get_plan_ids_by_scenario_ids( + scenarios_to_unset_default + ) + for plan_id in plans_to_unset_default: + set_plan_and_scenario_default(plan_id, False) # TODO: do this in batch + + def _load_plans_for_portfolio( portfolio_id: int, scenarios_to_consider: Optional[List[int]] = None ) -> List[Plan]: if scenarios_to_consider: - if len(scenarios_to_consider) < 2: - raise ValueError( - "Cannot run auto categorisation for fewer than 2 scenarios" - ) - logger.info(f"Getting {len(scenarios_to_consider)} plans") plan_models: List[PlanModel] = get_plans_by_scenario_ids(scenarios_to_consider) From 9d12eef0e587537932a55fdb7726300380b4c60a Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Feb 2026 12:22:19 +0000 Subject: [PATCH 227/340] Remove unused db functions --- .../db/functions/recommendations_functions.py | 25 ------------------- 1 file changed, 25 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index ff9b9dc8..7ffcf603 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -625,13 +625,6 @@ def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() -def get_plans_by_ids(ids: List[int]) -> List[PlanModel]: - stmt = select(PlanModel).where(PlanModel.id.in_(ids)) - with db_read_session() as session: - session_any: Any = session # Typehint as Any to satisfy Pylance... - return session_any.exec(stmt).scalars().all() - - def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: stmt = select(PlanModel).where(PlanModel.scenario_id.in_(ids)) with db_read_session() as session: @@ -653,24 +646,6 @@ def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: return session_any.exec(stmt).scalars().all() -def get_scenario(scenario_id: int) -> Optional[ScenarioModel]: - stmt = select(ScenarioModel).where(ScenarioModel.id == scenario_id) - with db_read_session() as session: - session_any: Any = session # Typehint as Any to satisfy Pylance... - return session_any.exec(stmt).scalar_one_or_none() - - -def get_default_plan_ids_for_property(property_id: int) -> List[int]: - # This should in reality always return exactly 1 ID, but there's currently - # no database constraint to enforce that, so account for 0 or >1 - stmt = select(PlanModel.id).where( - (PlanModel.property_id == property_id) & (PlanModel.is_default == True) - ) - with db_read_session() as session: - session_any: Any = session # Typehint as Any to satisfy Pylance... - return session_any.exec(stmt).scalars().all() - - def get_default_scenario_ids_for_portfolio(portfolio_id: int) -> List[int]: # This should in reality always return exactly 1 ID, but there's currently # no database constraint to enforce that, so account for 0 or >1 From 63bef436d086f791c10acbdb79371d585d5016ee Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Feb 2026 12:44:41 +0000 Subject: [PATCH 228/340] do batch update per portfolio not per property --- .../local_handler/invoke_local_lambda.py | 2 +- backend/categorisation/processor.py | 45 +++++++++++-------- 2 files changed, 28 insertions(+), 19 deletions(-) diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 7d092d67..127d2575 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -10,7 +10,7 @@ payload = { "body": json.dumps( { "portfolio_id": 556, - "scenarios_to_consider": [], + "scenarios_to_consider": [1039, 1041], "scenarios_priority_order": [], } ) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index f619d5fd..966ecbf5 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,5 +1,5 @@ from collections import defaultdict -from typing import Dict, List, Optional +from typing import Dict, List, Optional, Tuple from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, @@ -40,6 +40,9 @@ def process_portfolio( plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) + updated_plan_models: List[PlanModel] = [] + updated_scenario_models: List[ScenarioModel] = [] + for property_id, property_plans in plans_by_property.items(): if not property_plans: @@ -49,9 +52,16 @@ def process_portfolio( property_plans, scenario_priority_order ) - _update_default_flags( - property_plans, cheapest_plan - ) # TODO: we have already unset existing default(s), so this method can probably be a bit simpler now + updated_property_plan_models, updated_property_scenario_models = ( + _update_plan_and_scenario_objects(property_plans, cheapest_plan) + ) + + updated_plan_models.extend(updated_property_plan_models) + updated_scenario_models.extend(updated_property_scenario_models) + + if len(updated_plan_models) > 0: + bulk_update_plans(updated_plan_models, updated_scenario_models) + logger.info("Successfully updated Plan default values in database") def choose_cheapest_relevant_plan( @@ -106,7 +116,9 @@ def _unset_defaults_for_scenarios_not_being_considered( if id not in scenarios_to_consider: scenarios_to_unset_default.append(id) - logger.info(f"Scenarios to unset defaults: {scenarios_to_unset_default}") + logger.info( + f"Unsetting {scenarios_to_unset_default} as default scenario(s) as not included in provided list of scenarios to consider" + ) if len(scenarios_to_unset_default) > 0: plans_to_unset_default: List[int] = get_plan_ids_by_scenario_ids( @@ -164,7 +176,9 @@ def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]: return grouped -def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: +def _update_plan_and_scenario_objects( + plans: List[Plan], cheapest_plan: Plan +) -> Tuple[List[PlanModel], List[ScenarioModel]]: plans_to_update: List[Plan] = [] for plan in plans: @@ -176,17 +190,12 @@ def _update_default_flags(plans: List[Plan], cheapest_plan: Plan) -> None: plan.set_default(should_be_default) plans_to_update.append(plan) - if plans_to_update: - plan_models: List[PlanModel] = [] - scenario_models: List[ScenarioModel] = [] + plan_models: List[PlanModel] = [] + scenario_models: List[ScenarioModel] = [] - for plan in plans_to_update: - plan_model, scenario_model = plan.to_sqlalchemy() - plan_models.append(plan_model) - scenario_models.append(scenario_model) + for plan in plans_to_update: + plan_model, scenario_model = plan.to_sqlalchemy() + plan_models.append(plan_model) + scenario_models.append(scenario_model) - bulk_update_plans(plan_models, scenario_models) - logger.info("Successfully updated Plan default values") - - else: - logger.info("All plan default values already correct. Not updating") + return (plan_models, scenario_models) From 6dcb4d1d92ce12b9e157e8ad3ea494058391d5bf Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 19 Feb 2026 17:03:40 +0000 Subject: [PATCH 229/340] additional logging --- .../categorisation_trigger_request.py | 2 +- .../local_handler/invoke_local_lambda.py | 6 +++--- backend/categorisation/local_runner.py | 6 ++++-- backend/categorisation/processor.py | 20 ++++++++++--------- 4 files changed, 19 insertions(+), 15 deletions(-) diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index fbc2328b..44ac0ff1 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -9,4 +9,4 @@ class CategorisationTriggerRequest(BaseModel): scenario_priority_order: Optional[List[int]] = None -# {"portfolio_id": 556, "plans_to_consider": [1589319,1589320], "plan_priority_order": [1589319,1589320]} +# {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]} diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 127d2575..1446a1e3 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -9,9 +9,9 @@ payload = { { "body": json.dumps( { - "portfolio_id": 556, - "scenarios_to_consider": [1039, 1041], - "scenarios_priority_order": [], + "portfolio_id": 569, + "scenarios_to_consider": [1069, 1060], + "scenario_priority_order": [1069, 1060], } ) } diff --git a/backend/categorisation/local_runner.py b/backend/categorisation/local_runner.py index 599cbbbb..f4718ffc 100644 --- a/backend/categorisation/local_runner.py +++ b/backend/categorisation/local_runner.py @@ -2,9 +2,11 @@ from backend.categorisation.processor import process_portfolio def main() -> None: - portfolio_id = 556 + portfolio_id = 569 + scenarios_to_consider = [1069, 1060] + scenario_priority_order = [1069, 1060] - process_portfolio(portfolio_id) + process_portfolio(portfolio_id, scenarios_to_consider, scenario_priority_order) if __name__ == "__main__": diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 966ecbf5..5ed75d8f 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -37,8 +37,10 @@ def process_portfolio( ) plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, scenarios_to_consider) + logger.info(f"Successfully loaded {len(plans)}") plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) + logger.info("Successfully grouped plans by property") updated_plan_models: List[PlanModel] = [] updated_scenario_models: List[ScenarioModel] = [] @@ -51,6 +53,7 @@ def process_portfolio( cheapest_plan = choose_cheapest_relevant_plan( property_plans, scenario_priority_order ) + logger.info(f"Successfully found cheapest plan for Property {property_id}") updated_property_plan_models, updated_property_scenario_models = ( _update_plan_and_scenario_objects(property_plans, cheapest_plan) @@ -60,6 +63,7 @@ def process_portfolio( updated_scenario_models.extend(updated_property_scenario_models) if len(updated_plan_models) > 0: + logger.info(f"Updating {len(updated_plan_models)} Plans in database") bulk_update_plans(updated_plan_models, updated_scenario_models) logger.info("Successfully updated Plan default values in database") @@ -116,9 +120,10 @@ def _unset_defaults_for_scenarios_not_being_considered( if id not in scenarios_to_consider: scenarios_to_unset_default.append(id) - logger.info( - f"Unsetting {scenarios_to_unset_default} as default scenario(s) as not included in provided list of scenarios to consider" - ) + if len(scenarios_to_unset_default) > 0: + logger.info( + f"Unsetting {scenarios_to_unset_default} as default scenario(s) as not included in provided list of scenarios to consider" + ) if len(scenarios_to_unset_default) > 0: plans_to_unset_default: List[int] = get_plan_ids_by_scenario_ids( @@ -133,9 +138,9 @@ def _load_plans_for_portfolio( ) -> List[Plan]: if scenarios_to_consider: - logger.info(f"Getting {len(scenarios_to_consider)} plans") + logger.info(f"Getting plans for {len(scenarios_to_consider)} scenarios") plan_models: List[PlanModel] = get_plans_by_scenario_ids(scenarios_to_consider) - + logger.info(f"Got {len(plan_models)} plan models from database") else: logger.info( f"No list of Plans to consider provided. Getting all Plans for portfolio {portfolio_id}" @@ -159,11 +164,8 @@ def _load_plans_for_portfolio( plans.append( Plan.from_sqlalchemy(model, Scenario.from_sqlalchemy(scenario_model)) ) - logger.debug( - f"Successfully mapped plan {model.id} and scenario {scenario_model.id} to domain object" - ) - logger.debug(f"Got {len(plans)} plans from database") + logger.info(f"Got {len(plans)} Plans") return plans From a8d3ce599d3b5dd6ea4e092375506aa4355c7dc6 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 10:11:31 +0000 Subject: [PATCH 230/340] =?UTF-8?q?handle=20all=20plans=20having=20zero=20?= =?UTF-8?q?cost=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_prioritised_plan_selected.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index 74eb8c69..7544eb9c 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -92,3 +92,24 @@ def test_cheapest_plan_returned_if_not_in_priority_list( # assert assert actual_default_plan.id == expected_default_plan_id + + +def test_all_plans_zero_cost__highest_priority_returned( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C") + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=0.0, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id From 0e279b15cec06d80bee07e17af73cfc6151b444b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 10:36:22 +0000 Subject: [PATCH 231/340] =?UTF-8?q?handle=20all=20plans=20having=20zero=20?= =?UTF-8?q?cost=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/categorisation/processor.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 5ed75d8f..590d064f 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -94,11 +94,15 @@ def choose_cheapest_relevant_plan( for plan in eligible_plans ) - cheapest_plans: List[Plan] = [ - plan - for plan in eligible_plans - if (plan.record.cost_of_works or float("inf")) == min_cost - ] + if all(p.record.cost_of_works == 0 for p in eligible_plans): + cheapest_plans = eligible_plans + + else: + cheapest_plans: List[Plan] = [ + plan + for plan in eligible_plans + if (plan.record.cost_of_works or float("inf")) == min_cost + ] for priority_scenario_id in scenario_priority_order: for plan in cheapest_plans: From cb55338f39eafdbfb03009bbfffc094c883cb5ad Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 10:38:29 +0000 Subject: [PATCH 232/340] =?UTF-8?q?handle=20all=20plans=20having=20null=20?= =?UTF-8?q?cost=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_prioritised_plan_selected.py | 30 +++++++++++++++++-- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index 7544eb9c..e2af6a63 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -1,5 +1,5 @@ from datetime import datetime -from typing import List +from typing import List, Optional import pytest from backend.app.domain.classes.plan import Plan @@ -16,7 +16,7 @@ def created_at_datetime() -> datetime: def make_plan_record( - created_at: datetime, default: bool, cost_of_works: float = 500.0 + created_at: datetime, default: bool, cost_of_works: Optional[float] = 500.0 ) -> PlanRecord: return PlanRecord( property_id=1, @@ -43,7 +43,10 @@ def make_scenario(name: str, created_at: datetime, is_default: bool) -> Scenario def make_plan( - created_at: datetime, default: bool, cost_of_works: float = 500.0, name: str = "" + created_at: datetime, + default: bool, + cost_of_works: Optional[float] = 500.0, + name: str = "", ) -> Plan: scenario = make_scenario(name, created_at, default) plan_id = 1 if default else 2 @@ -113,3 +116,24 @@ def test_all_plans_zero_cost__highest_priority_returned( # assert assert actual_default_plan.id == expected_default_plan_id + + +def test_all_plans_null_cost__highest_priority_returned( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=None, name="EPC C") + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=None, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id From 47de308bf353207990a31657deed2303b33541d8 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 10:39:16 +0000 Subject: [PATCH 233/340] reformatting --- backend/categorisation/processor.py | 1 - 1 file changed, 1 deletion(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 590d064f..e5d69dcf 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -96,7 +96,6 @@ def choose_cheapest_relevant_plan( if all(p.record.cost_of_works == 0 for p in eligible_plans): cheapest_plans = eligible_plans - else: cheapest_plans: List[Plan] = [ plan From 481bd1197afcd729a437fc0ec5795f0479885c21 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 10:40:33 +0000 Subject: [PATCH 234/340] put test portfolio back into invoke lambda script --- backend/categorisation/local_handler/invoke_local_lambda.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 1446a1e3..5ed23c2d 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -9,9 +9,9 @@ payload = { { "body": json.dumps( { - "portfolio_id": 569, - "scenarios_to_consider": [1069, 1060], - "scenario_priority_order": [1069, 1060], + "portfolio_id": 556, + "scenarios_to_consider": [], + "scenario_priority_order": [], } ) } From b4583d3c8b8890879c1b68eb3b1c01c6717a2f30 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 10:42:03 +0000 Subject: [PATCH 235/340] put test portfolio back in local runner --- backend/categorisation/local_runner.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/backend/categorisation/local_runner.py b/backend/categorisation/local_runner.py index f4718ffc..7de55bc0 100644 --- a/backend/categorisation/local_runner.py +++ b/backend/categorisation/local_runner.py @@ -1,12 +1,18 @@ +from typing import List + from backend.categorisation.processor import process_portfolio def main() -> None: - portfolio_id = 569 - scenarios_to_consider = [1069, 1060] - scenario_priority_order = [1069, 1060] + portfolio_id = 556 + scenarios_to_consider: List[int] = [] + scenario_priority_order: List[int] = [] - process_portfolio(portfolio_id, scenarios_to_consider, scenario_priority_order) + process_portfolio( + portfolio_id=portfolio_id, + scenarios_to_consider=scenarios_to_consider, + scenario_priority_order=scenario_priority_order, + ) if __name__ == "__main__": From 678de56def8a32d9be9fd7a35188b438feb602ce Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 11:24:17 +0000 Subject: [PATCH 236/340] =?UTF-8?q?handle=20some=20plans=20having=20zero?= =?UTF-8?q?=20cost=20=F0=9F=9F=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../tests/test_prioritised_plan_selected.py | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index e2af6a63..5ddc7b8f 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -118,6 +118,27 @@ def test_all_plans_zero_cost__highest_priority_returned( assert actual_default_plan.id == expected_default_plan_id +def test_some_plans_zero_cost__cheapest_returned( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C") + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=50.0, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id + + def test_all_plans_null_cost__highest_priority_returned( created_at_datetime: datetime, ) -> None: From ce94fb0573f46a0e91df5b390ac1f373b51ab2a2 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 11:35:12 +0000 Subject: [PATCH 237/340] =?UTF-8?q?handle=20some=20plans=20having=20zero?= =?UTF-8?q?=20cost=20=F0=9F=9F=A9?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/domain/classes/plan.py | 8 +++++ backend/categorisation/processor.py | 31 +++++++------------ .../tests/test_prioritised_plan_selected.py | 2 +- 3 files changed, 20 insertions(+), 21 deletions(-) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 7970abcd..351ea512 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -60,6 +60,14 @@ class Plan: case _: raise NotImplementedError + @property + def cost(self) -> float: + return ( + self.record.cost_of_works + if self.record.cost_of_works is not None + else float("inf") + ) + def to_sqlalchemy(self) -> PlanPersistence: scenario_record = self.scenario.record diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index e5d69dcf..e90c3b08 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -50,10 +50,13 @@ def process_portfolio( if not property_plans: raise ValueError(f"No plans for property {property_id}") - cheapest_plan = choose_cheapest_relevant_plan( - property_plans, scenario_priority_order - ) - logger.info(f"Successfully found cheapest plan for Property {property_id}") + try: + cheapest_plan = choose_cheapest_relevant_plan( + property_plans, scenario_priority_order + ) + except Exception: + logger.error(f"Failed to find cheapest plan for property {property_id}") + raise updated_property_plan_models, updated_property_scenario_models = ( _update_plan_and_scenario_objects(property_plans, cheapest_plan) @@ -85,23 +88,11 @@ def choose_cheapest_relevant_plan( f"All plans must have an ID, but found a plan with no ID: {plan}" ) - min_cost: float = min( - ( - plan.record.cost_of_works - if plan.record.cost_of_works is not None - else float("inf") - ) - for plan in eligible_plans - ) + min_cost: float = min(plan.cost for plan in eligible_plans) - if all(p.record.cost_of_works == 0 for p in eligible_plans): - cheapest_plans = eligible_plans - else: - cheapest_plans: List[Plan] = [ - plan - for plan in eligible_plans - if (plan.record.cost_of_works or float("inf")) == min_cost - ] + cheapest_plans: List[Plan] = [ + plan for plan in eligible_plans if plan.cost == min_cost + ] for priority_scenario_id in scenario_priority_order: for plan in cheapest_plans: diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py index 5ddc7b8f..a9529a53 100644 --- a/backend/categorisation/tests/test_prioritised_plan_selected.py +++ b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -127,7 +127,7 @@ def test_some_plans_zero_cost__cheapest_returned( created_at_datetime, False, cost_of_works=50.0, name="EPC C - Minor Works" ) scenario_priority_order: List[int] = [4, 3] - expected_default_plan_id = 2 + expected_default_plan_id = 1 # act actual_default_plan = choose_cheapest_relevant_plan( From d51af4112599344df73effa02a5bebde1c717406 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 20 Feb 2026 12:12:43 +0000 Subject: [PATCH 238/340] tweaked inputs for standardised asset list --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- asset_list/AssetList.py | 288 +++++++++++++------------ asset_list/app.py | 59 ++--- asset_list/mappings/built_form.py | 103 ++++++++- asset_list/mappings/heating_systems.py | 19 +- asset_list/mappings/property_type.py | 5 +- asset_list/mappings/roof.py | 4 +- asset_list/mappings/walls.py | 5 +- 9 files changed, 298 insertions(+), 189 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..09f2e496 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..fb10c6b0 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index 28e17e2a..5f354a27 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -36,14 +36,13 @@ from dotenv import load_dotenv logger = setup_logger() load_dotenv(dotenv_path="../backend/.env") - # OpenAI API Key (set this in your environment variables for security) -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "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") +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", + "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") class DataRemapper: def __init__(self, standard_values, standard_map=None, max_tokens=1000): - print(f"{OPENAI_API_KEY}") """ Initialize the remapper with standard values and a predefined mapping. @@ -1298,8 +1297,8 @@ class AssetList: self.standardised_asset_list[ self.ATTRIBUTE_HAS_SOLAR ] = self.standardised_asset_list[ - self.FIND_EPC_DATA_NAMES["Solar photovoltaics"] - ] | ~self.standardised_asset_list[ + self.FIND_EPC_DATA_NAMES["Solar photovoltaics"] + ] | ~self.standardised_asset_list[ self.EPC_API_DATA_NAMES["photo-supply"] ].isin( ["0.0", 0, None, "", np.nan] @@ -1317,7 +1316,7 @@ class AssetList: property_type=( str(x[self.STANDARD_PROPERTY_TYPE]).title() if str(x[self.STANDARD_PROPERTY_TYPE]).title() - in accepted_epc_property_types + in accepted_epc_property_types else ( x[self.EPC_API_DATA_NAMES["property-type"]] if not pd.isnull( @@ -1375,9 +1374,9 @@ class AssetList: self.standardised_asset_list.apply( lambda x: estimate_perimeter( floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] - / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] - / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], ), axis=1, ) @@ -1462,7 +1461,7 @@ class AssetList: year_lower_bound = ( 2007 if x[self.EPC_API_DATA_NAMES["construction-age-band"]] - == "England and Wales: 2007 onwards" + == "England and Wales: 2007 onwards" else 2012 ) @@ -1517,7 +1516,7 @@ class AssetList: age_band_matches = ( "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] - == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) + == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) else "EPC Age Band is different from Year Built" ) @@ -1547,7 +1546,7 @@ class AssetList: age_band_matches = ( "EPC Age Band Matches Year Built" if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) - and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date)) + and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date)) else ( "EPC Age Band is older than Year Built" if x[self.STANDARD_YEAR_BUILT] > float(upper_date) @@ -1719,22 +1718,22 @@ class AssetList: if self.non_intrusives_present: if self.new_format_non_insturives_present_v2: non_intrusives_wall_filter = ( - self.standardised_asset_list["non-intrusives: Construction"] - == "CAVITY" - ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( ["EMPTY", "PARTIAL", "EMPTY CAVITY"] ) else: non_intrusives_wall_filter = ( - self.standardised_asset_list["non-intrusives: Construction"] - == "CAVITY" - ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( ["EMPTY", "PARTIAL"] ) elif self.old_format_non_intrusives_present: non_intrusives_wall_filter = self.standardised_asset_list[ - "non-intrusives: WFT Findings" - ].str.lower().str.strip().isin( + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( [ "empty cavity", "partial fill", @@ -1744,18 +1743,18 @@ class AssetList: "empty cav", ] ) | ( - ( - self.standardised_asset_list["non-intrusives: WFT Findings"] - .str.lower() - .str.strip() - .str.contains("empty cavity|partial fill") - & ~self.standardised_asset_list["non-intrusives: WFT Findings"] - .astype(str) - .str.lower() - .str.strip() - .str.contains("major access issues") - ) - ) + ( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .str.contains("empty cavity|partial fill") + & ~self.standardised_asset_list["non-intrusives: WFT Findings"] + .astype(str) + .str.lower() + .str.strip() + .str.contains("major access issues") + ) + ) else: # We set the filter to False, as we have no non-intrusives non_intrusives_wall_filter = False @@ -1767,12 +1766,12 @@ class AssetList: ) else: year_built_filter = ( - self.standardised_asset_list[self.STANDARD_YEAR_BUILT] - <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) | ( - self.standardised_asset_list["epc_year_upper_bound"] - <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) + self.standardised_asset_list[self.STANDARD_YEAR_BUILT] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) | ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) # Criteria: # The property isn't a bedsit @@ -1813,8 +1812,8 @@ class AssetList: ] = ( ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity_has_solar" - ] + "non_intrusive_indicates_empty_cavity_has_solar" + ] & ( ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( ["bedsit"] @@ -1890,8 +1889,8 @@ class AssetList: .str.lower() .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) | self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( - ["uninsulated cavity"] - ) + ["uninsulated cavity"] + ) ) ###################################################### @@ -1928,8 +1927,8 @@ class AssetList: extraction_wall_filter = ( extraction_wall_filter & ~self.standardised_asset_list[ - "non-intrusives: Eligibility (Red/Yellow/Green)" - ].isin(["RED"]) + "non-intrusives: Eligibility (Red/Yellow/Green)" + ].isin(["RED"]) ) self.standardised_asset_list[ @@ -2025,26 +2024,26 @@ class AssetList: self.standardised_asset_list[ "solar_epc_data_indicates_correct_heating_system" ] = ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .str.contains( - "air source heat pump|ground source heat pump|boiler and radiators, electric" - ) - ) | ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .str.contains("electric storage heaters") - & ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] + self.EPC_API_DATA_NAMES["mainheat-description"] ] - == "Controls for high heat retention storage heaters" + .str.lower() + .str.contains( + "air source heat pump|ground source heat pump|boiler and radiators, electric" + ) + ) | ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains("electric storage heaters") + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + == "Controls for high heat retention storage heaters" + ) ) - ) # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the @@ -2052,25 +2051,25 @@ class AssetList: self.standardised_asset_list[ "solar_epc_data_indicates_requires_heating_upgrade" ] = ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .str.contains("electric storage heaters|room heaters") - & ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheatcont-description"] + self.EPC_API_DATA_NAMES["mainheat-description"] ] - != "Controls for high heat retention storage heaters" + .str.lower() + .str.contains("electric storage heaters|room heaters") + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + != "Controls for high heat retention storage heaters" + ) + ) & ( + ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + ["district heating", "communal heating", "communal gas boiler"] + ) + & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] + .astype(str) + .str.contains("gas ") ) - ) & ( - ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["district heating", "communal heating", "communal gas boiler"] - ) - & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] - .astype(str) - .str.contains("gas ") - ) # Basic check - both of the previous two shouldn't be true simultaneously if ( @@ -2150,8 +2149,8 @@ class AssetList: self.standardised_asset_list[ "solar_non_intrusives_walls_insulated" ] = self.standardised_asset_list[ - "non-intrusives: WFT Findings" - ].str.lower().str.strip().isin( + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( [ "retro drilled", "retro filled", @@ -2160,8 +2159,8 @@ class AssetList: "retro drilled and filled", ] ) | self.standardised_asset_list[ - "non-intrusives: WFT Findings" - ].str.lower().str.strip().str.contains( + "non-intrusives: WFT Findings" + ].str.lower().str.strip().str.contains( "retro drilled" ) else: @@ -2178,14 +2177,19 @@ class AssetList: ) self.standardised_asset_list["solar_epc_walls_insulated"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]] - .str.lower() - .str.contains("|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS)) - ) | ( - self.standardised_asset_list["walls_u_value"].apply( - lambda x: x <= 0.7 if not pd.isnull(x) else False - ) - ) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES[ + "walls-description"]] + .str.lower() + .str.contains("|".join( + self.EPC_INSULATED_WALLS_SUBSTRINGS)) + ) | ( + self.standardised_asset_list[ + "walls_u_value"].apply( + lambda x: x <= 0.7 if not pd.isnull( + x) else False + ) + ) roof_data = [] for desc in self.standardised_asset_list[ @@ -2227,20 +2231,20 @@ class AssetList: self.standardised_asset_list[ "solar_epc_loft_needs_topup" ] = self.standardised_asset_list[ - self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS - ].apply( + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].apply( lambda x: int(x) < 200 if str(x).isdigit() else False ) | ( - ( - self.standardised_asset_list["is_loft"] - | self.standardised_asset_list["is_pitched"] + ( + self.standardised_asset_list["is_loft"] + | self.standardised_asset_list["is_pitched"] + ) + & ( + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].isin(["below average", "none"]) + ) ) - & ( - self.standardised_asset_list[ - self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS - ].isin(["below average", "none"]) - ) - ) self.standardised_asset_list["epc_has_floor_recommendation"] = ( self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False) @@ -2249,15 +2253,16 @@ class AssetList: # Check if the boiler is electric # We check if it contains both the terms boiler & electric self.standardised_asset_list["has_electric_boiler"] = ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"] - ] - .str.lower() - .isin(["boiler and radiators, electric"]) - ) | ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] - == "electric boiler" - ) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .isin(["boiler and radiators, electric"]) + ) | ( + self.standardised_asset_list[ + self.STANDARD_HEATING_SYSTEM] + == "electric boiler" + ) #################################### # Check solar eligibility @@ -2395,11 +2400,11 @@ class AssetList: empty_cavity_map = { "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE - + ": ", + + ": ", "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " - "already has solar: ", + "already has solar: ", "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " - f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", + f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", } for variable, description in empty_cavity_map.items(): self.standardised_asset_list["cavity_reason"] = np.where( @@ -2415,8 +2420,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & ( self.standardised_asset_list["non-intrusives: WFT Findings"] .str.lower() @@ -2441,8 +2446,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & self.standardised_asset_list[ "non_intrusive_indicates_cavity_extraction" ] @@ -2457,8 +2462,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & ( self.standardised_asset_list["non-intrusives: Insulated"] == "RETRO DRILLED" @@ -2474,8 +2479,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & ( self.standardised_asset_list["non-intrusives: Insulated"] == "FILLED AT BUILD" @@ -2491,8 +2496,8 @@ class AssetList: ( self.standardised_asset_list["epc_indicates_empty_cavity"] & ~self.standardised_asset_list[ - "non_intrusive_indicates_empty_cavity" - ] + "non_intrusive_indicates_empty_cavity" + ] & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], @@ -2636,7 +2641,7 @@ class AssetList: identified_work = self.standardised_asset_list[ ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | ~pd.isnull(self.standardised_asset_list["solar_reason"]) - ][self.DOMNA_PROPERTY_ID].values + ][self.DOMNA_PROPERTY_ID].values if self.DOMNA_PROPERTY_ID in self.outcomes.columns: self.outcomes_for_output = self.outcomes[ @@ -2671,12 +2676,12 @@ class AssetList: blocks_of_flats = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ] + ] non_blocks_of_flats = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] + ] # Produce some aggregate figures self.work_type_figures = { @@ -2719,7 +2724,7 @@ class AssetList: blocks = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" - ].copy() + ].copy() if blocks.empty: return @@ -2856,7 +2861,7 @@ class AssetList: self.standardised_asset_list = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" - ] + ] self.standardised_asset_list = pd.concat( [self.standardised_asset_list, expanded_blocks], ignore_index=True @@ -2936,7 +2941,7 @@ class AssetList: # find any block refs with more than 50% emptires viable_empty_blocks = self.block_analysis_df[ self.block_analysis_df["Percentage of Empties"] >= 0.50 - ] + ] if not viable_empty_blocks.empty: project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() @@ -3175,7 +3180,7 @@ class AssetList: contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[ [self.contact_detail_fields["landlord_property_id"]] + details_colnames - ] + ] contact_details = contact_details[ ~pd.isnull( contact_details[self.contact_detail_fields["landlord_property_id"]] @@ -3568,10 +3573,13 @@ class AssetList: "Non-Intrusives: Date Checked ": date_of_inspections, "Non-Intrusives: Wall Type ": non_intrusives_construction, "Non-intrusives: Insulation ": non_intrusives_insulated, - "Non-intrusives: Insulation Material ": non_intrusives_insulation_material, - "Non-Intrusives: CIGA Check Required ": non_intrusives_ciga_check_required, + "Non-intrusives: Insulation Material ": + non_intrusives_insulation_material, + "Non-Intrusives: CIGA Check Required ": + non_intrusives_ciga_check_required, "Non-Intrusives: PV Access Issues ": non_intrusives_pv_access, - "Non-Intrusives: Roof Orientation ": non_intrusives_roof_orientation, + "Non-Intrusives: Roof Orientation ": + non_intrusives_roof_orientation, "Non-Intrusives: Surveyor Notes ": non_intrusives_surveyor_notes, "Non-Intrusives: Surveyor Name ": non_intrusives_surveyor_name, "CIGA: Date Requested ": None, # TODO: Don't have this for the moment @@ -3748,8 +3756,8 @@ class AssetList: # We compare address line 1 to full address if any( df[self.STANDARD_FULL_ADDRESS] - .str.lower() - .str.contains(row["Address Line 1"].lower(), na=False) + .str.lower() + .str.contains(row["Address Line 1"].lower(), na=False) ): df = df[ df[self.STANDARD_FULL_ADDRESS] @@ -3989,7 +3997,7 @@ class AssetList: matched = matched[ matched["houseno"].astype(str) == house_no_to_match - ] + ] if matched.shape[0] == 1: lookup_i.append( { @@ -4014,7 +4022,7 @@ class AssetList: )[0] matched = matched[ matched[self.STANDARD_FULL_ADDRESS] == best_match - ] + ] lookup_i.append( { "row_id": x["row_id"], @@ -4325,7 +4333,7 @@ class AssetList: df = self.standardised_asset_list[ self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] - ] + ] if df.shape[0] == 1: matched.append( { @@ -4431,7 +4439,7 @@ class AssetList: )[1] ) > 90 - ] + ] if df.shape[0] == 0: unmatched.append(row["row_id"]) @@ -4439,8 +4447,8 @@ class AssetList: if any( df[self.STANDARD_FULL_ADDRESS] - .str.lower() - .str.contains( + .str.lower() + .str.contains( " ".join( [row[house_no_col], row["Street / Block Name"]] ).lower() @@ -4467,7 +4475,7 @@ class AssetList: row[property_type_col].split(" ")[-1].lower() ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") - ] + ] if df.shape[0] != 1: # We have multiple matches - it's likely because the landlord has a duplicate diff --git a/asset_list/app.py b/asset_list/app.py index 3e492118..0b792270 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -13,12 +13,11 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -load_dotenv(dotenv_path="../backend/.env") +load_dotenv(dotenv_path="backend/.env") EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) - OPENAI_API_KEY = os.getenv( "OPENAI_API_KEY", ) @@ -74,24 +73,25 @@ def app(): Property UPRN """ - data_folder = "/workspaces/model/asset_list" - data_filename = "assests.xlsx" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed" + # data_filename = "For Modelling - Final - reviewed.xlsx" + data_filename = "Missed Properties - with address.xlsx" sheet_name = "Sheet1" postcode_column = "Postcode" - address1_column = "Address" - address1_method = "house_number_extraction" - fulladdress_column = None - address_cols_to_concat = ["Address"] + address1_column = "address1" + address1_method = None + fulladdress_column = "address1" + address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = "UPRN" - landlord_property_type = "Archetype" - landlord_built_form = "Bedroom Count" - landlord_wall_construction = "Wall Insulation Type" - landlord_roof_construction = "Roof Type" - landlord_heating_system = "Boiler Type" + landlord_property_type = "Type" + landlord_built_form = None + landlord_wall_construction = None + landlord_roof_construction = None + landlord_heating_system = None landlord_existing_pv = None - landlord_property_id = "Tab" + landlord_property_id = "Reference" landlord_sap = None outcomes_filename = None outcomes_sheetname = None @@ -243,7 +243,7 @@ def app(): if skip is not None and not force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i : i + chunk_size] + chunk = asset_list.standardised_asset_list[i: i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -386,7 +386,7 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename(columns=asset_list.EPC_API_DATA_NAMES) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place @@ -403,16 +403,12 @@ def app(): find_my_epc_data[ [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), + ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", on=asset_list.DOMNA_PROPERTY_ID, ) asset_list.merge_data(epc_df) - # asset_list.standardised_asset_list = asset_list.standardised_asset_list[ - # asset_list.standardised_asset_list["domna_full_address"] - # != "120 Airdrie Crescent, Burnley, Lancashire" - # ] asset_list.extract_attributes() asset_list.identify_worktypes() @@ -426,27 +422,6 @@ def app(): os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" ) - # Store the data in two tabs. One for the asset list with the EPC data and the second with the flat data - - # Determine inspections priority - # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ - # "domna_postcode"].unique() - # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( - # solar_jobs - # ) - # # Same for cav - # cavity_jobs = asset_list.standardised_asset_list[ - # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) - # ]["domna_postcode"].unique() - # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( - # cavity_jobs - # ) - # # We prioritise properties that are in solar areas and cavity areas - # import numpy as np - # asset_list.standardised_asset_list["inspection_priority"] = np.where( - # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], - # 1, 2 - # ) with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel( diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index d6466539..4842450d 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -528,6 +528,107 @@ BUILT_FORM_MAPPINGS = { 'House: Semi Detached: Top Floor': 'semi-detached', 'House: End Terrace: Ground Floor': 'end-terrace', 'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace', - 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace' + 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace', + '2 BED MID TERRACED HOUSE': 'mid-terrace', + '4 BED SEMI DETACHED-PARLOURED': 'semi-detached', + '2 BED END TERRACED HOUSE': 'end-terrace', + '3 BED MID TERRACED HOUSE': 'mid-terrace', + '3 BED SEMI DETACHED HOUSE': 'semi-detached', + '3 BED MID TERRACE - PARLOURED': 'mid-terrace', + '3 BED END TERRACE - PARLOURED': 'end-terrace', + '4 BED+ END TERRACED HOUSE': 'end-terrace', + '3 BED END TERRACED HOUSE': 'end-terrace', + '3 BED SEMI DETACHED-PARLOURED': 'semi-detached', + '4 BED+ END TERRACE - PARLOURED': 'end-terrace', + '2 BED SEMI DETACHED HOUSE': 'semi-detached', + '3 BED DETACHED HOUSE': 'detached', + '2 BED GRD FLR COTT FLT-CNT STR': 'ground floor', + '2 BED 1ST FLOOR WALKUP FLAT': 'mid-floor', + '1 BED GRD FL COTT FLAT-OWN ENT': 'ground floor', + '1 BED 1ST FL WALK UP DECK ACC': 'mid-floor', + '2 BED MAISONETTE UPPER COM ENT': 'mid-floor', + '2 BED GRD FLR COTT FLT OWN ENT': 'ground floor', + '1 BED BUNGALOW': 'unknown', + '2 BED GRD FL COTT FLT-OWN ENTR': 'ground floor', + '1 BED 1ST FL COTT FLT-CNT STR': 'mid-floor', + '1 BED GRD FL WALK UP OWN ENT': 'ground floor', + '1 BED GRD FLOOR WALKUP FLAT': 'ground floor', + '2 BED GRD FLOOR WALKUP FLAT': 'ground floor', + '2 BED 1ST FLR FLT-SHELTERED': 'mid-floor', + '2 BED BUNGALOW': 'unknown', + '2 BED GRD FLR COTT FLT(P)-1950': 'ground floor', + + 'Ground Floor Front Left': 'ground floor', + 'End-Terrace House': 'end-terrace', + 'Ground floor': 'ground floor', + 'Ground Floor Front Right': 'ground floor', + 'End Terrace (GII List)': 'end-terrace', + 'Semi Detached House': 'semi-detached', + 'Ground Floor Right': 'ground floor', + 'PB Ground Floor Flat': 'ground floor', + 'Basement and Ground Floor': 'ground floor', + 'Semi-detached bungalow': 'detached', + 'Detached Cottage': 'detached', + 'Lower & Ground Floor': 'ground floor', + 'Ground FLoor Flat': 'ground floor', + 'ground floor': 'ground floor', + 'Ground Floor Left': 'ground floor', + 'Semi-detached House': 'detached', + 'Basement & Lower Ground': 'basement', + 'Semi-Detached House': 'detached', + 'Ground floor flat -': 'ground floor', + 'Basement Flat': 'basement', + 'semi-detached bungalow': 'semi-detached', + 'Lower Ground Floor Flat': 'ground floor', + 'Ground floor Flat': 'ground floor', + 'Ground Floor flat': 'ground floor', + 'Ground': 'ground floor', + 'Semi detached Bungalow': 'semi-detached', + 'ground floor flat': 'ground floor', + 'Mid terrace House': 'mid-terrace', + 'Raised Ground Floor': 'ground floor', + 'Basement Floor': 'basement', + 'Second floor flat': 'mid-floor', + 'Fourth Floor Flat': 'mid-floor', + 'First/Second Maisonette': 'mid-floor', + 'Ground/First': 'ground floor', + 'First and Second Floor': 'mid-floor', + 'Terrace House': 'mid-terrace', + '1st/2nd Floor Maisonette': 'mid-floor', + 'Semi-det House': 'semi-detached', + 'First': 'mid-floor', + 'Ground & First Floor': 'ground floor', + 'End of Terrace House': 'end-terrace', + '2nd Floor Purpose Built': 'mid-floor', + 'First/Second Floor Maison': 'mid-floor', + 'GFF purpose built': 'ground floor', + 'Second': 'mid-floor', + 'Semi-det House (GII List)': 'semi-detached', + '3rd and 4th Floor': 'mid-floor', + 'First Floor flat': 'mid-floor', + 'Mid-Terrace House': 'mid-terrace', + '1st & 2nd Floors': 'mid-floor', + 'Ground/first floor': 'ground floor', + 'FFF purpose built': 'mid-floor', + 'Second floor': 'mid-floor', + 'Second/Third floor': 'mid-floor', + 'First floor Flat': 'mid-floor', + 'First floor': 'mid-floor', + 'Lower Ground Flat': 'basement', + 'First Floor Rear Flat': 'mid-floor', + 'First & Second Floor': 'mid-floor', + 'Ground & Lower Ground': 'basement', + 'First Floor Rear': 'mid-floor', + 'First & Second': 'mid-floor', + 'First Floor Front': 'mid-floor', + 'First & Second Floors': 'mid-floor', + 'First/Second Floor': 'mid-floor', + 'Sem-detach house': 'semi-detached', + 'Second Floor Flat (Top)': 'top-floor', + '3 FloorTerrace House': 'mid-terrace', + 'First floor flat': 'mid-floor', + 'First & Second Floor Flat': 'mid-floor', + 'First Floor Purpose Built': 'mid-floor', + 'Purpose built First Floor': 'mid-floor', } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 272d6279..5f962108 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -498,6 +498,23 @@ HEATING_MAPPINGS = { 'Boiler: A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler', 'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators', - 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler' + 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler', + + 'IDEAL ISAR HE30': 'gas combi boiler', + 'WORCESTER GREENSTAR 25 SI': 'gas combi boiler', + 'POTTERTON PROMAX COMBI 28 HE PLUS': 'gas combi boiler', + 'WORCESTER GREENSTAR 28I JUNIOR': 'gas combi boiler', + 'BAXI ASSURE 25 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX COMBI 28 HE PLUS A': 'gas combi boiler', + 'WORCESTER GREENSTAR 30 SI': 'gas combi boiler', + 'POTTERTON SUPRIMA 40L': 'gas boiler, radiators', + 'POTTERTON ASSURE 30 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX 28 COMBI ERP': 'gas combi boiler', + 'BAXI ASSURE 30 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX 18 SYSTEM ERP': 'gas boiler, radiators', + 'POTTERTON PROMAX COMBI 33 HE PLUS A': 'gas combi boiler', + 'POTTERTON SUPRIMA 40 HE': 'gas boiler, radiators', + 'FERROLI MODENA 102': 'gas boiler, radiators', + 'POTTERTON PROMAX COMBI 24 HE PLUS A': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 177a7549..71788c25 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -444,6 +444,9 @@ PROPERTY_MAPPING = { 'Warden Bungalow': 'bungalow', 'Warden Flat': 'flat', 'Upper Floor Flat': 'flat', - 'Extracare Scheme': 'other' + 'Extracare Scheme': 'other', + + 'SHELTERED': 'unknown', + 'PARLOUR': 'unknown', } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 70cc8742..192238e0 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -320,6 +320,8 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched (slates or tiles) access to loft, 100mm': 'pitched insulated', 'Pitched (slates or tiles) no loft access, 200mm': 'pitched insulated', 'Pitched (slates or tiles) access to loft, 200mm': 'pitched insulated', - 'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation' + 'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation', + + 'Pitched roofs': 'pitched unknown insulation', } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 1a252b33..c369204d 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -369,6 +369,9 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid Brick, As built': 'solid brick unknown insulation', 'System built, As built': 'system built unknown insulation', 'Timber frame, As built': 'timber frame unknown insulation', - 'Cavity, As built': 'cavity unknown insulation' + 'Cavity, As built': 'cavity unknown insulation', + 'FILLED CAVITY': 'filled cavity', + 'EXTERNAL': 'insulated solid brick', + 'AS BUILT': 'other' } From ec01e1d190b9eb645b9aa00dca34594b935ddf90 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 13:39:07 +0000 Subject: [PATCH 239/340] only get most recently added plans for scenario --- .../app/db/functions/recommendations_functions.py | 15 +++++++++++++-- backend/categorisation/processor.py | 6 ++---- 2 files changed, 15 insertions(+), 6 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 7ffcf603..900b5b9f 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -632,8 +632,19 @@ def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() -def get_plan_ids_by_scenario_ids(scenario_ids: List[int]) -> List[int]: - stmt = select(PlanModel.id).where(PlanModel.scenario_id.in_(scenario_ids)) +def get_most_recent_plan_ids_by_scenario_ids(scenario_ids: List[int]) -> List[int]: + # NOTE: This statement works for Postgres only, because of the Distinct + stmt = ( + select(PlanModel.id) + .where(PlanModel.scenario_id.in_(scenario_ids)) + .distinct(PlanModel.scenario_id) + .order_by( + PlanModel.scenario_id, + PlanModel.created_at.desc(), + PlanModel.id.desc(), + ) + ) + with db_read_session() as session: session_any: Any = session # Typehint as Any to satisfy Pylance... return session_any.exec(stmt).scalars().all() diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index e90c3b08..ea12bc3b 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -4,7 +4,7 @@ from typing import Dict, List, Optional, Tuple from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, get_default_scenario_ids_for_portfolio, - get_plan_ids_by_scenario_ids, + get_most_recent_plan_ids_by_scenario_ids, get_plans_by_portfolio_id, get_plans_by_scenario_ids, get_scenarios_by_portfolio_id, @@ -37,10 +37,8 @@ def process_portfolio( ) plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, scenarios_to_consider) - logger.info(f"Successfully loaded {len(plans)}") plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) - logger.info("Successfully grouped plans by property") updated_plan_models: List[PlanModel] = [] updated_scenario_models: List[ScenarioModel] = [] @@ -120,7 +118,7 @@ def _unset_defaults_for_scenarios_not_being_considered( ) if len(scenarios_to_unset_default) > 0: - plans_to_unset_default: List[int] = get_plan_ids_by_scenario_ids( + plans_to_unset_default: List[int] = get_most_recent_plan_ids_by_scenario_ids( scenarios_to_unset_default ) for plan_id in plans_to_unset_default: From 96fbd7f24c55eafb22d503fa6592d3c357237c99 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 15:26:40 +0000 Subject: [PATCH 240/340] ensure all defaults are unset before setting new ones, refactor of processor --- .../db/functions/recommendations_functions.py | 83 ++++++------ backend/categorisation/processor.py | 119 +++++++++--------- 2 files changed, 98 insertions(+), 104 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 900b5b9f..141ba2dd 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,5 +1,5 @@ -from typing import Any, Dict, List, Optional -from sqlalchemy import inspect, text, insert, delete, select, update +from typing import Any, Dict, List, Tuple +from sqlalchemy import inspect, text, insert, delete, select from sqlalchemy.orm import Session, Mapper from sqlalchemy.exc import SQLAlchemyError from sqlmodel import Session @@ -618,13 +618,6 @@ def clear_portfolio_in_batches( print("Portfolio cleared in batches.") -def get_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: - stmt = select(PlanModel).where(PlanModel.portfolio_id == portfolio_id) - with db_read_session() as session: - session_any: Any = session # Typehint as Any to satisfy Pylance... - return session_any.exec(stmt).scalars().all() - - def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: stmt = select(PlanModel).where(PlanModel.scenario_id.in_(ids)) with db_read_session() as session: @@ -632,13 +625,36 @@ def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() -def get_most_recent_plan_ids_by_scenario_ids(scenario_ids: List[int]) -> List[int]: +def get_most_recent_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: # NOTE: This statement works for Postgres only, because of the Distinct stmt = ( - select(PlanModel.id) - .where(PlanModel.scenario_id.in_(scenario_ids)) - .distinct(PlanModel.scenario_id) + select(PlanModel) + .where(PlanModel.portfolio_id == portfolio_id) + .distinct( + PlanModel.property_id, PlanModel.scenario_id + ) # one plan per property per scenario .order_by( + PlanModel.property_id, + PlanModel.scenario_id, + PlanModel.created_at.desc(), + PlanModel.id.desc(), + ) + ) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + +def get_most_recent_plans_by_scenario_ids(scenario_ids: List[int]) -> List[PlanModel]: + # NOTE: This statement works for Postgres only, because of the Distinct + stmt = ( + select(PlanModel) + .where(PlanModel.scenario_id.in_(scenario_ids)) + .distinct( + PlanModel.property_id, PlanModel.scenario_id + ) # one plan per property per scenario + .order_by( + PlanModel.property_id, PlanModel.scenario_id, PlanModel.created_at.desc(), PlanModel.id.desc(), @@ -646,7 +662,7 @@ def get_most_recent_plan_ids_by_scenario_ids(scenario_ids: List[int]) -> List[in ) with db_read_session() as session: - session_any: Any = session # Typehint as Any to satisfy Pylance... + session_any: Any = session # Typehint as Any to satisfy Pylance return session_any.exec(stmt).scalars().all() @@ -657,39 +673,22 @@ def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: return session_any.exec(stmt).scalars().all() -def get_default_scenario_ids_for_portfolio(portfolio_id: int) -> List[int]: - # This should in reality always return exactly 1 ID, but there's currently - # no database constraint to enforce that, so account for 0 or >1 - stmt = select(ScenarioModel.id).where( +def get_default_plans_and_scenarios( + portfolio_id: int, +) -> Tuple[List[PlanModel], List[ScenarioModel]]: + plan_stmt = select(PlanModel).where( + (PlanModel.portfolio_id == portfolio_id) & (PlanModel.is_default == True) + ) + scenario_stmt = select(ScenarioModel).where( (ScenarioModel.portfolio_id == portfolio_id) & (ScenarioModel.is_default == True) ) + with db_read_session() as session: session_any: Any = session # Typehint as Any to satisfy Pylance... - return session_any.exec(stmt).scalars().all() - - -def set_plan_and_scenario_default(plan_id: int, default: bool) -> bool: - with db_session() as session: - plan: PlanModel = session.get(PlanModel, plan_id) - if not plan: - return False - - scenario_id = plan.scenario_id - - plan_mapper: Mapper[Any] = inspect(PlanModel) - scenario_mapper: Mapper[Any] = inspect(ScenarioModel) - - plan_mappings: List[Dict[str, Any]] = [{"id": plan.id, "is_default": default}] - scenario_mappings: List[Dict[str, Any]] = [ - {"id": scenario_id, "is_default": default} - ] - - session.bulk_update_mappings(plan_mapper, plan_mappings) - session.bulk_update_mappings(scenario_mapper, scenario_mappings) - session.commit() - - return True + plans: List[PlanModel] = session_any.exec(plan_stmt).scalars().all() + scenarios: List[ScenarioModel] = session_any.exec(scenario_stmt).scalars().all() + return (plans, scenarios) def bulk_update_plans( diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index ea12bc3b..2e4bab12 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,14 +1,12 @@ from collections import defaultdict -from typing import Dict, List, Optional, Tuple +from typing import Dict, List, Optional from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, - get_default_scenario_ids_for_portfolio, - get_most_recent_plan_ids_by_scenario_ids, - get_plans_by_portfolio_id, - get_plans_by_scenario_ids, + get_default_plans_and_scenarios, + get_most_recent_plans_by_portfolio_id, + get_most_recent_plans_by_scenario_ids, get_scenarios_by_portfolio_id, - set_plan_and_scenario_default, ) from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan @@ -22,29 +20,38 @@ def process_portfolio( portfolio_id: int, scenarios_to_consider: Optional[List[int]] = None, scenario_priority_order: Optional[List[int]] = None, -) -> None: +) -> None: # TODO: make this a class logger.info(f"Processing portfolio {portfolio_id}") + plans_by_id: Dict[int, Plan] = {} # TODO: make this an in-memory repository class + if scenarios_to_consider: if len(scenarios_to_consider) < 2: raise ValueError( "Cannot run auto categorisation for fewer than 2 scenarios" ) - if scenarios_to_consider is not None: - _unset_defaults_for_scenarios_not_being_considered( - portfolio_id, scenarios_to_consider - ) + # first get all plans that we're interested in + plans_for_consideration: List[Plan] = _load_plans_for_portfolio( + portfolio_id, scenarios_to_consider + ) + for plan in plans_for_consideration: + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan - plans: List[Plan] = _load_plans_for_portfolio(portfolio_id, scenarios_to_consider) + # then unset existing defaults on domain objects regardless of whether they're under consideration or not + default_plans: List[Plan] = _get_default_plans(portfolio_id) + for plan in default_plans: + plan.set_default(False) + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan - plans_by_property: Dict[int, List[Plan]] = _group_plans_by_property(plans) - - updated_plan_models: List[PlanModel] = [] - updated_scenario_models: List[ScenarioModel] = [] - - for property_id, property_plans in plans_by_property.items(): + # then set new defaults on domain objects under consideration + plans_for_consideration_by_property: Dict[int, List[Plan]] = ( + _group_plans_by_property(plans_for_consideration) + ) + for property_id, property_plans in plans_for_consideration_by_property.items(): if not property_plans: raise ValueError(f"No plans for property {property_id}") @@ -56,17 +63,13 @@ def process_portfolio( logger.error(f"Failed to find cheapest plan for property {property_id}") raise - updated_property_plan_models, updated_property_scenario_models = ( - _update_plan_and_scenario_objects(property_plans, cheapest_plan) - ) + property_plans = _update_plan_objects(property_plans, cheapest_plan) + for plan in property_plans: + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan - updated_plan_models.extend(updated_property_plan_models) - updated_scenario_models.extend(updated_property_scenario_models) - - if len(updated_plan_models) > 0: - logger.info(f"Updating {len(updated_plan_models)} Plans in database") - bulk_update_plans(updated_plan_models, updated_scenario_models) - logger.info("Successfully updated Plan default values in database") + # then pass all domain objects to database to update (regardless of whether they've changed) + _update_plans_in_db(list(plans_by_id.values())) def choose_cheapest_relevant_plan( @@ -100,29 +103,17 @@ def choose_cheapest_relevant_plan( return cheapest_plans[0] -def _unset_defaults_for_scenarios_not_being_considered( - portfolio_id: int, scenarios_to_consider: List[int] -) -> None: - default_scenario_ids: List[int] = get_default_scenario_ids_for_portfolio( +def _get_default_plans(portfolio_id: int) -> List[Plan]: + default_plan_models, default_scenario_models = get_default_plans_and_scenarios( portfolio_id ) - scenarios_to_unset_default: List[int] = [] - for id in default_scenario_ids: - if id not in scenarios_to_consider: - scenarios_to_unset_default.append(id) - - if len(scenarios_to_unset_default) > 0: - logger.info( - f"Unsetting {scenarios_to_unset_default} as default scenario(s) as not included in provided list of scenarios to consider" + return [ + Plan.from_sqlalchemy( + p, next(s for s in default_scenario_models if s.id == p.scenario_id) ) - - if len(scenarios_to_unset_default) > 0: - plans_to_unset_default: List[int] = get_most_recent_plan_ids_by_scenario_ids( - scenarios_to_unset_default - ) - for plan_id in plans_to_unset_default: - set_plan_and_scenario_default(plan_id, False) # TODO: do this in batch + for p in default_plan_models + ] def _load_plans_for_portfolio( @@ -131,13 +122,17 @@ def _load_plans_for_portfolio( if scenarios_to_consider: logger.info(f"Getting plans for {len(scenarios_to_consider)} scenarios") - plan_models: List[PlanModel] = get_plans_by_scenario_ids(scenarios_to_consider) + plan_models: List[PlanModel] = get_most_recent_plans_by_scenario_ids( + scenarios_to_consider + ) logger.info(f"Got {len(plan_models)} plan models from database") else: logger.info( f"No list of Plans to consider provided. Getting all Plans for portfolio {portfolio_id}" ) - plan_models: List[PlanModel] = get_plans_by_portfolio_id(portfolio_id) + plan_models: List[PlanModel] = get_most_recent_plans_by_portfolio_id( + portfolio_id + ) plans: List[Plan] = [] @@ -170,26 +165,26 @@ def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]: return grouped -def _update_plan_and_scenario_objects( - plans: List[Plan], cheapest_plan: Plan -) -> Tuple[List[PlanModel], List[ScenarioModel]]: - plans_to_update: List[Plan] = [] - +def _update_plan_objects(plans: List[Plan], cheapest_plan: Plan) -> List[Plan]: for plan in plans: should_be_default: bool = plan.id == cheapest_plan.id - if plan.record.is_default != should_be_default: - logger.info( - f"Setting Plan {plan.id} (Scenario Name: {plan.scenario.record.name}) to is_default: {should_be_default}" - ) - plan.set_default(should_be_default) - plans_to_update.append(plan) + plan.set_default(should_be_default) + if should_be_default: + logger.debug( + f"Setting Plan {plan.id} (Scenario Name: {plan.scenario.record.name}) to default" + ) + + return plans + + +def _update_plans_in_db(plans: List[Plan]) -> None: plan_models: List[PlanModel] = [] scenario_models: List[ScenarioModel] = [] - for plan in plans_to_update: + for plan in plans: plan_model, scenario_model = plan.to_sqlalchemy() plan_models.append(plan_model) scenario_models.append(scenario_model) - return (plan_models, scenario_models) + bulk_update_plans(plan_models, scenario_models) From 325e7f65e0df8a1fb7704bc828caa7e3c098412c Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 16:13:30 +0000 Subject: [PATCH 241/340] make sure Plan object is instantiated correctly. Additional logging --- backend/categorisation/processor.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 2e4bab12..95d4de3a 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -46,6 +46,8 @@ def process_portfolio( if plan.id is not None: # just in case plans_by_id[plan.id] = plan + logger.info(f"Successfully unset {len(default_plans)} default plan(s)") + # then set new defaults on domain objects under consideration plans_for_consideration_by_property: Dict[int, List[Plan]] = ( _group_plans_by_property(plans_for_consideration) @@ -68,8 +70,11 @@ def process_portfolio( if plan.id is not None: # just in case plans_by_id[plan.id] = plan + logger.info("Successfully set defaults on Plan objects in memory") + # then pass all domain objects to database to update (regardless of whether they've changed) _update_plans_in_db(list(plans_by_id.values())) + logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") def choose_cheapest_relevant_plan( @@ -110,7 +115,12 @@ def _get_default_plans(portfolio_id: int) -> List[Plan]: return [ Plan.from_sqlalchemy( - p, next(s for s in default_scenario_models if s.id == p.scenario_id) + p, + next( + Scenario.from_sqlalchemy(s) + for s in default_scenario_models + if s.id == p.scenario_id + ), ) for p in default_plan_models ] From 3e0444b3a7228ea165ff8de39f6c8bfdbde1fa35 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 20 Feb 2026 17:01:09 +0000 Subject: [PATCH 242/340] working on export logic --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- .../export/property_scenarios/db_functions.py | 205 ++++++++++++++++++ .../export/property_scenarios/input_schema.py | 33 +++ backend/export/property_scenarios/main.py | 154 +++++++++++++ 5 files changed, 394 insertions(+), 2 deletions(-) create mode 100644 backend/export/property_scenarios/db_functions.py create mode 100644 backend/export/property_scenarios/input_schema.py create mode 100644 backend/export/property_scenarios/main.py diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index fb10c6b0..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py new file mode 100644 index 00000000..f527e738 --- /dev/null +++ b/backend/export/property_scenarios/db_functions.py @@ -0,0 +1,205 @@ +from typing import List, Any, Dict, Optional +import pandas as pd +from sqlalchemy import func +from sqlalchemy.orm import Session +from collections import defaultdict + +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) +from backend.app.db.models.portfolio import ( + PropertyModel, + PropertyDetailsEpcModel, +) +from utils.logger import setup_logger + +logger = setup_logger() + + +class DbMethods: + + def __init__(self, session: Session): + self.session = session + + def get_properties(self, portfolio_id: int) -> pd.DataFrame: + """ + Function to fetch the property data, for property scenario exports + :param portfolio_id: + :return: + """ + query = ( + self.session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) + + data = [ + { + **{ + col.name: getattr(row.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(row.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } + for row in query + ] + + return pd.DataFrame(data) + + def get_latest_plans( + self, + portfolio_id: int, + scenario_ids: Optional[List[int]] = None, + default_only: bool = False, + ) -> pd.DataFrame: + """ + Fetch latest plans. + + Modes: + 1) Scenario mode: latest per (scenario_id, property_id) + 2) Default mode: latest default plan per property (ignores scenario_ids) + + """ + + # ----------------------------- + # Sanity checks + # ----------------------------- + if default_only and scenario_ids: + # Override scenario_ids to make it explicit that they will be ignored in the query + scenario_ids = None + + if not default_only and not scenario_ids: + raise ValueError( + "Either scenario_ids must be provided " + "or default_only must be True." + ) + + # ----------------------------- + # Filter on just the default plans - we ignore the scenario ids. NOTE - this is specific to postgres + # and relies on DISTINCT ON behaviour. + # ----------------------------- + if default_only: + # Latest default plan per property (ignore scenarios entirely) + # DISTINCT ON (property_id) keeps the first row per property, + # ordered by created_at DESC so we get the newest one. + + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.is_default.is_(True)) + .distinct(PlanModel.property_id) + .order_by( + PlanModel.property_id, + PlanModel.created_at.desc(), + ) + ) + + else: + # Latest plan per (scenario_id, property_id) + # DISTINCT ON (scenario_id, property_id) keeps the newest + # plan per scenario/property combination. + + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.scenario_id.in_(scenario_ids)) + .distinct( + PlanModel.scenario_id, + PlanModel.property_id, + ) + .order_by( + PlanModel.scenario_id, + PlanModel.property_id, + PlanModel.created_at.desc(), + ) + ) + + logger.info("Fetching plans") + plans = plans_query.all() + + return pd.DataFrame( + [ + { + col.name: getattr(plan, col.name) + for col in PlanModel.__table__.columns + } + for plan in plans + ] + ) + + def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame: + + if not plan_ids: + return pd.DataFrame() + + recs_query = ( + self.session.query( + Recommendation, + PlanModel.scenario_id, + ) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) + + data = [ + { + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, + "scenario_id": r.scenario_id, + } + for r in recs_query + ] + + return pd.DataFrame(data) + + def attach_materials(self, recommendations_df: pd.DataFrame) -> pd.DataFrame: + + if recommendations_df.empty: + recommendations_df["materials"] = [] + return recommendations_df + + rec_ids = recommendations_df["id"].tolist() + + materials_query = ( + self.session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(rec_ids)) + .all() + ) + + materials_map: Dict[int, List[Dict[str, Any]]] = defaultdict(list) + + for m in materials_query: + materials_map[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) + + recommendations_df["materials"] = recommendations_df["id"].apply( + lambda x: materials_map.get(x, []) + ) + + return recommendations_df diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py new file mode 100644 index 00000000..4ef704a3 --- /dev/null +++ b/backend/export/property_scenarios/input_schema.py @@ -0,0 +1,33 @@ +from typing import Optional, Union, List +from pydantic import BaseModel, model_validator + + +class ExportRequest(BaseModel): + # uuid which maps to a specific export request, used for tracking and logging + task_id: Union[str, None] + # uuid which maps to a specific export operation, used for tracking and logging. subtask is the child of the + # task, where the work has been distributed across workers + subtask_id: Union[str, None] + # associated portfolio id for the export request + portfolio_id: int + # list of scenario ids to export + scenario_ids: List[int] + # boolean which will overwrite the scenario ids. If this is true, we will only export the default plan for each + # property and will ignore the scenario ids + default_plans_only: Optional[bool] = False + + @model_validator(mode="after") + def validate_default_plan_override(self): + """ + If default_plans_only is True and scenario_ids were provided, + we allow execution but make it explicit that scenario_ids + will be ignored. + """ + if self.default_plans_only and self.scenario_ids: + # We do NOT raise — we allow execution. + # We just mark the object so the handler can log/return a warning. + object.__setattr__(self, "_scenario_ids_ignored", True) + else: + object.__setattr__(self, "_scenario_ids_ignored", False) + + return self diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py new file mode 100644 index 00000000..88ebf326 --- /dev/null +++ b/backend/export/property_scenarios/main.py @@ -0,0 +1,154 @@ +import json +from typing import List, Optional, Any, Mapping + +import pandas as pd +from sqlalchemy.orm import Session + +from backend.export.property_scenarios.input_schema import ExportRequest +from backend.export.property_scenarios.db_functions import DbMethods +from backend.app.db.connection import db_engine +from backend.app.utils import sap_to_epc +from utils.logger import setup_logger + +logger = setup_logger() + + +def process_export(config: ExportRequest) -> List[str]: + exported_files: List[str] = [] + + with Session(bind=db_engine) as session: + + db_methods = DbMethods(session) + + properties_df = db_methods.get_properties(config.portfolio_id) + + plans_df = db_methods.get_latest_plans( + portfolio_id=config.portfolio_id, + scenario_ids=config.scenario_ids, + default_only=config.default_plans_only, + ) + + if plans_df.empty: + return exported_files + + recommendations_df = db_methods.get_recommendations( + plans_df["id"].tolist() + ) + + recommendations_df = db_methods.attach_materials(recommendations_df) + + for scenario_id in config.scenario_ids: + + scenario_recs = recommendations_df[ + recommendations_df["scenario_id"] == scenario_id + ] + + if scenario_recs.empty: + continue + + measures_df = scenario_recs[ + ["property_id", "measure_type", "estimated_cost"] + ].drop_duplicates() + + pivot = measures_df.pivot( + index="property_id", + columns="measure_type", + values="estimated_cost", + ).reset_index() + + pivot["total_retrofit_cost"] = ( + pivot.drop(columns=["property_id"]).sum(axis=1) + ) + + post_sap = ( + scenario_recs.groupby("property_id")[["sap_points"]] + .sum() + .reset_index() + ) + + df = ( + properties_df + .merge(pivot, how="left", on="property_id") + .merge(post_sap, how="left", on="property_id") + ) + + df["sap_points"] = df["sap_points"].fillna(0) + df["predicted_post_works_sap"] = ( + df["current_sap_points"] + df["sap_points"] + ) + df["predicted_post_works_epc"] = df[ + "predicted_post_works_sap" + ].apply(sap_to_epc) + + filename = ( + f"/tmp/{config.scenario_names[scenario_id]} - " + f"{config.project_name}.xlsx" + ) + + with pd.ExcelWriter(filename) as writer: + df.to_excel(writer, sheet_name="properties", index=False) + + exported_files.append(filename) + + return exported_files + + +# ============================================================ +# Lambda Handler +# ============================================================ + +def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: + """ + Lambda event should have the following structure: + 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) + 2) subtask id - unique identifier for the specific export operation (optional, can be used for tracking/logging) + 2) portfolio id - id of the portfolio to export + 3) scenario ids - list of scenario ids to export + 4) default_plans_only - flag indicating if we should only consider default plans for export (optional, + defaults to False) + :param event: + :param context: + :return: + """ + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + + # body_dict = { + # "task_id": "test", + # "subtask_id": "test", + # "portfolio_id": 569, + # "scenario_ids": [], + # "default_plans_only": True, + # } + + logger.debug("Validating request body") + payload = ExportRequest.model_validate(body_dict) + + if payload._scenario_ids_ignored: + logger.warning( + "Received scenario_ids in request body but they will be ignored " + "because default_plans_only is set to True" + ) + + logger.debug("Successfully validated request body") + process_export(payload) + + # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url + + return { + "statusCode": 200, + "body": json.dumps({}), + } + + except Exception as e: + logger.error(f"Failed to process record: {e}") + return { + "statusCode": 500, + "body": json.dumps({"message": "Failed to process export request"}), + } + + return { + "statusCode": 201, + "body": json.dumps({"message": "No records to process"}), + } From 31cfa47d8feb90d5aaab0cee91d2ffc214e5db2b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Fri, 20 Feb 2026 17:10:53 +0000 Subject: [PATCH 243/340] dont worry about default scenarios --- .../db/functions/recommendations_functions.py | 11 ++--- backend/categorisation/processor.py | 47 +++++++++---------- 2 files changed, 26 insertions(+), 32 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 141ba2dd..09d6da83 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -673,22 +673,17 @@ def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: return session_any.exec(stmt).scalars().all() -def get_default_plans_and_scenarios( +def get_default_plans( portfolio_id: int, -) -> Tuple[List[PlanModel], List[ScenarioModel]]: +) -> List[PlanModel]: plan_stmt = select(PlanModel).where( (PlanModel.portfolio_id == portfolio_id) & (PlanModel.is_default == True) ) - scenario_stmt = select(ScenarioModel).where( - (ScenarioModel.portfolio_id == portfolio_id) - & (ScenarioModel.is_default == True) - ) with db_read_session() as session: session_any: Any = session # Typehint as Any to satisfy Pylance... plans: List[PlanModel] = session_any.exec(plan_stmt).scalars().all() - scenarios: List[ScenarioModel] = session_any.exec(scenario_stmt).scalars().all() - return (plans, scenarios) + return plans def bulk_update_plans( diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 95d4de3a..09db2983 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -3,7 +3,7 @@ from typing import Dict, List, Optional from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, - get_default_plans_and_scenarios, + get_default_plans, get_most_recent_plans_by_portfolio_id, get_most_recent_plans_by_scenario_ids, get_scenarios_by_portfolio_id, @@ -23,6 +23,7 @@ def process_portfolio( ) -> None: # TODO: make this a class logger.info(f"Processing portfolio {portfolio_id}") + all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id) plans_by_id: Dict[int, Plan] = {} # TODO: make this an in-memory repository class if scenarios_to_consider: @@ -33,14 +34,14 @@ def process_portfolio( # first get all plans that we're interested in plans_for_consideration: List[Plan] = _load_plans_for_portfolio( - portfolio_id, scenarios_to_consider + portfolio_id, all_scenarios, scenarios_to_consider ) for plan in plans_for_consideration: if plan.id is not None: # just in case plans_by_id[plan.id] = plan # then unset existing defaults on domain objects regardless of whether they're under consideration or not - default_plans: List[Plan] = _get_default_plans(portfolio_id) + default_plans: List[Plan] = _get_default_plans(portfolio_id, all_scenarios) for plan in default_plans: plan.set_default(False) if plan.id is not None: # just in case @@ -108,26 +109,28 @@ def choose_cheapest_relevant_plan( return cheapest_plans[0] -def _get_default_plans(portfolio_id: int) -> List[Plan]: - default_plan_models, default_scenario_models = get_default_plans_and_scenarios( - portfolio_id - ) +def _get_default_plans(portfolio_id: int, scenarios: List[Scenario]) -> List[Plan]: + default_plan_models = get_default_plans(portfolio_id) + + scenario_map = {s.id: s for s in scenarios} return [ - Plan.from_sqlalchemy( - p, - next( - Scenario.from_sqlalchemy(s) - for s in default_scenario_models - if s.id == p.scenario_id - ), - ) + Plan.from_sqlalchemy(p, scenario_map[p.scenario_id]) for p in default_plan_models + if p.scenario_id in scenario_map ] +def _load_scenarios_for_portfolio(portfolio_id: int) -> List[Scenario]: + scenario_models: List[ScenarioModel] = get_scenarios_by_portfolio_id(portfolio_id) + + return [Scenario.from_sqlalchemy(s) for s in scenario_models] + + def _load_plans_for_portfolio( - portfolio_id: int, scenarios_to_consider: Optional[List[int]] = None + portfolio_id: int, + all_scenarios: List[Scenario], + scenarios_to_consider: Optional[List[int]] = None, ) -> List[Plan]: if scenarios_to_consider: @@ -146,21 +149,17 @@ def _load_plans_for_portfolio( plans: List[Plan] = [] - scenarios: List[ScenarioModel] = get_scenarios_by_portfolio_id(portfolio_id) - - if not scenarios: + if not all_scenarios: raise Exception(f"No scenarios found for Portfolio {portfolio_id}") for model in plan_models: - scenario_model = next((s for s in scenarios if s.id == model.scenario_id)) - if not scenario_model: + scenario = next((s for s in all_scenarios if s.id == model.scenario_id)) + if not scenario: logger.info(f"No Scenario associated with Plan of ID {model.id}") continue - plans.append( - Plan.from_sqlalchemy(model, Scenario.from_sqlalchemy(scenario_model)) - ) + plans.append(Plan.from_sqlalchemy(model, scenario)) logger.info(f"Got {len(plans)} Plans") return plans From bf3d6f4d515c22a60be6e2154a1ac6893bc4dc00 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 12:13:59 +0000 Subject: [PATCH 244/340] working on integration test --- .idea/Model.iml | 3 + backend/app/db/base.py | 3 + backend/app/db/models/addresses.py | 4 +- backend/app/db/models/condition.py | 4 +- backend/app/db/models/energy_assessments.py | 8 +- backend/app/db/models/epc.py | 5 +- backend/app/db/models/funding.py | 5 +- backend/app/db/models/inspections.py | 4 +- backend/app/db/models/materials.py | 3 +- .../app/db/models/non_intrusive_surveys.py | 4 +- backend/app/db/models/portfolio.py | 11 +- backend/app/db/models/recommendations.py | 7 +- backend/app/db/models/solar.py | 4 +- backend/app/db/models/users.py | 4 +- backend/app/db/models/whlg.py | 3 +- backend/export/README.md | 155 ++++++++++ .../export/property_scenarios/db_functions.py | 12 +- backend/export/property_scenarios/main.py | 148 +++++----- backend/export/tests/conftest.py | 55 ++++ backend/export/tests/test_export.py | 274 ++++++++++++++++++ pytest.ini | 2 + test.requirements.txt | 4 +- 22 files changed, 602 insertions(+), 120 deletions(-) create mode 100644 backend/app/db/base.py create mode 100644 backend/export/README.md create mode 100644 backend/export/tests/conftest.py create mode 100644 backend/export/tests/test_export.py diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..1e51ede4 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -10,4 +10,7 @@ + + \ No newline at end of file diff --git a/backend/app/db/base.py b/backend/app/db/base.py new file mode 100644 index 00000000..59be7030 --- /dev/null +++ b/backend/app/db/base.py @@ -0,0 +1,3 @@ +from sqlalchemy.orm import declarative_base + +Base = declarative_base() diff --git a/backend/app/db/models/addresses.py b/backend/app/db/models/addresses.py index 51e9540f..a813f58d 100644 --- a/backend/app/db/models/addresses.py +++ b/backend/app/db/models/addresses.py @@ -7,9 +7,7 @@ from sqlalchemy import ( func, UniqueConstraint, ) -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class PostcodeSearch(Base): diff --git a/backend/app/db/models/condition.py b/backend/app/db/models/condition.py index 77043366..96f601a7 100644 --- a/backend/app/db/models/condition.py +++ b/backend/app/db/models/condition.py @@ -7,12 +7,12 @@ from sqlalchemy import ( String, Enum as SqlEnum, ) -from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy.orm import relationship from backend.condition.domain.aspect_type import AspectType from backend.condition.domain.element_type import ElementType -Base = declarative_base() +from backend.app.db.base import Base ElementTypeDb = SqlEnum( ElementType, diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index 46912c9b..65879c39 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -1,10 +1,8 @@ -from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.dialects.postgresql import ENUM as PgEnum import enum from datetime import datetime - -Base = declarative_base() +from backend.app.db.base import Base +from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey +from sqlalchemy.dialects.postgresql import ENUM as PgEnum class EnergyAssessment(Base): diff --git a/backend/app/db/models/epc.py b/backend/app/db/models/epc.py index 5a216040..ff0b40a0 100644 --- a/backend/app/db/models/epc.py +++ b/backend/app/db/models/epc.py @@ -4,11 +4,8 @@ from sqlalchemy import ( String, JSON, TIMESTAMP, - UniqueConstraint, ) -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class EpcStore(Base): diff --git a/backend/app/db/models/funding.py b/backend/app/db/models/funding.py index a7417e14..19e8203d 100644 --- a/backend/app/db/models/funding.py +++ b/backend/app/db/models/funding.py @@ -3,20 +3,17 @@ import enum from sqlalchemy import ( Column, Integer, - String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey, ) -from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func +from backend.app.db.base import Base from backend.app.db.models.recommendations import PlanModel from backend.app.db.models.materials import MaterialType, Material -Base = declarative_base() - class SchemeEnum(enum.Enum): eco4 = "eco4" diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py index 473f8a02..2a42f589 100644 --- a/backend/app/db/models/inspections.py +++ b/backend/app/db/models/inspections.py @@ -9,11 +9,9 @@ from sqlalchemy import ( Enum, ForeignKey, ) -from sqlalchemy.ext.declarative import declarative_base +from backend.app.db.base import Base from backend.app.db.models.portfolio import PropertyModel -Base = declarative_base() - # ------------------------------------------------------------------- # ENUM DEFINITIONS (equivalent to drizzle pgEnum calls) diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 8a524491..101ac021 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -1,10 +1,9 @@ import enum from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, Boolean -from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func -Base = declarative_base() +from backend.app.db.base import Base class MaterialType(enum.Enum): diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py index bc2d8adc..bbfb7a54 100644 --- a/backend/app/db/models/non_intrusive_surveys.py +++ b/backend/app/db/models/non_intrusive_surveys.py @@ -1,7 +1,5 @@ from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class NonIntrusiveSurvey(Base): diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index f6a99a97..9eb26597 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -4,6 +4,7 @@ import datetime from sqlalchemy import ( Column, Integer, + BigInteger, Text, Boolean, Float, @@ -12,12 +13,10 @@ from sqlalchemy import ( ForeignKey, CheckConstraint, ) -from sqlalchemy.ext.declarative import declarative_base +from backend.app.db.base import Base from backend.app.db.models.users import UserModel # noqa from backend.app.db.models.materials import MaterialType -Base = declarative_base() - class PortfolioStatus(enum.Enum): SCOPING = "scoping" @@ -32,7 +31,7 @@ class PortfolioStatus(enum.Enum): NEEDS_REVIEW = "needs review" -class PortfolioGoal(enum.Enum): # TODO: Move to domain? +class PortfolioGoal(enum.Enum): # TODO: Move to domain? VALUATION_IMPROVEMENT = "Valuation Improvement" INCREASING_EPC = "Increasing EPC" REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions" @@ -116,9 +115,9 @@ class PropertyModel(Base): id = Column(Integer, primary_key=True, autoincrement=True) portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) - uprn = Column(Integer) + uprn = Column(BigInteger) landlord_property_id = Column(Text) - building_reference_number = Column(Integer) + building_reference_number = Column(BigInteger) status = Column( Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False, diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 538b11e3..9352eeb2 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -1,3 +1,4 @@ +import enum from typing import Iterable, List, NamedTuple, Optional, Type from sqlalchemy import ( Column, @@ -9,17 +10,15 @@ from sqlalchemy import ( ForeignKey, Enum, ) -from sqlalchemy.orm import declarative_base, Mapped, mapped_column +from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql import func from datetime import datetime +from backend.app.db.base import Base from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel from backend.app.db.models.materials import Material from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits -import enum - -Base = declarative_base() def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]: diff --git a/backend/app/db/models/solar.py b/backend/app/db/models/solar.py index 88372bd3..dc1846f3 100644 --- a/backend/app/db/models/solar.py +++ b/backend/app/db/models/solar.py @@ -2,9 +2,7 @@ import datetime import pytz from enum import Enum as PyEnum from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, Boolean -from sqlalchemy.ext.declarative import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class Solar(Base): diff --git a/backend/app/db/models/users.py b/backend/app/db/models/users.py index 6e243815..7952b9b7 100644 --- a/backend/app/db/models/users.py +++ b/backend/app/db/models/users.py @@ -1,8 +1,6 @@ from sqlalchemy import Column, Integer, String, DateTime -from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.sql import func - -Base = declarative_base() +from backend.app.db.base import Base class UserModel(Base): diff --git a/backend/app/db/models/whlg.py b/backend/app/db/models/whlg.py index 29d907e4..5c5b7172 100644 --- a/backend/app/db/models/whlg.py +++ b/backend/app/db/models/whlg.py @@ -1,4 +1,3 @@ -import uuid from typing import Optional from sqlmodel import SQLModel, Field @@ -12,4 +11,4 @@ class Whlg(SQLModel, table=True): index=True, ) - postcode: str = Field(nullable=False) \ No newline at end of file + postcode: str = Field(nullable=False) diff --git a/backend/export/README.md b/backend/export/README.md new file mode 100644 index 00000000..a98154fc --- /dev/null +++ b/backend/export/README.md @@ -0,0 +1,155 @@ +# 🧪 Running Tests in PyCharm (macOS + pytest-postgresql) + +Our test suite uses `pytest` and `pytest-postgresql`, which +automatically spins up a temporary PostgreSQL instance. + +On Linux (including GitHub Actions), PostgreSQL binaries are installed +in standard system locations.\ +On macOS (Homebrew), they are not --- so PyCharm needs a small +configuration tweak to locate `pg_ctl`. + +This guide explains how to run and debug tests locally in PyCharm +without modifying test code. + +------------------------------------------------------------------------ + +## ✅ Prerequisites + +1. Install PostgreSQL via Homebrew: + +``` bash +brew install postgresql +``` + +2. Confirm `pg_ctl` exists: + +``` bash +which pg_ctl +``` + +Typical output: + + /opt/homebrew/bin/pg_ctl + +------------------------------------------------------------------------ + +# 🚀 Running Tests in PyCharm + +## Step 1 --- Create a PyCharm pytest Run Configuration + +1. Open the test file. +2. Click the green ▶ next to the test. +3. Choose **"Edit Run Configuration..."** + +You should see something like: + +- **Target:** `backend/export/tests/test_export.py` +- **Working directory:** Project root (e.g.`Model/`) + +------------------------------------------------------------------------ + +## Step 2 --- Add Required Override (macOS Only) + +In the Run Configuration: + +### ➜ "Additional Arguments" + +Add: + + --override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl + +This tells `pytest-postgresql` where `pg_ctl` lives on macOS. + +Without this, PyCharm may fail with: + + ExecutableMissingException: Could not found pg_config executable + +------------------------------------------------------------------------ + +## Step 3 --- Run or Debug + +You can now: + +- Click ▶ Run\ +- Click 🐞 Debug\ +- Set breakpoints normally + +The temporary PostgreSQL instance will start automatically. + +------------------------------------------------------------------------ + +# 🔍 Why This Is Needed + +`pytest-postgresql` defaults to a Linux-style path: + + /usr/lib/postgresql//bin/pg_ctl + +That path exists on Ubuntu (CI), but not on macOS. + +On macOS, Homebrew installs PostgreSQL in: + + /opt/homebrew/bin/ + +The `--override-ini` flag safely overrides the executable path +**locally**, without modifying: + +- test files\ +- `conftest.py`\ +- `pytest.ini`\ +- CI configuration + +This ensures: + +- ✅ Tests still work in GitHub Actions\ +- ✅ Tests still work for Linux users\ +- ✅ macOS developers can debug in PyCharm\ +- ✅ No repository-specific hacks are required + +------------------------------------------------------------------------ + +# 🛠 Optional: Using a Local `.env` File + +If you prefer not to hardcode the override in the run configuration: + +1. Create a local file: + +```{=html} + +``` + + .env.local + +2. Add: + +```{=html} + +``` + + PYTEST_ADDOPTS=--override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl + +3. In PyCharm: + - Open the Run Configuration + - Add `.env.local` under **"Paths to .env files"** + +------------------------------------------------------------------------ + +# 🧪 Running Tests via Terminal (Recommended for CI Parity) + +For normal execution outside PyCharm: + +``` bash +make test +``` + +These already work without additional configuration. + +------------------------------------------------------------------------ + +# 🧠 Summary + +Environment Works Without Override? Needs `--override-ini`? + ------------------------ ------------------------- ------------------------- +GitHub Actions (Linux) ✅ Yes ❌ No +Linux local ✅ Yes ❌ No +macOS terminal (tox) ✅ Yes ❌ No +macOS PyCharm debugger ❌ No ✅ Yes diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py index f527e738..8b29ab0e 100644 --- a/backend/export/property_scenarios/db_functions.py +++ b/backend/export/property_scenarios/db_functions.py @@ -1,6 +1,5 @@ from typing import List, Any, Dict, Optional import pandas as pd -from sqlalchemy import func from sqlalchemy.orm import Session from collections import defaultdict @@ -95,7 +94,10 @@ class DbMethods: plans_query = ( self.session.query(PlanModel) - .filter(PlanModel.is_default.is_(True)) + .filter( + PlanModel.portfolio_id == portfolio_id, + PlanModel.is_default.is_(True) + ) .distinct(PlanModel.property_id) .order_by( PlanModel.property_id, @@ -110,7 +112,10 @@ class DbMethods: plans_query = ( self.session.query(PlanModel) - .filter(PlanModel.scenario_id.in_(scenario_ids)) + .filter( + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id.in_(scenario_ids) + ) .distinct( PlanModel.scenario_id, PlanModel.property_id, @@ -138,6 +143,7 @@ class DbMethods: def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame: if not plan_ids: + logger.info("No plan ids provided") return pd.DataFrame() recs_query = ( diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 88ebf326..d2d89916 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -1,96 +1,98 @@ import json -from typing import List, Optional, Any, Mapping +from typing import Optional, Any, Mapping, Dict, Union import pandas as pd from sqlalchemy.orm import Session from backend.export.property_scenarios.input_schema import ExportRequest from backend.export.property_scenarios.db_functions import DbMethods -from backend.app.db.connection import db_engine +from backend.app.db.connection import db_read_session from backend.app.utils import sap_to_epc from utils.logger import setup_logger logger = setup_logger() -def process_export(config: ExportRequest) -> List[str]: - exported_files: List[str] = [] +def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: + export_files: Dict[Union[str, int], pd.DataFrame] = {} - with Session(bind=db_engine) as session: + db_methods = DbMethods(session) - db_methods = DbMethods(session) + properties_df = db_methods.get_properties(payload.portfolio_id) - properties_df = db_methods.get_properties(config.portfolio_id) + logger.info("Retrieved %s properties for export", len(properties_df)) - plans_df = db_methods.get_latest_plans( - portfolio_id=config.portfolio_id, - scenario_ids=config.scenario_ids, - default_only=config.default_plans_only, - ) + plans_df = db_methods.get_latest_plans( + portfolio_id=payload.portfolio_id, + scenario_ids=payload.scenario_ids, + default_only=payload.default_plans_only, + ) - if plans_df.empty: - return exported_files + logger.info("Retrieved %s plans for export", len(plans_df)) - recommendations_df = db_methods.get_recommendations( - plans_df["id"].tolist() - ) + if plans_df.empty: + return export_files - recommendations_df = db_methods.attach_materials(recommendations_df) + recommendations_df = db_methods.get_recommendations( + plans_df["id"].tolist() + ) - for scenario_id in config.scenario_ids: + recommendations_df = db_methods.attach_materials(recommendations_df) + if payload.default_plans_only: + group_keys = [None] # Single export, no scenario grouping + else: + group_keys = payload.scenario_ids + + for group_key in group_keys: + + if payload.default_plans_only: + scenario_recs = recommendations_df + export_label = "default_plans" + else: scenario_recs = recommendations_df[ - recommendations_df["scenario_id"] == scenario_id + recommendations_df["scenario_id"] == group_key ] + export_label = group_key - if scenario_recs.empty: - continue + if scenario_recs.empty: + continue - measures_df = scenario_recs[ - ["property_id", "measure_type", "estimated_cost"] - ].drop_duplicates() + measures_df: pd.DataFrame = scenario_recs[ + ["property_id", "measure_type", "estimated_cost"] + ].drop_duplicates() - pivot = measures_df.pivot( - index="property_id", - columns="measure_type", - values="estimated_cost", - ).reset_index() + pivot = measures_df.pivot( + index="property_id", + columns="measure_type", + values="estimated_cost", + ).reset_index() - pivot["total_retrofit_cost"] = ( - pivot.drop(columns=["property_id"]).sum(axis=1) - ) + pivot["total_retrofit_cost"] = ( + pivot.drop(columns=["property_id"]).sum(axis=1) + ) - post_sap = ( - scenario_recs.groupby("property_id")[["sap_points"]] - .sum() - .reset_index() - ) + post_sap = ( + scenario_recs.groupby("property_id")[["sap_points"]] + .sum() + .reset_index() + ) - df = ( - properties_df - .merge(pivot, how="left", on="property_id") - .merge(post_sap, how="left", on="property_id") - ) + df = ( + properties_df.rename(columns={"solar_pv": "existing_solar_pv"}) + .merge(pivot, how="left", on="property_id") + .merge(post_sap, how="left", on="property_id") + ) - df["sap_points"] = df["sap_points"].fillna(0) - df["predicted_post_works_sap"] = ( - df["current_sap_points"] + df["sap_points"] - ) - df["predicted_post_works_epc"] = df[ - "predicted_post_works_sap" - ].apply(sap_to_epc) + df["sap_points"] = df["sap_points"].fillna(0) + df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] + df["predicted_post_works_epc"] = df[ + "predicted_post_works_sap" + ].apply(sap_to_epc) - filename = ( - f"/tmp/{config.scenario_names[scenario_id]} - " - f"{config.project_name}.xlsx" - ) + export_files[export_label] = df - with pd.ExcelWriter(filename) as writer: - df.to_excel(writer, sheet_name="properties", index=False) - - exported_files.append(filename) - - return exported_files + return export_files # ============================================================ @@ -106,22 +108,23 @@ def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: 3) scenario ids - list of scenario ids to export 4) default_plans_only - flag indicating if we should only consider default plans for export (optional, defaults to False) - :param event: - :param context: - :return: + + Exxample event: + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 569, + "scenario_ids": [], + "default_plans_only": True, + } + :param event: Lambda event containing export request details + :param context: Lambda context (not used in this handler but included for completeness) + :return: HTTP response indicating success or failure of the export operation """ for record in event.get("Records", []): try: body_dict = json.loads(record["body"]) - # body_dict = { - # "task_id": "test", - # "subtask_id": "test", - # "portfolio_id": 569, - # "scenario_ids": [], - # "default_plans_only": True, - # } - logger.debug("Validating request body") payload = ExportRequest.model_validate(body_dict) @@ -132,7 +135,8 @@ def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: ) logger.debug("Successfully validated request body") - process_export(payload) + with db_read_session() as session: + exported_files = process_export(payload, session) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url diff --git a/backend/export/tests/conftest.py b/backend/export/tests/conftest.py new file mode 100644 index 00000000..10bfa971 --- /dev/null +++ b/backend/export/tests/conftest.py @@ -0,0 +1,55 @@ +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from backend.app.db.base import Base + + +@pytest.fixture(scope="function") +def engine(postgresql): + """ + Create a SQLAlchemy engine bound to the ephemeral + pytest-postgresql database. + """ + + # Build SQLAlchemy URL from psycopg connection info + connection_string = ( + f"postgresql+psycopg://" + f"{postgresql.info.user}:" + f"{postgresql.info.password}@" + f"{postgresql.info.host}:" + f"{postgresql.info.port}/" + f"{postgresql.info.dbname}" + ) + + engine = create_engine(connection_string) + + # Create tables once per test session + Base.metadata.create_all(engine) + + # Yeild will split this function into two phase. 1) setup and 2) teardown, the latter of which will run after all + # tests have completed + yield engine + + # Clean-up after entire test session + Base.metadata.drop_all(engine) + engine.dispose() + + +@pytest.fixture(scope="function") +def db_session(engine): + """ + Provides a clean transactional session per test. + + Rolls back after each test to keep isolation. + """ + + connection = engine.connect() + transaction = connection.begin() + + session = sessionmaker(bind=connection)() + + yield session + + session.close() + transaction.rollback() + connection.close() diff --git a/backend/export/tests/test_export.py b/backend/export/tests/test_export.py new file mode 100644 index 00000000..eb82333d --- /dev/null +++ b/backend/export/tests/test_export.py @@ -0,0 +1,274 @@ +import pandas as pd +import numpy as np +from pathlib import Path +import time + +from backend.export.property_scenarios.main import process_export +from backend.export.property_scenarios.input_schema import ExportRequest +from backend.app.db.models.portfolio import PropertyModel, Epc, Portfolio, PortfolioStatus, PortfolioGoal, \ + PropertyCreationStatus, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import PlanModel, Recommendation, PlanRecommendations +from utils.logger import setup_logger + +FIXTURE_PATH = Path("backend/export/tests/fixtures") +logger = setup_logger() + + +def load_csv(name: str) -> pd.DataFrame: + df = pd.read_csv(FIXTURE_PATH / name) + df = df.replace({np.nan: None}) + return df + + +def test_default_export_integration(db_session): + # ---------------------------------------- + # 1) Load csvs + # ---------------------------------------- + t0 = time.perf_counter() + portfolio_df = load_csv("portfolio_569.csv") + properties_df = load_csv("properties_569.csv") + property_details_epc_df = load_csv("property_details_epc_569.csv") + plans_df = load_csv("plans_569.csv") + plan_recs_df = load_csv("plan_recs_569.csv") + recommendations_df = load_csv("recommendations_569.csv") + + # Shrink down recommendations_df to speed up the data load. For this test, we only need + # default recommendations so let's focus on those. We filter on where default is true + recommendations_df = recommendations_df[ + recommendations_df["default"] + ] + valid_rec_ids = recommendations_df["id"].unique() + + plan_recs_df = plan_recs_df[ + plan_recs_df["recommendation_id"].isin(valid_rec_ids) + ] + + logger.info( + "Loaded CSVs in %.2f seconds | properties=%s plans=%s recs=%s", + time.perf_counter() - t0, + len(properties_df), + len(plans_df), + len(recommendations_df), + ) + + logger.info("Starting database load") + db_load_t0 = time.perf_counter() + + # ---------------------------------------- + # 2) Insert test portfolio + # ---------------------------------------- + + portfolios = [] + for row in portfolio_df.itertuples(index=False): + portfolios.append( + Portfolio( + id=row.id, + name=row.name, + status=PortfolioStatus[row.status.split(".")[-1]], + goal=PortfolioGoal[row.goal.split(".")[-1]] if row.goal else None, + ) + ) + + db_session.bulk_save_objects(portfolios) + db_session.flush() + # ---------------------------------------- + # 3) Insert test property + # ---------------------------------------- + + properties = [] + + for row in properties_df.itertuples(index=False): + row_dict = row._asdict() + + row_dict["uprn"] = int(row_dict["uprn"]) if row_dict.get("uprn") else None + row_dict["building_reference_number"] = ( + int(row_dict["building_reference_number"]) + if row_dict.get("building_reference_number") + else None + ) + + prop = PropertyModel(**{ + col: row_dict[col] + for col in PropertyModel.__table__.columns.keys() + if col in row_dict + }) + + prop.creation_status = PropertyCreationStatus[ + row_dict["creation_status"].split(".")[-1] + ] + prop.status = PortfolioStatus[row_dict["status"].split(".")[-1]] + + if row_dict.get("current_epc_rating"): + prop.current_epc_rating = Epc[ + row_dict["current_epc_rating"].split(".")[-1] + ] + + properties.append(prop) + + db_session.bulk_save_objects(properties) + db_session.flush() + + # ---------------------------------------- + # 4) Insert property details - EPC + # ---------------------------------------- + + property_lookup = { + prop.uprn: prop + for prop in db_session.query(PropertyModel).all() + } + + epc_rows = [] + + for row in property_details_epc_df.itertuples(index=False): + row_dict = row._asdict() + + uprn = int(row_dict["uprn"]) if row_dict.get("uprn") else None + property_obj = property_lookup.get(uprn) + + if not property_obj: + continue # skip if property not found + + # Build only fields that exist on the model + epc_data = { + col.name: row_dict[col.name] + for col in PropertyDetailsEpcModel.__table__.columns + if col.name in row_dict and col.name not in ["id", "property_id", "portfolio_id"] + } + + epc = PropertyDetailsEpcModel( + property_id=property_obj.id, + portfolio_id=property_obj.portfolio_id, + **epc_data, + ) + + epc_rows.append(epc) + + db_session.bulk_save_objects(epc_rows) + db_session.flush() + + # ---------------------------------------- + # 4) Insert default plan + # ---------------------------------------- + + plans = [] + + for row in plans_df.itertuples(index=False): + row_dict = row._asdict() + + if row_dict.get("post_epc_rating"): + row_dict["post_epc_rating"] = Epc[ + row_dict["post_epc_rating"].split(".")[-1] + ] + + row_dict["scenario_id"] = None + + plan = PlanModel(**{ + col: row_dict[col] + for col in PlanModel.__table__.columns.keys() + if col in row_dict + }) + + plans.append(plan) + + db_session.bulk_save_objects(plans) + db_session.flush() + + # ---------------------------------------- + # 5) Insert recommendation + # ---------------------------------------- + + recs = [ + Recommendation(**{ + col: row[col] + for col in Recommendation.__table__.columns.keys() + if col in row + }) + for _, row in recommendations_df.iterrows() + ] + + db_session.bulk_save_objects(recs) + db_session.flush() + + # ---------------------------------------- + # 6) Insert PlanRecommendations + # ---------------------------------------- + links = [ + PlanRecommendations( + plan_id=row.plan_id, + recommendation_id=row.recommendation_id, + ) + for row in plan_recs_df.itertuples(index=False) + ] + + db_session.bulk_save_objects(links) + db_session.commit() + logger.info("Inserted all data in %.2f seconds", time.perf_counter() - db_load_t0) + + # ---------------------------------------- + # 6) Build payload + # ---------------------------------------- + + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 569, + "scenario_ids": [], + "default_plans_only": True, + } + + payload = ExportRequest.model_validate(body_dict) + + # ---------------------------------------- + # 7) Call process_export + # ---------------------------------------- + + logger.info( + "Recommendation count in DB: %s", + db_session.query(Recommendation).count() + ) + + logger.info( + "Default + not installed count: %s", + db_session.query(Recommendation) + .filter( + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False) + ) + .count() + ) + + logger.info("Starting process_export") + process_t0 = time.perf_counter() + + result = process_export(payload, session=db_session) + + logger.info("process_export finished in %.2f seconds", time.perf_counter() - process_t0) + + # ---------------------------------------- + # 8) Assertions + # ---------------------------------------- + + assert "default_plans" in result + + df = result["default_plans"] + + assert not df.empty + + # This test was generated on a real portfolio and so we check the things we expect to do + + # 1) All packages are "compliant", where in this case, the properties should get to EPC C + + failed = df[df["predicted_post_works_sap"] < 69] + failed_property_types = failed["property_type"].value_counts().to_dict() + assert failed_property_types["Flat"] == 113 + assert failed_property_types["House"] == 8 + assert failed_property_types["Bungalow"] == 4 + assert failed_property_types["Maisonette"] == 1 + # Check the houses + + assert failed.shape[0] + + # Errors for me: + # - should get to EPC C: https://ara.domna.homes/portfolio/569/building-passport/661051/plans + # - Why doesn't this get to a C, under the plan?: + # https://ara.domna.homes/portfolio/569/building-passport/660447/plans/1603913 diff --git a/pytest.ini b/pytest.ini index 9c9f8234..7bef3884 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,6 @@ [pytest] pythonpath = . +log_cli = true +log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests diff --git a/test.requirements.txt b/test.requirements.txt index d31371a6..d8b8b777 100644 --- a/test.requirements.txt +++ b/test.requirements.txt @@ -2,4 +2,6 @@ pytest mock pytest-cov pytest-mock -dotenv \ No newline at end of file +dotenv +psycopg[binary] +pytest-postgresql \ No newline at end of file From 5305643991e4986fd077c47d125cdb120ce3ff61 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 12:44:42 +0000 Subject: [PATCH 245/340] pass needs ventilation to optimiser functon' --- backend/engine/engine.py | 10 +++++++--- .../optimiser/optimiser_functions.py | 17 ++++++++++------- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 80d6d078..6c6b0c70 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1053,7 +1053,9 @@ async def model_engine(body: PlanTriggerRequest): property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] - ventilation_included = "ventilation" in property_measure_types + ventilation_included = ( + "ventilation" in property_measure_types or "mechanical_ventilation" in property_measure_types + ) # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore # its inclusion @@ -1177,8 +1179,10 @@ async def model_engine(body: PlanTriggerRequest): recommendations=recommendations, selected=selected, ) - # Add best practice measures (ventilation/trickle vents) - selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) + # Add best practice measures (ventilation/trickle vents) - pass needs_ventilation flag + selected = optimiser_functions.add_best_practice_measures( + p.id, solution, recommendations, selected, needs_ventilation + ) # Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( p.id, recommendations, selected, battery_sap_score diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index d704b3fb..e916f0fd 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -1,4 +1,5 @@ import pandas as pd +from typing import List, Dict, Any, Set import backend.app.assumptions as assumptions from backend.Property import Property from backend.app.plan.schemas import PlanTriggerRequest @@ -300,7 +301,13 @@ def add_required_measures(property_id, property_required_measures, recommendatio ] -def add_best_practice_measures(property_id, solution, recommendations, selected): +def add_best_practice_measures( + property_id: int, + solution: List[Dict[str, Any]], + recommendations: Dict[int, List[List[Dict[str, Any]]]], + selected: Set[str], + needs_ventilation: bool +): """ Ensures best-practice measures like ventilation and trickle vents are included in the selected recommendations when appropriate. @@ -320,6 +327,8 @@ def add_best_practice_measures(property_id, solution, recommendations, selected) All recommendations for all properties, keyed by property id. selected : set Set of already selected recommendation IDs. + needs_ventilation : bool + Whether the property requires mechanical ventilation to accompany certain measures. Returns ------- @@ -329,12 +338,6 @@ def add_best_practice_measures(property_id, solution, recommendations, selected) # Check if any selected measure requires ventilation ventilation_selected = [r for r in solution if "+mechanical_ventilation" in r["type"]] - # If ventilation has been selected, or one of the measures needs ventilation, we need to ensure ventilation is - # included - needs_ventilation = any( - x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation - ) or len(ventilation_selected) > 0 - if needs_ventilation: ventilation_rec = next( (r[0] for r in recommendations[property_id] if r[0]["type"] == "mechanical_ventilation"), From c514ef53dc0bf1880e75c1586e0e45bac1d57d5e Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Feb 2026 13:03:03 +0000 Subject: [PATCH 246/340] Add name to plan record and include in mapping --- backend/app/domain/classes/plan.py | 2 ++ backend/app/domain/records/plan_record.py | 1 + 2 files changed, 3 insertions(+) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py index 351ea512..e7455427 100644 --- a/backend/app/domain/classes/plan.py +++ b/backend/app/domain/classes/plan.py @@ -47,6 +47,7 @@ class Plan: valuation_increase=plan_model.valuation_increase, cost_of_works=plan_model.cost_of_works, contingency_cost=plan_model.contingency_cost, + name=plan_model.name, ) return cls(record=record, scenario=scenario, id=plan_model.id) @@ -137,6 +138,7 @@ class Plan: valuation_increase=record.valuation_increase, cost_of_works=record.cost_of_works, contingency_cost=record.contingency_cost, + name=record.name, ) return PlanPersistence(plan=plan_model, scenario=scenario_model) diff --git a/backend/app/domain/records/plan_record.py b/backend/app/domain/records/plan_record.py index 2df7a7c6..63a82993 100644 --- a/backend/app/domain/records/plan_record.py +++ b/backend/app/domain/records/plan_record.py @@ -29,3 +29,4 @@ class PlanRecord: valuation_increase: Optional[float] = None cost_of_works: Optional[float] = None contingency_cost: Optional[float] = None + name: Optional[str] = None From 84aef797355146a2b5901b59adcfa6be3688fa95 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 13:35:31 +0000 Subject: [PATCH 247/340] Added tests for checking ventilation --- backend/engine/engine.py | 7 ++- .../optimiser/optimiser_functions.py | 23 +++++++ .../tests/test_optimiser_functions.py | 60 +++++++++++++++++++ 3 files changed, 87 insertions(+), 3 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 6c6b0c70..dd0aebe4 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1060,9 +1060,10 @@ async def model_engine(body: PlanTriggerRequest): # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore # its inclusion - needs_ventilation = any( - x in property_measure_types for x in assumptions.measures_needing_ventilation - ) and not p.has_ventilation and ventilation_included + needs_ventilation = optimiser_functions.check_needs_ventilation( + property_measure_types, assumptions.measures_needing_ventilation, p.has_ventilation, + ventilation_included + ) if not measures_to_optimise: # Nothing to do, we just reshape the recommendations diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index e916f0fd..c17cdf1e 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -398,3 +398,26 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected # Flatten the nested list of lists into a single list return [rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type] + + +def check_needs_ventilation( + property_measure_types: Set[str], + measures_needing_ventilation: List[str], + has_ventilation: bool, + ventilation_included: bool +) -> bool: + """ + Function to check if we need to include ventilation based on the measures selected and the property + features + :param property_measure_types: The set of measure types recommended for the property + :param measures_needing_ventilation: The set of measure types that require ventilation + :param has_ventilation: Whether the property currently has ventilation + :param ventilation_included: Whether ventilation is already included in the recommended measures + :return: Boolean indicating whether ventilation needs to be included in the recommendations + + # TODO - none of the inputs of this function are well structured and so this is quite brittle - we should + consider refactoring to make this more robust + """ + return any( + x in property_measure_types for x in measures_needing_ventilation + ) and not has_ventilation and ventilation_included diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index f0ca6dac..8f898970 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -510,3 +510,63 @@ class TestStrategicOptimiser: assert opt.strategy_used.value == "case_2_solve_max_gain_under_budget" assert opt.solution_cost == 7787.068 assert opt.solution_gain == 28.8 + + +class TestCheckNeedsVentilation: + + def measure_types_includes_ventilation_no_existing_ventilation(self): + property_measure_types = {'mechanical_ventilation', 'cavity_wall_insulation', 'suspended_floor_insulation', + 'secondary_heating', 'loft_insulation', 'heating', 'low_energy_lighting'} + + measures_needing_ventilation = ['internal_wall_insulation', 'external_wall_insulation', + 'cavity_wall_insulation'] + + has_ventilation = False + + ventilation_included = True + + result = optimiser_functions.check_needs_ventilation( + property_measure_types, measures_needing_ventilation, has_ventilation, + ventilation_included + ) + + assert result == True + + def measure_types_includes_ventilation_existing_ventilation(self): + property_measure_types = {'mechanical_ventilation', 'cavity_wall_insulation', 'suspended_floor_insulation', + 'secondary_heating', 'loft_insulation', 'heating', 'low_energy_lighting'} + + measures_needing_ventilation = ['internal_wall_insulation', 'external_wall_insulation', + 'cavity_wall_insulation'] + + has_ventilation = True + + ventilation_included = True + + result = optimiser_functions.check_needs_ventilation( + property_measure_types, measures_needing_ventilation, has_ventilation, + ventilation_included + ) + + assert result == False + + def measure_types_includes_ventilation_existing_ventilation(self): + property_measure_types_without_ventilation = { + 'cavity_wall_insulation', 'suspended_floor_insulation', + 'secondary_heating', 'loft_insulation', 'heating', + 'low_energy_lighting' + } + + measures_needing_ventilation = ['internal_wall_insulation', 'external_wall_insulation', + 'cavity_wall_insulation'] + + has_ventilation = False + + ventilation_included = True + + result = optimiser_functions.check_needs_ventilation( + property_measure_types_without_ventilation, measures_needing_ventilation, has_ventilation, + ventilation_included + ) + + assert result == False From 73928c67c587ce841ea7eb684a9767eed46b3b41 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 13:37:17 +0000 Subject: [PATCH 248/340] added future todo for measure types --- backend/engine/engine.py | 1 + 1 file changed, 1 insertion(+) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index dd0aebe4..101f6ada 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1053,6 +1053,7 @@ async def model_engine(body: PlanTriggerRequest): property_required_measures = [m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] + # TODO - formalise property measure types into an enum ventilation_included = ( "ventilation" in property_measure_types or "mechanical_ventilation" in property_measure_types ) From 694717bd34a5e9aedaeca942abbf9905fcb81e2d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 14:14:07 +0000 Subject: [PATCH 249/340] addressing Dan's feedback --- recommendations/optimiser/optimiser_functions.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index c17cdf1e..ab98113c 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -403,21 +403,25 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected def check_needs_ventilation( property_measure_types: Set[str], measures_needing_ventilation: List[str], - has_ventilation: bool, - ventilation_included: bool + property_already_has_ventilation: bool, + ventilation_in_included_measures: bool ) -> bool: """ Function to check if we need to include ventilation based on the measures selected and the property features :param property_measure_types: The set of measure types recommended for the property :param measures_needing_ventilation: The set of measure types that require ventilation - :param has_ventilation: Whether the property currently has ventilation - :param ventilation_included: Whether ventilation is already included in the recommended measures + :param property_already_has_ventilation: Whether the property currently has ventilation + :param ventilation_in_included_measures: Whether ventilation is already included in the recommended + measures :return: Boolean indicating whether ventilation needs to be included in the recommendations # TODO - none of the inputs of this function are well structured and so this is quite brittle - we should consider refactoring to make this more robust """ - return any( + + needs_ventilation = any( x in property_measure_types for x in measures_needing_ventilation - ) and not has_ventilation and ventilation_included + ) + + return needs_ventilation and not has_ventilation and ventilation_included From 9dda6fb434c4c13306924f7be16a17d82bdd0ddb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 15:33:32 +0000 Subject: [PATCH 250/340] fixed test and variable renames in function --- recommendations/optimiser/optimiser_functions.py | 2 +- recommendations/tests/test_optimiser_functions.py | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index ab98113c..4b0d4b94 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -424,4 +424,4 @@ def check_needs_ventilation( x in property_measure_types for x in measures_needing_ventilation ) - return needs_ventilation and not has_ventilation and ventilation_included + return needs_ventilation and not property_already_has_ventilation and ventilation_in_included_measures diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 8f898970..debd2d88 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -143,7 +143,9 @@ class TestAddBestPracticeMeasures: ] } selected = set() - updated = optimiser_functions.add_best_practice_measures(property_id, solution, recommendations, selected) + updated = optimiser_functions.add_best_practice_measures( + property_id, solution, recommendations, selected, True + ) assert "vent1" in updated assert "trickle1" in updated @@ -273,7 +275,7 @@ class TestIncreasingEpcE2e: total_optimised_gain = sum(m["gain"] for m in solution) assert total_optimised_gain == 17.6, "Total gain of optimised measures should meet or exceed target gain" - selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) + selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected, False) # Flatten recommendations for output flattened = optimiser_functions.flatten_recommendations_with_defaults(p.id, recommendations, selected) From 2a4fb23f5fb6677adb1485ce24145ac16f6eaa23 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Mon, 23 Feb 2026 17:40:06 +0000 Subject: [PATCH 251/340] Define new plan route and modify trigger request object --- backend/app/plan/router.py | 36 ++++++++++++------- .../categorisation_trigger_request.py | 3 ++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index ea41162f..4a1b90fa 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -12,6 +12,9 @@ from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest from backend.app.config import get_settings from sqlalchemy.orm import sessionmaker +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) from utils.logger import setup_logger from backend.app.db.connection import db_engine @@ -24,7 +27,7 @@ router = APIRouter( prefix="/plan", tags=["plan"], dependencies=[Depends(validate_token)], - responses={404: {"description": "Not found"}} + responses={404: {"description": "Not found"}}, ) sqs_client = boto3.client("sqs") @@ -43,6 +46,13 @@ def db_session(): session.close() +@router.post("/categoisation", status_code=202) +async def trigger_categorisation(body: CategorisationTriggerRequest): + payload = CategorisationTriggerRequest.model_validate(body) + + logger.info("API triggered with body: %s", payload) + + @router.post("/trigger", status_code=202) async def trigger_plan_entrypoint(body: PlanTriggerRequest): """ @@ -59,7 +69,10 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): return {"message": "Invalid request"}, 400 # If file_format is domna_asset_list and type is xlsx, read and chunk it - if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx": + if ( + data.get("file_format") == "domna_asset_list" + and data.get("file_type") == "xlsx" + ): try: total_rows = data.get("sheet_count", 0) @@ -88,8 +101,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): "patches_file_path": body.patches_file_path, "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, "exclusions": body.exclusions, - "multi_plan": body.multi_plan - } + "multi_plan": body.multi_plan, + }, ) # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id @@ -99,7 +112,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): task_source="backend/plan/router.py:trigger_plan_entrypoint", service="plan_engine", inputs=data, - task_only=True + task_only=True, ) subtask_interface = SubTaskInterface() @@ -109,13 +122,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): index_end = min((i + 1) * chunk_size, total_rows) message_payload = { - **data, "index_start": index_start, "index_end": index_end, + **data, + "index_start": index_start, + "index_end": index_end, } # Create a subtask for this chunk subtask_id = subtask_interface.create_subtask( - task_id=task_id, - inputs=message_payload + task_id=task_id, inputs=message_payload ) # Add task and subtask to message @@ -125,8 +139,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): message_body = json.dumps(message_payload) response = sqs_client.send_message( - QueueUrl=settings.ENGINE_SQS_URL, - MessageBody=message_body + QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body ) logger.info( f"Chunk {i} sent to SQS. Rows {index_start}–{index_end}. Message ID: {response.get('MessageId')}" @@ -153,8 +166,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): data["subtask_id"] = str(subtask_id) message_body = json.dumps(data) response = sqs_client.send_message( - QueueUrl=settings.ENGINE_SQS_URL, - MessageBody=message_body + QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body ) logger.info(f"SQS message sent. Message ID: {response.get('MessageId')}") except Exception as e: diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 44ac0ff1..4b1b6553 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -8,5 +8,8 @@ class CategorisationTriggerRequest(BaseModel): scenarios_to_consider: Optional[List[int]] = None scenario_priority_order: Optional[List[int]] = None + property_bucket_index: Optional[int] = None + num_property_buckets: Optional[int] = None + # {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]} From 40f3c36dbb78922c689b8be05dbfdba827c28e2b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 23 Feb 2026 23:26:41 +0000 Subject: [PATCH 252/340] adding further tests for filtering phase adjustments --- backend/Property.py | 2 +- recommendations/Recommendations.py | 40 +++- recommendations/SecondaryHeating.py | 3 + recommendations/tests/test_recommendations.py | 194 +++++++++++++++++- 4 files changed, 228 insertions(+), 11 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 6a84fc09..f196f49b 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -490,7 +490,7 @@ class Property: for rec_id in rec_ids: sim_epc = self.simulation_epcs[rec_id].copy() rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0] - # We update all of the features that should have an impact on the kwh model + # We update all features that should have an impact on the kwh model sim_epc.update( { diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index acd49e05..5525b7a0 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -499,8 +499,16 @@ class Recommendations: return predicted_appliances_cost_reduction, predicted_appliances_kwh_reduction @staticmethod - def _check_ventilation_out_of_bounds(sap_impact, ventilation_sap_limit): - return (sap_impact < ventilation_sap_limit) or (sap_impact >= 0) + def _check_ventilation_out_of_bounds(sap_impact: float, ventilation_sap_limit: float) -> bool: + """ + Checks if the SAP impact of a ventilation recommendation is out of bounds, which would indicate that the + recommendation is not appropriate. + :param sap_impact: The SAP impact of the ventilation recommendation, which is typically negative or zero + :param ventilation_sap_limit: The SAP limit for ventilation recommendations, which is typically a negative + number. E.g. -4 + :return: + """ + return (sap_impact < ventilation_sap_limit) or (sap_impact > 0) @staticmethod def _adjust_ventilation_sap(sap_impact, ventilation_sap_limit): @@ -691,7 +699,8 @@ class Recommendations: previous_phase_values: dict, current_phase_values: dict, adjustments: list, - property_instance, + property_instance: Property, + model_predicted_sap: float, ): # For the moment, we cap the number of SAP points that can be achieved by LEDs at 2 if rec["type"] == "low_energy_lighting": @@ -785,7 +794,6 @@ class Recommendations: # Update the current phase values current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] - elif rec["type"] == "loft_insulation": # When we have a loft insulation recommendation, where there is an extension and the existing # amount of loft insulation is already good, we limit the SAP points @@ -831,6 +839,27 @@ class Recommendations: # Update the current phase values current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] + elif rec["measure_type"] in ["roomstat_programmer_trvs", "time_temperature_zone_control"]: + # We trim the SAP point recommendations based on the minimum of the predicted and the survey SAP + # points + predicted_difference = model_predicted_sap - previous_phase_values["sap_prediction"] + proposed_impact = property_phase_impact["sap"] + numerically_the_same = np.isclose(proposed_impact, predicted_difference) + + if predicted_difference > 0 and (predicted_difference < proposed_impact) and not numerically_the_same: + # We constrain the impact based on what the model predicts. + # We update the proposed impact to be the predicted difference + adjustments.append( + { + "recommendation_id": rec["recommendation_id"], + "phase": rec["phase"], + # If we've made an adjustment, it will be negative + "sap_adjustment": property_phase_impact["sap"] - predicted_difference, + } + ) + property_phase_impact["sap"] = predicted_difference + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] return property_phase_impact, current_phase_values, adjustments @@ -963,7 +992,8 @@ class Recommendations: previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=phase_energy_efficiency_metrics["sap_change"], ) # Insert this information into the recommendation. diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py index ee7eae1c..ef0fc2d2 100644 --- a/recommendations/SecondaryHeating.py +++ b/recommendations/SecondaryHeating.py @@ -18,6 +18,9 @@ class SecondaryHeating: def recommend(self, phase: int): # Reset self.recommendation = [] + if self.property.epc_record.secondheat_description in ["None", None]: + # No secondary heating system, so no recommendation to remove it + return if self.property.data['number-habitable-rooms'] > self.property.data['number-heated-rooms']: n_rooms = self.property.data['number-habitable-rooms'] - self.property.data['number-heated-rooms'] diff --git a/recommendations/tests/test_recommendations.py b/recommendations/tests/test_recommendations.py index e3bcbb2f..747b0b2e 100644 --- a/recommendations/tests/test_recommendations.py +++ b/recommendations/tests/test_recommendations.py @@ -373,7 +373,7 @@ def test_filter_phase_adjustment(input_data, expected): "sap_impact, limit, expected", [ (1.0, -4, True), # positive SAP not allowed - (0.0, -4, True), # zero not allowed + (0.0, -4, False), # zero is allowed (-1.0, -4, False), # valid range (-3.9, -4, False), # valid range (-4.0, -4, False), # exact lower bound allowed @@ -1476,7 +1476,9 @@ def test_lighting_and_loft_adjustment_combined(property_instance, heat_demand_pr assert adjustments2 == [ {'recommendation_id': '0_phase=0', 'phase': 0, 'sap_adjustment': np.float64(1.7)}, - {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)} + {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)}, + {'recommendation_id': '5_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0)}, + {'recommendation_id': '6_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0000000000000027)} ] @@ -1499,7 +1501,8 @@ def test_mechanical_ventilation_sap_floor(property_instance): previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=0 ) ) @@ -1538,7 +1541,8 @@ def test_mechanical_ventilation_no_floor_adjustment(property_instance): previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=0 ) ) @@ -1570,7 +1574,8 @@ def test_mechanical_ventilation_exactly_one_no_adjustment(property_instance): previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=0 ) ) @@ -1578,3 +1583,182 @@ def test_mechanical_ventilation_exactly_one_no_adjustment(property_instance): assert updated_adjustments == [] assert updated_current["sap"] == 1.0 assert updated_impact["sap"] == -1.0 + + +def test_mechanical_ventilation_sap_zero_no_adjustment(property_instance): + # Test when SAP = 0 + rec = { + "type": "mechanical_ventilation", + "recommendation_id": "mv_test", + "phase": 1, + } + + previous_phase_values = {'phase': 0, 'representative': True, 'recommendation_id': '0_phase=0', + 'measure_type': 'flat_roof_insulation', 'sap': 68.0, 'carbon': np.float64(0.5), + 'heat_demand': np.float64(300.1), 'sap_prediction': np.float64(71.7)} + current_phase_values = {'sap': 68.0, 'carbon': np.float64(0.5), 'heat_demand': np.float64(307.0)} + property_phase_impact = {'sap': 0, 'carbon': 0, 'heat_demand': np.float64(-6.899999999999977)} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec=rec, + property_phase_impact=property_phase_impact, + previous_phase_values=previous_phase_values, + current_phase_values=current_phase_values, + adjustments=adjustments, + property_instance=property_instance, + model_predicted_sap=0 + ) + ) + + # SAP is already at 0 → no adjustment expected + assert updated_adjustments == [] + assert updated_current["sap"] == 68.0 + assert updated_impact["sap"] == 0 + + +def test_mv_valid_negative_no_adjustment(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 70.0} + current = {"sap": 67.0} + impact = {"sap": -3.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + assert updated_current["sap"] == 67.0 + assert updated_impact["sap"] == -3.0 + + +def test_mv_zero_impact_allowed(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 68.0, "sap_prediction": 71.7} + current = {"sap": 68.0} + impact = {"sap": 0.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + assert updated_current["sap"] == 68.0 + assert updated_impact["sap"] == 0.0 + + +def test_mv_positive_impact_corrected(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 60.0} + current = {"sap": 61.0} + impact = {"sap": 1.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert len(updated_adjustments) == 1 + assert updated_current["sap"] == previous["sap"] + updated_impact["sap"] + assert updated_impact["sap"] <= 0 + + +def test_mv_below_lower_bound_corrected(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 70.0} + current = {"sap": 64.0} + impact = {"sap": -6.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert len(updated_adjustments) == 1 + assert updated_impact["sap"] >= -4 + + +def test_mv_floor_triggered(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 2.0} + current = {"sap": 0.5} + impact = {"sap": -1.5, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_current["sap"] == 1.0 + assert updated_adjustments[0]["sap_adjustment"] > 0 + + +def test_mv_exactly_one_no_floor(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 2.0} + current = {"sap": 1.0} + impact = {"sap": -1.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + assert updated_current["sap"] == 1.0 + + +def test_lighting_no_cap(property_instance): + rec = {"type": "low_energy_lighting", "recommendation_id": "led", "phase": 1, + "co2_equivalent_savings": 0} + + previous = {"sap": 60.0, "carbon": 2.0} + current = {"sap": 61.0, "carbon": 2.0} + impact = {"sap": 1.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + + +def test_filter_phase_adjustments(): + example_adjustments = [ + {'recommendation_id': '0_phase=0', 'phase': 0, 'sap_adjustment': np.float64(1.7)}, + {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)}, + {'recommendation_id': '5_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0)}, + {'recommendation_id': '6_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0000000000000027)} + ] + + res = Recommendations._filter_phase_adjustment(example_adjustments) + + assert res == [ + {'recommendation_id': '0_phase=0', 'phase': 0, 'sap_adjustment': np.float64(1.7)}, + {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)}, + {'recommendation_id': '6_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0000000000000027)} + ] From b14c81fa8334b38baee6c5540cbfdec808483ccb Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 24 Feb 2026 00:32:03 +0000 Subject: [PATCH 253/340] allow slightly negative impact on cost savings --- recommendations/optimiser/optimiser_functions.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index 4b0d4b94..a5cbf90d 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -79,14 +79,14 @@ def prepare_input_measures( # if recs[0]["type"] == "solar_pv": # recs = [r for r in recs if ~r["has_battery"]] - # Only include measures with non-negative cost savings + # Only include measures with non-negative cost savings - we allow for a minor negative impact if eco_measures: recs_to_append = [ - rec for rec in recs if (rec["energy_cost_savings"] >= 0) or (rec["measure_type"] in eco_measures) + rec for rec in recs if (rec["energy_cost_savings"] >= -10) or (rec["measure_type"] in eco_measures) ] else: recs_to_append = [ - rec for rec in recs if (rec["energy_cost_savings"] >= 0) + rec for rec in recs if (rec["energy_cost_savings"] >= -10) ] if not recs_to_append: continue From 5646376d1e564bb92b9e67e40828554d5cb693db Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 09:59:42 +0000 Subject: [PATCH 254/340] pass min and max IDs to consider to processor and db functions --- .../db/functions/recommendations_functions.py | 77 ++++++++++++++++--- backend/app/plan/router.py | 58 ++++++++++---- .../categorisation_trigger_request.py | 4 +- backend/categorisation/handler/handler.py | 2 + backend/categorisation/processor.py | 29 +++++-- 5 files changed, 135 insertions(+), 35 deletions(-) diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 09d6da83..ed3fb435 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,5 +1,14 @@ -from typing import Any, Dict, List, Tuple -from sqlalchemy import inspect, text, insert, delete, select +from typing import Any, Dict, List, Optional +from sqlalchemy import ( + ColumnElement, + and_, + func, + inspect, + text, + insert, + delete, + select, +) from sqlalchemy.orm import Session, Mapper from sqlalchemy.exc import SQLAlchemyError from sqlmodel import Session @@ -625,11 +634,22 @@ def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() -def get_most_recent_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: +def get_most_recent_plans_by_portfolio_id( + portfolio_id: int, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[PlanModel]: + filters = [PlanModel.portfolio_id == portfolio_id] + + if min_property_id is not None: + filters.append(PlanModel.property_id >= min_property_id) + if max_property_id is not None: + filters.append(PlanModel.property_id <= max_property_id) + # NOTE: This statement works for Postgres only, because of the Distinct stmt = ( select(PlanModel) - .where(PlanModel.portfolio_id == portfolio_id) + .where(and_(*filters)) .distinct( PlanModel.property_id, PlanModel.scenario_id ) # one plan per property per scenario @@ -645,11 +665,27 @@ def get_most_recent_plans_by_portfolio_id(portfolio_id: int) -> List[PlanModel]: return session_any.exec(stmt).scalars().all() -def get_most_recent_plans_by_scenario_ids(scenario_ids: List[int]) -> List[PlanModel]: +def get_most_recent_plans_by_scenario_ids( + scenario_ids: List[int], + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[PlanModel]: + if not scenario_ids: + return [] + + # Base filter: scenario_id in provided list + filters: List[ColumnElement[bool]] = [PlanModel.scenario_id.in_(scenario_ids)] + + # Add optional property ID range filters + if min_property_id is not None: + filters.append(PlanModel.property_id >= min_property_id) + if max_property_id is not None: + filters.append(PlanModel.property_id <= max_property_id) + # NOTE: This statement works for Postgres only, because of the Distinct stmt = ( select(PlanModel) - .where(PlanModel.scenario_id.in_(scenario_ids)) + .where(and_(*filters)) .distinct( PlanModel.property_id, PlanModel.scenario_id ) # one plan per property per scenario @@ -673,16 +709,37 @@ def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: return session_any.exec(stmt).scalars().all() +def get_scenarios_count_by_portfolio_id(portfolio_id: int) -> int: + stmt = ( + select(func.count()) + .select_from(ScenarioModel) + .where(ScenarioModel.portfolio_id == portfolio_id) + ) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalar_one() + + def get_default_plans( portfolio_id: int, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, ) -> List[PlanModel]: - plan_stmt = select(PlanModel).where( - (PlanModel.portfolio_id == portfolio_id) & (PlanModel.is_default == True) - ) + filters: List[ColumnElement[bool]] = [ + PlanModel.portfolio_id == portfolio_id, + PlanModel.is_default.is_(True), + ] + + if min_property_id is not None: + filters.append(PlanModel.property_id >= min_property_id) + if max_property_id is not None: + filters.append(PlanModel.property_id <= max_property_id) + + stmt = select(PlanModel).where(and_(*filters)) with db_read_session() as session: session_any: Any = session # Typehint as Any to satisfy Pylance... - plans: List[PlanModel] = session_any.exec(plan_stmt).scalars().all() + plans: List[PlanModel] = session_any.exec(stmt).scalars().all() return plans diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 4a1b90fa..e9c06e40 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1,3 +1,5 @@ +from typing import List + import boto3 import json import math @@ -18,7 +20,11 @@ from backend.categorisation.categorisation_trigger_request import ( from utils.logger import setup_logger from backend.app.db.connection import db_engine -from backend.app.db.functions.recommendations_functions import create_scenario +from backend.app.db.functions.recommendations_functions import ( + create_scenario, + get_property_ids, + get_scenarios_count_by_portfolio_id, +) from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface logger = setup_logger() @@ -33,25 +39,43 @@ router = APIRouter( sqs_client = boto3.client("sqs") -@contextmanager -def db_session(): - session = Session(db_engine) - try: - yield session - session.commit() - except Exception: - session.rollback() - raise - finally: - session.close() - - -@router.post("/categoisation", status_code=202) -async def trigger_categorisation(body: CategorisationTriggerRequest): - payload = CategorisationTriggerRequest.model_validate(body) +@router.post("/categorisation", status_code=202) +async def trigger_categorisation( + body: CategorisationTriggerRequest, +) -> dict[str, int]: + payload: CategorisationTriggerRequest = CategorisationTriggerRequest.model_validate( + body + ) logger.info("API triggered with body: %s", payload) + property_ids: List[int] = get_property_ids(payload.portfolio_id) + property_ids.sort() + + num_scenarios: int = get_scenarios_count_by_portfolio_id(payload.portfolio_id) + batch_size: int = math.ceil(1000 / num_scenarios) + num_property_buckets: int = max(1, math.ceil(len(property_ids) / batch_size)) + + bucket_requests: List[CategorisationTriggerRequest] = [] + + for bucket_index in range(num_property_buckets): + bucket_property_ids: List[int] = [ + pid for pid in property_ids if pid % num_property_buckets == bucket_index + ] + bucket_request: CategorisationTriggerRequest = CategorisationTriggerRequest( + portfolio_id=payload.portfolio_id, + scenarios_to_consider=payload.scenarios_to_consider, + scenario_priority_order=payload.scenario_priority_order, + min_property_id=min(bucket_property_ids), + max_property_id=max(bucket_property_ids), + ) + + bucket_requests.append(bucket_request) + + # Dispatch requests to lambdas + + return {"num_buckets": len(bucket_requests)} + @router.post("/trigger", status_code=202) async def trigger_plan_entrypoint(body: PlanTriggerRequest): diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 4b1b6553..6a0c872c 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -8,8 +8,8 @@ class CategorisationTriggerRequest(BaseModel): scenarios_to_consider: Optional[List[int]] = None scenario_priority_order: Optional[List[int]] = None - property_bucket_index: Optional[int] = None - num_property_buckets: Optional[int] = None + min_property_id: Optional[int] = None + max_property_id: Optional[int] = None # {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]} diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index 9fb235d5..fea62342 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -29,6 +29,8 @@ def handler(event: Mapping[str, Any], context: Any) -> None: payload.portfolio_id, payload.scenarios_to_consider, payload.scenario_priority_order, + payload.min_property_id, + payload.max_property_id, ) except Exception as e: diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 09db2983..00c20ec1 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -20,6 +20,8 @@ def process_portfolio( portfolio_id: int, scenarios_to_consider: Optional[List[int]] = None, scenario_priority_order: Optional[List[int]] = None, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, ) -> None: # TODO: make this a class logger.info(f"Processing portfolio {portfolio_id}") @@ -34,14 +36,20 @@ def process_portfolio( # first get all plans that we're interested in plans_for_consideration: List[Plan] = _load_plans_for_portfolio( - portfolio_id, all_scenarios, scenarios_to_consider + portfolio_id, + all_scenarios, + scenarios_to_consider, + min_property_id, + max_property_id, ) for plan in plans_for_consideration: if plan.id is not None: # just in case plans_by_id[plan.id] = plan # then unset existing defaults on domain objects regardless of whether they're under consideration or not - default_plans: List[Plan] = _get_default_plans(portfolio_id, all_scenarios) + default_plans: List[Plan] = _get_default_plans( + portfolio_id, all_scenarios, min_property_id, max_property_id + ) for plan in default_plans: plan.set_default(False) if plan.id is not None: # just in case @@ -109,8 +117,15 @@ def choose_cheapest_relevant_plan( return cheapest_plans[0] -def _get_default_plans(portfolio_id: int, scenarios: List[Scenario]) -> List[Plan]: - default_plan_models = get_default_plans(portfolio_id) +def _get_default_plans( + portfolio_id: int, + scenarios: List[Scenario], + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[Plan]: + default_plan_models = get_default_plans( + portfolio_id, min_property_id, max_property_id + ) scenario_map = {s.id: s for s in scenarios} @@ -131,12 +146,14 @@ def _load_plans_for_portfolio( portfolio_id: int, all_scenarios: List[Scenario], scenarios_to_consider: Optional[List[int]] = None, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, ) -> List[Plan]: if scenarios_to_consider: logger.info(f"Getting plans for {len(scenarios_to_consider)} scenarios") plan_models: List[PlanModel] = get_most_recent_plans_by_scenario_ids( - scenarios_to_consider + scenarios_to_consider, min_property_id, max_property_id ) logger.info(f"Got {len(plan_models)} plan models from database") else: @@ -144,7 +161,7 @@ def _load_plans_for_portfolio( f"No list of Plans to consider provided. Getting all Plans for portfolio {portfolio_id}" ) plan_models: List[PlanModel] = get_most_recent_plans_by_portfolio_id( - portfolio_id + portfolio_id, min_property_id, max_property_id ) plans: List[Plan] = [] From 76b648c861b7f26ab8af8c7d411015cdc09934b3 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 14:33:29 +0000 Subject: [PATCH 255/340] implement trigger_categorisation API --- backend/app/config.py | 11 +++++-- backend/app/db/functions/tasks/Tasks.py | 13 +++++--- backend/app/plan/router.py | 36 +++++++++++++++++----- backend/app/tasks/router.py | 41 ++++++++++++++----------- 4 files changed, 67 insertions(+), 34 deletions(-) diff --git a/backend/app/config.py b/backend/app/config.py index feb312b4..22e4e302 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,5 +1,6 @@ import os from functools import lru_cache +from pathlib import Path from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Optional @@ -8,12 +9,16 @@ def resolve_env_file() -> Optional[str]: env = os.getenv("ENVIRONMENT", "local") if env == "local": - return "backend/.env" + env_file: Path = Path("backend/.env").resolve() # resolve to full path + print("USING ENV FILE:", env_file) + return str(env_file) if env == "test": - return "backend/.env.test" + env_file: Path = Path("backend/.env.test").resolve() + print("USING ENV FILE:", env_file) + return str(env_file) - # prod = no env file + print("NO ENV FILE") return None diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index d1ab9536..13229447 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -25,7 +25,12 @@ class SubTaskInterface: # -------------------------------------------------------- # CREATE SUBTASK # -------------------------------------------------------- - def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None): + def create_subtask( + self, + task_id: UUID, + inputs: Optional[Dict[str, Any]] = None, + status: Optional[str] = None, + ): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -177,9 +182,7 @@ class SubTaskInterface: if not task: return - subtasks = session.exec( - select(SubTask).where(SubTask.task_id == task_id) - ).all() + subtasks = session.exec(select(SubTask).where(SubTask.task_id == task_id)).all() statuses = [s.status.lower() for s in subtasks] now = datetime.now(timezone.utc) @@ -211,7 +214,7 @@ class SubTaskInterface: subtask_id: UUID, status: str, outputs: Optional[Dict[str, Any]], - cloud_logs_url: Optional[str] + cloud_logs_url: Optional[str], ): now = datetime.now(timezone.utc) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e9c06e40..cdf2873d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1,4 +1,5 @@ from typing import List +from uuid import UUID import boto3 import json @@ -36,13 +37,14 @@ router = APIRouter( responses={404: {"description": "Not found"}}, ) -sqs_client = boto3.client("sqs") +settings = get_settings() +sqs_client = boto3.client("sqs", settings.AWS_DEFAULT_REGION) @router.post("/categorisation", status_code=202) async def trigger_categorisation( body: CategorisationTriggerRequest, -) -> dict[str, int]: +) -> dict[str, str]: payload: CategorisationTriggerRequest = CategorisationTriggerRequest.model_validate( body ) @@ -56,7 +58,16 @@ async def trigger_categorisation( batch_size: int = math.ceil(1000 / num_scenarios) num_property_buckets: int = max(1, math.ceil(len(property_ids) / batch_size)) - bucket_requests: List[CategorisationTriggerRequest] = [] + # Create task + task_id, _ = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_categorisation", + service="plan_engine", + inputs=payload.model_dump(), + task_only=True, + ) + + # Dispatch requests to lambdas + subtask_interface = SubTaskInterface() for bucket_index in range(num_property_buckets): bucket_property_ids: List[int] = [ @@ -69,12 +80,23 @@ async def trigger_categorisation( min_property_id=min(bucket_property_ids), max_property_id=max(bucket_property_ids), ) + # Create sub-task for each + subtask_id: UUID = subtask_interface.create_subtask( + task_id=task_id, inputs=bucket_request.model_dump() + ) - bucket_requests.append(bucket_request) + response = sqs_client.send_message( + QueueUrl="categorisation-queue-dev", + MessageBody=bucket_request.model_dump_json(), + ) - # Dispatch requests to lambdas + logger.info( + f"Chunk {bucket_index} sent to SQS. Property IDs {min(bucket_property_ids)}–{max(bucket_property_ids)}. Message ID: {response.get('MessageId')}" + ) - return {"num_buckets": len(bucket_requests)} + await asyncio.sleep(0.05) # Small delay to avoid SQS throttling + + return {"message": "Categorisation jobs distributed"} @router.post("/trigger", status_code=202) @@ -84,8 +106,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): """ logger.info("API triggered with body: %s", body) - settings = get_settings() - try: data = body.model_dump() except Exception as e: diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py index 90b62dd1..1c266f2c 100644 --- a/backend/app/tasks/router.py +++ b/backend/app/tasks/router.py @@ -9,7 +9,7 @@ from backend.app.tasks.schema import ( CreateSubTaskRequest, UpdateSubTaskStatusRequest, FinalizeSubTaskRequest, - TaskSqsTriggerRequest + TaskSqsTriggerRequest, ) # Correct location of interfaces @@ -51,18 +51,18 @@ async def get_task(task_id: UUID): if not task: raise HTTPException(status_code=404, detail="Task not found") - subtasks = session.exec( - select(SubTask).where(SubTask.taskId == task_id) - ).all() + subtasks = session.exec(select(SubTask).where(SubTask.taskId == task_id)).all() formatted = [] for st in subtasks: - formatted.append({ - **st.dict(), - "inputs": json.loads(st.inputs) if st.inputs else None, - "outputs": json.loads(st.outputs) if st.outputs else None, - "cloud_logs_url": st.cloudLogsURL, - }) + formatted.append( + { + **st.dict(), + "inputs": json.loads(st.inputs) if st.inputs else None, + "outputs": json.loads(st.outputs) if st.outputs else None, + "cloud_logs_url": st.cloudLogsURL, + } + ) return { "task": task, @@ -111,7 +111,10 @@ async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusReques # === # Sub task is complete -@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs") +@router.post( + "/subtask/{subtask_id}/finalize", + summary="Finalize a subtask with status, outputs, logs", +) async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): subtasks = SubTaskInterface() @@ -120,7 +123,7 @@ async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): subtask_id=subtask_id, status=req.status, outputs=req.outputs, - cloud_logs_url=req.cloud_logs_url + cloud_logs_url=req.cloud_logs_url, ) return { @@ -142,9 +145,10 @@ from backend.app.tasks.schema import TaskSqsTriggerRequest from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface from backend.app.config import get_settings -sqs = boto3.client("sqs") -@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202) +@router.post( + "/trigger", summary="Create task + subtask and publish to SQS", status_code=202 +) async def trigger_task(req: TaskSqsTriggerRequest): """ Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it. @@ -152,11 +156,12 @@ async def trigger_task(req: TaskSqsTriggerRequest): """ settings = get_settings() + sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION) tasks = TasksInterface() # ---- Normalize empty inputs ---- - inputs = req.inputs or {} # ensures {} even if null + inputs = req.inputs or {} # ensures {} even if null # ---- 1. Create Task + SubTask ---- task_id, subtask_id = tasks.create_task( @@ -174,8 +179,8 @@ async def trigger_task(req: TaskSqsTriggerRequest): try: response = sqs.send_message( QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/" - f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", - MessageBody=json.dumps(sqs_payload) + f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", + MessageBody=json.dumps(sqs_payload), ) except Exception as e: raise HTTPException(status_code=500, detail=f"SQS error: {e}") @@ -186,4 +191,4 @@ async def trigger_task(req: TaskSqsTriggerRequest): "subtask_id": subtask_id, "sqs_message_id": response.get("MessageId"), "inputs_sent": inputs, - } \ No newline at end of file + } From 2ffd09bdd2668213a83f5dfa84bc8544e7f5e135 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 15:28:18 +0000 Subject: [PATCH 256/340] update subtask when processor completes --- .devcontainer/backend/requirements.txt | 3 +- .../categorisation_trigger_request.py | 3 ++ backend/categorisation/handler/handler.py | 8 +---- backend/categorisation/local_runner.py | 11 +++++-- backend/categorisation/processor.py | 30 +++++++++++++++---- 5 files changed, 39 insertions(+), 16 deletions(-) diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 9814c8d4..c84332dd 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -19,4 +19,5 @@ pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 # Formatting -black==26.1.0 \ No newline at end of file +black==26.1.0 +boto3-stubs \ No newline at end of file diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 6a0c872c..17a5d916 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -11,5 +11,8 @@ class CategorisationTriggerRequest(BaseModel): min_property_id: Optional[int] = None max_property_id: Optional[int] = None + task_id: Optional[str] = None + subtask_id: Optional[str] = None + # {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]} diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index fea62342..eb532624 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -25,13 +25,7 @@ def handler(event: Mapping[str, Any], context: Any) -> None: logger.debug("Successfully validated request body") - process_portfolio( - payload.portfolio_id, - payload.scenarios_to_consider, - payload.scenario_priority_order, - payload.min_property_id, - payload.max_property_id, - ) + process_portfolio(payload) except Exception as e: logger.info("Handler exception") diff --git a/backend/categorisation/local_runner.py b/backend/categorisation/local_runner.py index 7de55bc0..384ce5ef 100644 --- a/backend/categorisation/local_runner.py +++ b/backend/categorisation/local_runner.py @@ -1,5 +1,8 @@ from typing import List +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) from backend.categorisation.processor import process_portfolio @@ -9,9 +12,11 @@ def main() -> None: scenario_priority_order: List[int] = [] process_portfolio( - portfolio_id=portfolio_id, - scenarios_to_consider=scenarios_to_consider, - scenario_priority_order=scenario_priority_order, + CategorisationTriggerRequest( + portfolio_id=portfolio_id, + scenarios_to_consider=scenarios_to_consider, + scenario_priority_order=scenario_priority_order, + ) ) diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 00c20ec1..7a7d48ca 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -1,5 +1,7 @@ +import time from collections import defaultdict from typing import Dict, List, Optional +from uuid import UUID from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, @@ -8,22 +10,31 @@ from backend.app.db.functions.recommendations_functions import ( get_most_recent_plans_by_scenario_ids, get_scenarios_by_portfolio_id, ) +from backend.app.db.functions.tasks.Tasks import SubTaskInterface from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan from backend.app.domain.classes.scenario import Scenario +from backend.app.plan.utils import build_cloudwatch_log_url +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) from utils.logger import setup_logger logger = setup_logger() def process_portfolio( - portfolio_id: int, - scenarios_to_consider: Optional[List[int]] = None, - scenario_priority_order: Optional[List[int]] = None, - min_property_id: Optional[int] = None, - max_property_id: Optional[int] = None, + body: CategorisationTriggerRequest, ) -> None: # TODO: make this a class + portfolio_id: int = body.portfolio_id + scenarios_to_consider: Optional[List[int]] = body.scenarios_to_consider + scenario_priority_order: Optional[List[int]] = body.scenario_priority_order + min_property_id: Optional[int] = body.min_property_id + max_property_id: Optional[int] = body.max_property_id + subtask_id: Optional[str] = body.subtask_id + logger.info(f"Processing portfolio {portfolio_id}") + start_ms = int(time.time() * 1000) all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id) plans_by_id: Dict[int, Plan] = {} # TODO: make this an in-memory repository class @@ -85,6 +96,15 @@ def process_portfolio( _update_plans_in_db(list(plans_by_id.values())) logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") + # Mark the subtask as successful + if subtask_id: + cloud_logs_url = build_cloudwatch_log_url(start_ms) + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="complete", + cloud_logs_url=cloud_logs_url, + ) + def choose_cheapest_relevant_plan( plans: List[Plan], scenario_priority_order: Optional[List[int]] = None From 1c8be836ab2d15a713deb697f537e9ecf8de469b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 16:00:12 +0000 Subject: [PATCH 257/340] FIx imports in dockerfile --- backend/categorisation/handler/Dockerfile | 21 +++++++++++-------- .../categorisation/handler/requirements.txt | 6 +++++- 2 files changed, 17 insertions(+), 10 deletions(-) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index 7811ee4a..ce08ba96 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -29,19 +29,22 @@ RUN pip install --no-cache-dir -r requirements.txt # Copy application code # ----------------------------- COPY utils/ utils/ -COPY backend/categorisation/ backend/categorisation/ -COPY backend/app/db/ backend/app/db/ -COPY backend/app/domain/ backend/app/domain/ -COPY backend/addresses/ backend/addresses/ +COPY backend/ backend/ COPY datatypes/ datatypes/ +# COPY backend/categorisation/ backend/categorisation/ +# COPY backend/app/db/ backend/app/db/ +# COPY backend/app/domain/ backend/app/domain/ +# COPY backend/addresses/ backend/addresses/ -COPY backend/app/db/connection.py backend/app/db/connection.py +# COPY backend/app/db/connection.py backend/app/db/connection.py -COPY backend/app/config.py backend/app/config.py -COPY backend/app/utils.py backend/app/utils.py +# COPY backend/app/config.py backend/app/config.py +# COPY backend/app/utils.py backend/app/utils.py +# COPY backend/app/plan/utils.py backend/app/plan/utils.py -COPY backend/__init__.py backend/__init__.py -COPY backend/app/__init__.py backend/app/__init__.py +# COPY backend/__init__.py backend/__init__.py +# COPY backend/app/__init__.py backend/app/__init__.py +# COPY backend/app/plan/__init__.py backend/app/plan/__init__.py # ----------------------------- diff --git a/backend/categorisation/handler/requirements.txt b/backend/categorisation/handler/requirements.txt index e277b094..cbc2687a 100644 --- a/backend/categorisation/handler/requirements.txt +++ b/backend/categorisation/handler/requirements.txt @@ -3,4 +3,8 @@ pydantic-settings psycopg2-binary==2.9.10 # Not used but needed to satisfy imports -pytz==2024.2 \ No newline at end of file +pytz==2024.2 +msgpack==1.1.0 +numpy<2 +pandas==2.2.3 +starlette \ No newline at end of file From b9640d189450fd06d6c7fba5e18f80b93eaad69b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 16:10:10 +0000 Subject: [PATCH 258/340] delete commented out lines from dockerfile --- backend/categorisation/handler/Dockerfile | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile index ce08ba96..751d42d5 100644 --- a/backend/categorisation/handler/Dockerfile +++ b/backend/categorisation/handler/Dockerfile @@ -31,20 +31,6 @@ RUN pip install --no-cache-dir -r requirements.txt COPY utils/ utils/ COPY backend/ backend/ COPY datatypes/ datatypes/ -# COPY backend/categorisation/ backend/categorisation/ -# COPY backend/app/db/ backend/app/db/ -# COPY backend/app/domain/ backend/app/domain/ -# COPY backend/addresses/ backend/addresses/ - -# COPY backend/app/db/connection.py backend/app/db/connection.py - -# COPY backend/app/config.py backend/app/config.py -# COPY backend/app/utils.py backend/app/utils.py -# COPY backend/app/plan/utils.py backend/app/plan/utils.py - -# COPY backend/__init__.py backend/__init__.py -# COPY backend/app/__init__.py backend/app/__init__.py -# COPY backend/app/plan/__init__.py backend/app/plan/__init__.py # ----------------------------- From 5c9a8b55f29abccefb27007f0beb8becb7b33b9f Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 16:10:53 +0000 Subject: [PATCH 259/340] revert temp changes to config.py --- backend/app/config.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/backend/app/config.py b/backend/app/config.py index 22e4e302..feb312b4 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,6 +1,5 @@ import os from functools import lru_cache -from pathlib import Path from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Optional @@ -9,16 +8,12 @@ def resolve_env_file() -> Optional[str]: env = os.getenv("ENVIRONMENT", "local") if env == "local": - env_file: Path = Path("backend/.env").resolve() # resolve to full path - print("USING ENV FILE:", env_file) - return str(env_file) + return "backend/.env" if env == "test": - env_file: Path = Path("backend/.env.test").resolve() - print("USING ENV FILE:", env_file) - return str(env_file) + return "backend/.env.test" - print("NO ENV FILE") + # prod = no env file return None From eea435d6417752b7521ec5ce32f03d0225a944db Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 17:08:21 +0000 Subject: [PATCH 260/340] get fast api app running locally --- backend/README.md | 2 +- backend/app/config.py | 11 ++++-- backend/app/plan/router.py | 69 +++++++++++++++++++++----------------- 3 files changed, 48 insertions(+), 34 deletions(-) diff --git a/backend/README.md b/backend/README.md index 005d6fc4..2ea6f153 100644 --- a/backend/README.md +++ b/backend/README.md @@ -172,7 +172,7 @@ For instance, if your server is running locally on port 8000, you can use curl to get a dummy token: ```commandline -curl http://localhost:8000/dummy-token +curl http://localhost:8000/local/dummy-token ``` You will receive a response containing the dummy JWT diff --git a/backend/app/config.py b/backend/app/config.py index feb312b4..d23dcd33 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,5 +1,6 @@ import os from functools import lru_cache +from pathlib import Path from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Optional @@ -7,11 +8,17 @@ from typing import Optional def resolve_env_file() -> Optional[str]: env = os.getenv("ENVIRONMENT", "local") + backend_dir = Path(__file__).resolve().parents[1] + if env == "local": - return "backend/.env" + env_file = backend_dir / ".env" + print("USING ENV FILE:", env_file) + return str(env_file) if env == "test": - return "backend/.env.test" + env_file = backend_dir / ".env.test" + print("USING ENV FILE:", env_file) + return str(env_file) # prod = no env file return None diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index cdf2873d..f45daea3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -38,6 +38,7 @@ router = APIRouter( ) settings = get_settings() +print("CONNECTION TO SQS IN REGION", settings.AWS_DEFAULT_REGION) sqs_client = boto3.client("sqs", settings.AWS_DEFAULT_REGION) @@ -55,46 +56,52 @@ async def trigger_categorisation( property_ids.sort() num_scenarios: int = get_scenarios_count_by_portfolio_id(payload.portfolio_id) - batch_size: int = math.ceil(1000 / num_scenarios) + batch_size: int = ( + math.ceil(1000 / num_scenarios) if num_scenarios > 1000 else num_scenarios + ) num_property_buckets: int = max(1, math.ceil(len(property_ids) / batch_size)) + print("num_scenarios", num_scenarios) + print("batch_size", batch_size) + print("num_property_buckets", num_property_buckets) + # Create task - task_id, _ = TasksInterface.create_task( - task_source="backend/plan/router.py:trigger_categorisation", - service="plan_engine", - inputs=payload.model_dump(), - task_only=True, - ) + # task_id, _ = TasksInterface.create_task( + # task_source="backend/plan/router.py:trigger_categorisation", + # service="plan_engine", + # inputs=payload.model_dump(), + # task_only=True, + # ) # Dispatch requests to lambdas - subtask_interface = SubTaskInterface() + # subtask_interface = SubTaskInterface() - for bucket_index in range(num_property_buckets): - bucket_property_ids: List[int] = [ - pid for pid in property_ids if pid % num_property_buckets == bucket_index - ] - bucket_request: CategorisationTriggerRequest = CategorisationTriggerRequest( - portfolio_id=payload.portfolio_id, - scenarios_to_consider=payload.scenarios_to_consider, - scenario_priority_order=payload.scenario_priority_order, - min_property_id=min(bucket_property_ids), - max_property_id=max(bucket_property_ids), - ) - # Create sub-task for each - subtask_id: UUID = subtask_interface.create_subtask( - task_id=task_id, inputs=bucket_request.model_dump() - ) + # for bucket_index in range(num_property_buckets): + # bucket_property_ids: List[int] = [ + # pid for pid in property_ids if pid % num_property_buckets == bucket_index + # ] + # bucket_request: CategorisationTriggerRequest = CategorisationTriggerRequest( + # portfolio_id=payload.portfolio_id, + # scenarios_to_consider=payload.scenarios_to_consider, + # scenario_priority_order=payload.scenario_priority_order, + # min_property_id=min(bucket_property_ids), + # max_property_id=max(bucket_property_ids), + # ) + # # Create sub-task for each + # subtask_id: UUID = subtask_interface.create_subtask( + # task_id=task_id, inputs=bucket_request.model_dump() + # ) - response = sqs_client.send_message( - QueueUrl="categorisation-queue-dev", - MessageBody=bucket_request.model_dump_json(), - ) + # response = sqs_client.send_message( + # QueueUrl="categorisation-queue-dev", + # MessageBody=bucket_request.model_dump_json(), + # ) - logger.info( - f"Chunk {bucket_index} sent to SQS. Property IDs {min(bucket_property_ids)}–{max(bucket_property_ids)}. Message ID: {response.get('MessageId')}" - ) + # logger.info( + # f"Chunk {bucket_index} sent to SQS. Property IDs {min(bucket_property_ids)}–{max(bucket_property_ids)}. Message ID: {response.get('MessageId')}" + # ) - await asyncio.sleep(0.05) # Small delay to avoid SQS throttling + # await asyncio.sleep(0.05) # Small delay to avoid SQS throttling return {"message": "Categorisation jobs distributed"} From 5680defa59fb415aacf2fcbc316475cd0ad8e221 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 24 Feb 2026 17:35:41 +0000 Subject: [PATCH 261/340] add concurrency to categorisation lambda --- infrastructure/terraform/lambda/categorisation/variables.tf | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/infrastructure/terraform/lambda/categorisation/variables.tf b/infrastructure/terraform/lambda/categorisation/variables.tf index e4bab243..347964de 100644 --- a/infrastructure/terraform/lambda/categorisation/variables.tf +++ b/infrastructure/terraform/lambda/categorisation/variables.tf @@ -17,6 +17,11 @@ variable "image_digest" { description = "Image digest (sha256:...)" } +variable "maximum_concurrency" { + type = number + default = 10 # null if you don't want to set it for this handler + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." +} locals { image_uri = "${var.ecr_repo_url}@${var.image_digest}" From de360ab8660761861b736742854b1ac8b677ece4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 24 Feb 2026 19:34:51 +0000 Subject: [PATCH 262/340] fixed issue when phase is 0 --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- recommendations/Recommendations.py | 4 +++- 3 files changed, 5 insertions(+), 3 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index c6561970..e1ca1b70 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 50cad4ca..b1ee5ffa 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index 5525b7a0..80cc06b4 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -582,6 +582,7 @@ class Recommendations: if rec_phase == starting_phase: return { "sap": float(property_instance.data["current-energy-efficiency"]), + "sap_prediction": float(property_instance.data["current-energy-efficiency"]), "carbon": float(property_instance.data["co2-emissions-current"]), "heat_demand": float(property_instance.data["energy-consumption-current"]), } @@ -599,12 +600,13 @@ class Recommendations: if not previous_phase_reps: return { "sap": float(property_instance.data["current-energy-efficiency"]), + "sap_prediction": float(property_instance.data["current-energy-efficiency"]), "carbon": float(property_instance.data["co2-emissions-current"]), "heat_demand": float(property_instance.data["energy-consumption-current"]), } # Median fallback (including zero-length case) - keys = ("sap", "carbon", "heat_demand") + keys = ("sap", "sap_prediction", "carbon", "heat_demand") return { key: np.median([item[key] for item in previous_phase_reps]) for key in keys From 088ea1e1c2fec9a93baf238c8cd62f5fce78317d Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 24 Feb 2026 19:54:31 +0000 Subject: [PATCH 263/340] zero gain --- .../optimiser/funding_optimiser.py | 10 + .../tests/test_optimiser_functions.py | 276 +++++++++++++++++- recommendations/tests/test_recommendations.py | 7 +- 3 files changed, 287 insertions(+), 6 deletions(-) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 324e2c74..69a6bc48 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -655,6 +655,11 @@ def optimise_with_scenarios( 1) With air source heat pump AND required insulation """ + # Universally handle zero gain + if target_gain is not None: + if target_gain <= 0: + return pd.DataFrame([]) + solutions = [] paths = [] # Produce the unique list of measure types @@ -770,6 +775,11 @@ def optimise_with_scenarios( for fixed in fixed_selections: + if target_gain is not None: + if target_gain <= 0: + # If we don't have any gain, we don't actually need to do this + continue + # fixed = [(gi, oi, opt), ...] fixed_items = [opt for (_, _, opt) in fixed] fixed_groups = {gi for (gi, _, _) in fixed} diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index debd2d88..08541c21 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -3,9 +3,19 @@ import numpy as np from types import SimpleNamespace from recommendations.tests.test_data.measures_to_optimise import measures_to_optimise from recommendations.optimiser import optimiser_functions +from recommendations.optimiser.funding_optimiser import optimise_with_scenarios from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.CostOptimiser import CostOptimiser -from recommendations.optimiser.StrategicOptimiser import StrategicOptimiser, Strategies +from recommendations.optimiser.StrategicOptimiser import StrategicOptimiser + + +@pytest.fixture +def property_instance(): + return SimpleNamespace( + id="P1", + has_ventilation=False, + data={"current-energy-efficiency": "52"}, + ) class TestPrepareInputMeasures: @@ -48,8 +58,9 @@ class TestPrepareInputMeasures: def test_filters_out_negative_cost_savings(self): recs = [ [{"recommendation_id": "bad1", "type": "loft_insulation", "total": 200, "kwh_savings": 100, - "energy_cost_savings": -5, "has_battery": False, - "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], + "energy_cost_savings": -100, "has_battery": False, + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, + "measure_type": "roof_insulation"}], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) assert measures == [] # should skip negative cost saving recs @@ -572,3 +583,262 @@ class TestCheckNeedsVentilation: ) assert result == False + + +class TestOptimiseWithScenarios: + + def test_zero_gain(self, property_instance): + input_measures = [[{'id': '0_phase=0', 'cost': 16901.01977922431, 'gain': np.float64(2.0), + 'type': 'internal_wall_insulation+mechanical_ventilation', 'innovation_uplift': 0, + 'cost_minus_uplift': 16901.01977922431, 'raw_cost': 16341.019779224309, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '1_phase=1', 'cost': 1197.0, 'gain': 0, 'type': 'loft_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 1197.0, 'raw_cost': 1197.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=1', 'cost': 1026.0, 'gain': 0, 'type': 'loft_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 1026.0, 'raw_cost': 1026.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '3_phase=1', 'cost': 855.0, 'gain': 0, 'type': 'loft_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 855.0, 'raw_cost': 855.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 5343.75, 'gain': 1, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 5343.75, 'raw_cost': 5343.75, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 1009.5600000000001, 'gain': np.float64(0.9000000000000057), + 'type': 'time_temperature_zone_control', 'innovation_uplift': 0, + 'cost_minus_uplift': 1009.5600000000001, 'raw_cost': 1009.5600000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 18979.9, 'gain': np.float64(6.9), 'type': 'air_source_heat_pump', + 'innovation_uplift': 0, 'cost_minus_uplift': 18979.9, 'raw_cost': 18979.9, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '8_phase=5', 'cost': 5420.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '9_phase=5', 'cost': 6210.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.6}, + {'id': '10_phase=5', 'cost': 6820.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.6}, + {'id': '11_phase=5', 'cost': 7202.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '12_phase=5', 'cost': 6495.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '13_phase=5', 'cost': 7285.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.92}, + {'id': '14_phase=5', 'cost': 7895.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.92}, + {'id': '15_phase=5', 'cost': 5520.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '16_phase=5', 'cost': 6310.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.0}, + {'id': '17_phase=5', 'cost': 6920.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.0}, + {'id': '18_phase=5', 'cost': 5840.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5840.0, 'raw_cost': 5840.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 5.2}, + {'id': '19_phase=5', 'cost': 6630.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6630.0, 'raw_cost': 6630.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 5.2}, + {'id': '20_phase=5', 'cost': 7240.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7240.0, 'raw_cost': 7240.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 5.2}, + {'id': '21_phase=5', 'cost': 8630.0, 'gain': np.float64(14.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8630.0, 'raw_cost': 8630.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 5.655}, + {'id': '22_phase=5', 'cost': 7660.0, 'gain': np.float64(14.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7660.0, 'raw_cost': 7660.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 5.66}, + {'id': '23_phase=5', 'cost': 8470.0, 'gain': np.float64(14.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8470.0, 'raw_cost': 8470.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 5.66}, + {'id': '24_phase=5', 'cost': 9090.0, 'gain': np.float64(14.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 9090.0, 'raw_cost': 9090.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 5.66}, + {'id': '25_phase=5', 'cost': 7240.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7240.0, 'raw_cost': 7240.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.79}, + {'id': '26_phase=5', 'cost': 8050.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8050.0, 'raw_cost': 8050.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.79}, + {'id': '27_phase=5', 'cost': 8660.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8660.0, 'raw_cost': 8660.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.79}, + {'id': '28_phase=5', 'cost': 5740.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5740.0, 'raw_cost': 5740.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.8}, + {'id': '29_phase=5', 'cost': 6530.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6530.0, 'raw_cost': 6530.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.8}, + {'id': '30_phase=5', 'cost': 7140.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7140.0, 'raw_cost': 7140.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.8}, + {'id': '31_phase=5', 'cost': 8360.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8360.0, 'raw_cost': 8360.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 5.22}, + {'id': '32_phase=5', 'cost': 7470.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7470.0, 'raw_cost': 7470.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 5.22}, + {'id': '33_phase=5', 'cost': 8280.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8280.0, 'raw_cost': 8280.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 5.22}, + {'id': '34_phase=5', 'cost': 8890.0, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8890.0, 'raw_cost': 8890.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 5.22}, + {'id': '35_phase=5', 'cost': 5892.21, 'gain': np.float64(13.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5892.21, 'raw_cost': 5892.21, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 5.34}, + {'id': '36_phase=5', 'cost': 5320.0, 'gain': np.float64(8.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '37_phase=5', 'cost': 6110.0, 'gain': np.float64(8.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.2}, + {'id': '38_phase=5', 'cost': 6720.0, 'gain': np.float64(8.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.2}, + {'id': '39_phase=5', 'cost': 6932.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '40_phase=5', 'cost': 6295.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '41_phase=5', 'cost': 7085.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.48}, + {'id': '42_phase=5', 'cost': 7695.0, 'gain': np.float64(9.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 3.48}, + {'id': '43_phase=5', 'cost': 5640.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5640.0, 'raw_cost': 5640.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.4}, + {'id': '44_phase=5', 'cost': 6430.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6430.0, 'raw_cost': 6430.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.4}, + {'id': '45_phase=5', 'cost': 7040.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7040.0, 'raw_cost': 7040.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.4}, + {'id': '46_phase=5', 'cost': 8090.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8090.0, 'raw_cost': 8090.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.785}, + {'id': '47_phase=5', 'cost': 7240.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7240.0, 'raw_cost': 7240.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.79}, + {'id': '48_phase=5', 'cost': 8050.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8050.0, 'raw_cost': 8050.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.79}, + {'id': '49_phase=5', 'cost': 8660.0, 'gain': np.float64(12.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8660.0, 'raw_cost': 8660.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.79}, + {'id': '50_phase=5', 'cost': 5520.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '51_phase=5', 'cost': 6310.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.0}, + {'id': '52_phase=5', 'cost': 6920.0, 'gain': np.float64(10.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.0}, + {'id': '53_phase=5', 'cost': 7820.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7820.0, 'raw_cost': 7820.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.35}, + {'id': '54_phase=5', 'cost': 6675.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6675.0, 'raw_cost': 6675.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.35}, + {'id': '55_phase=5', 'cost': 7485.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7485.0, 'raw_cost': 7485.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.35}, + {'id': '56_phase=5', 'cost': 8095.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 8095.0, 'raw_cost': 8095.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.35}, + {'id': '57_phase=5', 'cost': 5640.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5640.0, 'raw_cost': 5640.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.4}, + {'id': '58_phase=5', 'cost': 6430.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 6430.0, 'raw_cost': 6430.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.4}, + {'id': '59_phase=5', 'cost': 7040.0, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 7040.0, 'raw_cost': 7040.0, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': True, 'array_size': 4.4}, + {'id': '60_phase=5', 'cost': 5692.21, 'gain': np.float64(11.0), 'type': 'solar_pv', + 'innovation_uplift': 0, 'cost_minus_uplift': 5692.21, 'raw_cost': 5692.21, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 4.45}]] + + solutions = optimise_with_scenarios( + p=property_instance, + input_measures=input_measures, + budget=None, + target_gain=0, + enforce_heat_pump_insulation=True, + enforce_fabric_first=False, + already_installed_sap=0, # To be passed to output + ) + + assert solutions.empty diff --git a/recommendations/tests/test_recommendations.py b/recommendations/tests/test_recommendations.py index 747b0b2e..2218cd16 100644 --- a/recommendations/tests/test_recommendations.py +++ b/recommendations/tests/test_recommendations.py @@ -401,7 +401,7 @@ def test_adjust_ventilation_sap(sap_impact, limit, expected): ) == expected -def test_get_previous_phase_values_starting_phase(property_instance): +def test_get_previous_phase_values_phase_0_starting_phase_0(property_instance): result = Recommendations._get_previous_phase_values( rec_phase=0, starting_phase=0, @@ -411,6 +411,7 @@ def test_get_previous_phase_values_starting_phase(property_instance): assert result == { "sap": 65.0, + "sap_prediction": 65.0, "carbon": 2.4, "heat_demand": 284.0, } @@ -441,8 +442,8 @@ def test_get_previous_phase_values_single_rep(property_instance): def test_get_previous_phase_values_median(property_instance): impact_summary = [ - {"phase": 1, "representative": True, "sap": 70, "carbon": 2.0, "heat_demand": 250}, - {"phase": 1, "representative": True, "sap": 74, "carbon": 1.6, "heat_demand": 230}, + {"phase": 1, "representative": True, "sap": 70, "carbon": 2.0, "heat_demand": 250, "sap_prediction": 70}, + {"phase": 1, "representative": True, "sap": 74, "carbon": 1.6, "heat_demand": 230, "sap_prediction": 74}, ] result = Recommendations._get_previous_phase_values( From d85c44f03925ed4431278e18e61b382379d3e36c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 24 Feb 2026 20:11:48 +0000 Subject: [PATCH 264/340] fixing incorrect condition in best practice measures --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- recommendations/optimiser/optimiser_functions.py | 11 +++++++---- recommendations/tests/test_optimiser_functions.py | 4 ++-- 4 files changed, 11 insertions(+), 8 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index e1ca1b70..c6561970 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index b1ee5ffa..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index a5cbf90d..6fd70c20 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -306,7 +306,6 @@ def add_best_practice_measures( solution: List[Dict[str, Any]], recommendations: Dict[int, List[List[Dict[str, Any]]]], selected: Set[str], - needs_ventilation: bool ): """ Ensures best-practice measures like ventilation and trickle vents are included @@ -327,8 +326,6 @@ def add_best_practice_measures( All recommendations for all properties, keyed by property id. selected : set Set of already selected recommendation IDs. - needs_ventilation : bool - Whether the property requires mechanical ventilation to accompany certain measures. Returns ------- @@ -338,7 +335,13 @@ def add_best_practice_measures( # Check if any selected measure requires ventilation ventilation_selected = [r for r in solution if "+mechanical_ventilation" in r["type"]] - if needs_ventilation: + # If ventilation has been selected, or one of the measures needs ventilation, we need to ensure ventilation is + # included + measures_selected_needing_ventilation = any( + x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation + ) + + if measures_selected_needing_ventilation or len(ventilation_selected) > 0: ventilation_rec = next( (r[0] for r in recommendations[property_id] if r[0]["type"] == "mechanical_ventilation"), None diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index 08541c21..0a31ae2c 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -155,7 +155,7 @@ class TestAddBestPracticeMeasures: } selected = set() updated = optimiser_functions.add_best_practice_measures( - property_id, solution, recommendations, selected, True + property_id, solution, recommendations, selected ) assert "vent1" in updated assert "trickle1" in updated @@ -286,7 +286,7 @@ class TestIncreasingEpcE2e: total_optimised_gain = sum(m["gain"] for m in solution) assert total_optimised_gain == 17.6, "Total gain of optimised measures should meet or exceed target gain" - selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected, False) + selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) # Flatten recommendations for output flattened = optimiser_functions.flatten_recommendations_with_defaults(p.id, recommendations, selected) From 54b00a1671d5fec57ffdd692513d7d8609f45fe8 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 24 Feb 2026 20:13:55 +0000 Subject: [PATCH 265/340] removed incorrect ventilation input --- backend/engine/engine.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/backend/engine/engine.py b/backend/engine/engine.py index 101f6ada..8f6eca3f 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1182,9 +1182,7 @@ async def model_engine(body: PlanTriggerRequest): ) # Add best practice measures (ventilation/trickle vents) - pass needs_ventilation flag - selected = optimiser_functions.add_best_practice_measures( - p.id, solution, recommendations, selected, needs_ventilation - ) + selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) # Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( p.id, recommendations, selected, battery_sap_score From a9621054366c8730184a5b83f0ba2a7fc94e3a5b Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Wed, 25 Feb 2026 10:55:26 +0000 Subject: [PATCH 266/340] udpate example payload in local lambda invoker --- backend/categorisation/local_handler/invoke_local_lambda.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 5ed23c2d..8504ff55 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -12,6 +12,8 @@ payload = { "portfolio_id": 556, "scenarios_to_consider": [], "scenario_priority_order": [], + "min_property_id": 653150, + "max_property_id": 653150, } ) } From 3d18827961468d18dad0ab7f595391e1ea198197 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 13:36:02 +0000 Subject: [PATCH 267/340] added plan name to export --- backend/export/property_scenarios/db_functions.py | 2 ++ backend/export/property_scenarios/main.py | 6 +++--- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py index 8b29ab0e..a27806c2 100644 --- a/backend/export/property_scenarios/db_functions.py +++ b/backend/export/property_scenarios/db_functions.py @@ -150,6 +150,7 @@ class DbMethods: self.session.query( Recommendation, PlanModel.scenario_id, + PlanModel.name ) .join( PlanRecommendations, @@ -171,6 +172,7 @@ class DbMethods: for col in Recommendation.__table__.columns }, "scenario_id": r.scenario_id, + "plan_name": r.name, } for r in recs_query ] diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index d2d89916..56886a8b 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -59,17 +59,17 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, continue measures_df: pd.DataFrame = scenario_recs[ - ["property_id", "measure_type", "estimated_cost"] + ["property_id", "measure_type", "plan_name", "estimated_cost"] ].drop_duplicates() pivot = measures_df.pivot( - index="property_id", + index=["property_id", "plan_name"], columns="measure_type", values="estimated_cost", ).reset_index() pivot["total_retrofit_cost"] = ( - pivot.drop(columns=["property_id"]).sum(axis=1) + pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) ) post_sap = ( From 43796d339ecab4bb7257236dba7117aea2e31579 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 13:49:12 +0000 Subject: [PATCH 268/340] revert load dotenv path for sal --- asset_list/app.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asset_list/app.py b/asset_list/app.py index 0b792270..a97bb8e0 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -13,7 +13,7 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -load_dotenv(dotenv_path="backend/.env") +load_dotenv(dotenv_path="../backend/.env") EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) From 042140afecd1cc739a27ec74e249dad7fe15498a Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:10:07 +0000 Subject: [PATCH 269/340] added export to pytest.ini --- pytest.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pytest.ini b/pytest.ini index 7bef3884..608d5e0c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -3,4 +3,4 @@ pythonpath = . log_cli = true log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests From 2e5ae82d3b23eecda9ae07853d809041892ca5b4 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:13:19 +0000 Subject: [PATCH 270/340] added additional testing packages to dev container --- .devcontainer/backend/requirements.txt | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 9814c8d4..8fbb6120 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -18,5 +18,7 @@ sqlmodel pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 +dotenv +psycopg[binary] # Formatting black==26.1.0 \ No newline at end of file From bf865811c05a9bd26dc86b8cd8727cadf5a5d7ac Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:20:04 +0000 Subject: [PATCH 271/340] added handler typing --- backend/export/property_scenarios/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 56886a8b..eb97df2f 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -99,7 +99,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, # Lambda Handler # ============================================================ -def handler(event: dict, context: Optional[Any]) -> Mapping[str, int | str]: +def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, int | str]: """ Lambda event should have the following structure: 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) From e645f90b0efda881201c79874e609945b4c3f374 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:45:52 +0000 Subject: [PATCH 272/340] fixing stict typing issues --- .gitignore | 5 ++- .idea/watcherTasks.xml | 25 +++++++++++ .../export/property_scenarios/input_schema.py | 7 +++- backend/export/property_scenarios/main.py | 41 +++++++++---------- pyproject.toml | 3 ++ pyrightconfig.json | 8 ++++ 6 files changed, 65 insertions(+), 24 deletions(-) create mode 100644 .idea/watcherTasks.xml create mode 100644 pyproject.toml create mode 100644 pyrightconfig.json diff --git a/.gitignore b/.gitignore index 6268360b..68e66052 100644 --- a/.gitignore +++ b/.gitignore @@ -279,4 +279,7 @@ cache/ *.png *.pptx -local_data* \ No newline at end of file +local_data* + +# pyright local config +pyrightconfig.json \ No newline at end of file diff --git a/.idea/watcherTasks.xml b/.idea/watcherTasks.xml new file mode 100644 index 00000000..2a14ba99 --- /dev/null +++ b/.idea/watcherTasks.xml @@ -0,0 +1,25 @@ + + + + + + + + \ No newline at end of file diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py index 4ef704a3..2d925fc0 100644 --- a/backend/export/property_scenarios/input_schema.py +++ b/backend/export/property_scenarios/input_schema.py @@ -1,5 +1,5 @@ from typing import Optional, Union, List -from pydantic import BaseModel, model_validator +from pydantic import BaseModel, model_validator, PrivateAttr class ExportRequest(BaseModel): @@ -15,7 +15,10 @@ class ExportRequest(BaseModel): # boolean which will overwrite the scenario ids. If this is true, we will only export the default plan for each # property and will ignore the scenario ids default_plans_only: Optional[bool] = False - + + # Private attribute to indicate whether scenario_ids should be ignored due to default_plans_only being True + _scenario_ids_ignored: bool = PrivateAttr(default=False) + @model_validator(mode="after") def validate_default_plan_override(self): """ diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index eb97df2f..50754f6f 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -1,5 +1,5 @@ import json -from typing import Optional, Any, Mapping, Dict, Union +from typing import Optional, Any, Mapping, Dict, Union, List import pandas as pd from sqlalchemy.orm import Session @@ -13,6 +13,12 @@ from utils.logger import setup_logger logger = setup_logger() +def choose_group_keys(payload: ExportRequest) -> List[int]: + if payload.default_plans_only: + return [] # Single export, no scenario grouping + return payload.scenario_ids or [] + + def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: export_files: Dict[Union[str, int], pd.DataFrame] = {} @@ -22,33 +28,28 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, logger.info("Retrieved %s properties for export", len(properties_df)) - plans_df = db_methods.get_latest_plans( + plans_df: pd.DataFrame = db_methods.get_latest_plans( portfolio_id=payload.portfolio_id, scenario_ids=payload.scenario_ids, - default_only=payload.default_plans_only, + default_only=bool(payload.default_plans_only), ) logger.info("Retrieved %s plans for export", len(plans_df)) if plans_df.empty: return export_files - - recommendations_df = db_methods.get_recommendations( - plans_df["id"].tolist() - ) + plan_ids: List[int] = plans_df["id"].tolist() + recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids) recommendations_df = db_methods.attach_materials(recommendations_df) - if payload.default_plans_only: - group_keys = [None] # Single export, no scenario grouping - else: - group_keys = payload.scenario_ids + group_keys: List[Union[str, int]] = choose_group_keys(payload) for group_key in group_keys: if payload.default_plans_only: scenario_recs = recommendations_df - export_label = "default_plans" + export_label: Union[str, int] = "default_plans" else: scenario_recs = recommendations_df[ recommendations_df["scenario_id"] == group_key @@ -62,7 +63,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, ["property_id", "measure_type", "plan_name", "estimated_cost"] ].drop_duplicates() - pivot = measures_df.pivot( + pivot: pd.DataFrame = measures_df.pivot( index=["property_id", "plan_name"], columns="measure_type", values="estimated_cost", @@ -72,13 +73,13 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) ) - post_sap = ( + post_sap: pd.DataFrame = ( scenario_recs.groupby("property_id")[["sap_points"]] .sum() .reset_index() ) - df = ( + df: pd.DataFrame = ( properties_df.rename(columns={"solar_pv": "existing_solar_pv"}) .merge(pivot, how="left", on="property_id") .merge(post_sap, how="left", on="property_id") @@ -86,9 +87,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, df["sap_points"] = df["sap_points"].fillna(0) df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] - df["predicted_post_works_epc"] = df[ - "predicted_post_works_sap" - ].apply(sap_to_epc) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc) export_files[export_label] = df @@ -99,7 +98,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, # Lambda Handler # ============================================================ -def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, int | str]: +def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: """ Lambda event should have the following structure: 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) @@ -128,7 +127,7 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, in logger.debug("Validating request body") payload = ExportRequest.model_validate(body_dict) - if payload._scenario_ids_ignored: + if getattr(payload, "_scenario_ids_ignored", False): logger.warning( "Received scenario_ids in request body but they will be ignored " "because default_plans_only is set to True" @@ -139,7 +138,7 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, in exported_files = process_export(payload, session) # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url - + _ = exported_files return { "statusCode": 200, "body": json.dumps({}), diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..72ec3f0c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.pyright] +reportUnknownMemberType = false +reportUnknownVariableType = false \ No newline at end of file diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 00000000..d4e0e2a4 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,8 @@ +{ + "typeCheckingMode": "strict", + "venvPath": "/Users/khalimconn-kowlessar/opt/anaconda3/envs/", + "venv": "Fastapi-backend", + "include": [ + "." + ] +} \ No newline at end of file From f13bffec7c857f49e0c14b9eb5e59e90affdc48b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 16:51:26 +0000 Subject: [PATCH 273/340] implementing scenario_ids_ignored feedback --- backend/export/property_scenarios/input_schema.py | 4 ++++ backend/export/property_scenarios/main.py | 12 ++---------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py index 2d925fc0..f6fa5965 100644 --- a/backend/export/property_scenarios/input_schema.py +++ b/backend/export/property_scenarios/input_schema.py @@ -34,3 +34,7 @@ class ExportRequest(BaseModel): object.__setattr__(self, "_scenario_ids_ignored", False) return self + + @property + def scenario_ids_ignored(self) -> bool: + return self._scenario_ids_ignored diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index 50754f6f..50fd808b 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -100,15 +100,7 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: """ - Lambda event should have the following structure: - 1) task id - unique identifier for the export task (optional, can be used for tracking/logging) - 2) subtask id - unique identifier for the specific export operation (optional, can be used for tracking/logging) - 2) portfolio id - id of the portfolio to export - 3) scenario ids - list of scenario ids to export - 4) default_plans_only - flag indicating if we should only consider default plans for export (optional, - defaults to False) - - Exxample event: + Example event: body_dict = { "task_id": "test", "subtask_id": "test", @@ -127,7 +119,7 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un logger.debug("Validating request body") payload = ExportRequest.model_validate(body_dict) - if getattr(payload, "_scenario_ids_ignored", False): + if payload.scenario_ids_ignored: logger.warning( "Received scenario_ids in request body but they will be ignored " "because default_plans_only is set to True" From 1717e7b4c2308c947c1728b1fc647ab5659fcfe7 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 25 Feb 2026 17:30:39 +0000 Subject: [PATCH 274/340] fixing typing issues --- backend/app/db/base.py | 6 +- backend/app/db/models/recommendations.py | 46 +++++++-- .../export/property_scenarios/db_functions.py | 95 ++++++++++--------- 3 files changed, 93 insertions(+), 54 deletions(-) diff --git a/backend/app/db/base.py b/backend/app/db/base.py index 59be7030..fa2b68a5 100644 --- a/backend/app/db/base.py +++ b/backend/app/db/base.py @@ -1,3 +1,5 @@ -from sqlalchemy.orm import declarative_base +from sqlalchemy.orm import DeclarativeBase -Base = declarative_base() + +class Base(DeclarativeBase): + pass diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 9352eeb2..27d03303 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -54,19 +54,47 @@ class Recommendation(Base): class RecommendationMaterials(Base): __tablename__ = "recommendation_materials" - id = Column(BigInteger, primary_key=True, autoincrement=True) - recommendation_id = Column( - BigInteger, ForeignKey("recommendation.id"), nullable=False + id: Mapped[int] = mapped_column( + BigInteger, primary_key=True, autoincrement=True ) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - depth = Column(Float, nullable=False) - quantity = Column(Float, nullable=False) - quantity_unit = Column( + + recommendation_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey("recommendation.id"), + nullable=False, + ) + + material_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey(Material.id), + nullable=False, + ) + + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, + nullable=False, + server_default=func.now(), + ) + + depth: Mapped[float] = mapped_column( + Float, + nullable=False, + ) + + quantity: Mapped[float] = mapped_column( + Float, + nullable=False, + ) + + quantity_unit: Mapped[QuantityUnits] = mapped_column( Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), nullable=False, ) - estimated_cost = Column(Float, nullable=False) + + estimated_cost: Mapped[float] = mapped_column( + Float, + nullable=False, + ) class PlanTypeEnum(enum.Enum): # TODO: move this to domain? diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py index a27806c2..1527a989 100644 --- a/backend/export/property_scenarios/db_functions.py +++ b/backend/export/property_scenarios/db_functions.py @@ -1,6 +1,8 @@ -from typing import List, Any, Dict, Optional +from typing import List, Any, Dict, Optional, Tuple, Sequence import pandas as pd +from sqlalchemy import select from sqlalchemy.orm import Session +from sqlalchemy.engine import Row from collections import defaultdict from backend.app.db.models.recommendations import ( @@ -20,7 +22,7 @@ logger = setup_logger() class DbMethods: - def __init__(self, session: Session): + def __init__(self, session: Session) -> None: self.session = session def get_properties(self, portfolio_id: int) -> pd.DataFrame: @@ -29,28 +31,31 @@ class DbMethods: :param portfolio_id: :return: """ - query = ( - self.session.query(PropertyModel, PropertyDetailsEpcModel) + stmt = ( + select(PropertyModel, PropertyDetailsEpcModel) .join( PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id, ) - .filter(PropertyModel.portfolio_id == portfolio_id) - .all() + .where(PropertyModel.portfolio_id == portfolio_id) ) - data = [ + rows: Sequence[Row[Tuple[PropertyModel, PropertyDetailsEpcModel]]] = ( + self.session.execute(stmt).all() + ) + + data: List[Dict[str, Any]] = [ { **{ - col.name: getattr(row.PropertyModel, col.name) - for col in PropertyModel.__table__.columns + col.name: getattr(property_model, col.name) + for col in PropertyModel.__table__.columns.values() }, **{ - col.name: getattr(row.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns + col.name: getattr(epc_model, col.name) + for col in PropertyDetailsEpcModel.__table__.columns.values() }, } - for row in query + for property_model, epc_model in rows ] return pd.DataFrame(data) @@ -92,11 +97,11 @@ class DbMethods: # DISTINCT ON (property_id) keeps the first row per property, # ordered by created_at DESC so we get the newest one. - plans_query = ( - self.session.query(PlanModel) - .filter( + stmt = ( + select(PlanModel) + .where( PlanModel.portfolio_id == portfolio_id, - PlanModel.is_default.is_(True) + PlanModel.is_default.is_(True), ) .distinct(PlanModel.property_id) .order_by( @@ -110,11 +115,13 @@ class DbMethods: # DISTINCT ON (scenario_id, property_id) keeps the newest # plan per scenario/property combination. - plans_query = ( - self.session.query(PlanModel) - .filter( + assert scenario_ids is not None + + stmt = ( + select(PlanModel) + .where( PlanModel.portfolio_id == portfolio_id, - PlanModel.scenario_id.in_(scenario_ids) + PlanModel.scenario_id.in_(scenario_ids), ) .distinct( PlanModel.scenario_id, @@ -128,13 +135,14 @@ class DbMethods: ) logger.info("Fetching plans") - plans = plans_query.all() + + plans: Sequence[PlanModel] = self.session.scalars(stmt).all() return pd.DataFrame( [ { col.name: getattr(plan, col.name) - for col in PlanModel.__table__.columns + for col in PlanModel.__table__.columns.values() } for plan in plans ] @@ -146,35 +154,34 @@ class DbMethods: logger.info("No plan ids provided") return pd.DataFrame() - recs_query = ( - self.session.query( - Recommendation, - PlanModel.scenario_id, - PlanModel.name - ) + stmt = ( + select(Recommendation, PlanModel.scenario_id, PlanModel.name) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) - .filter( + .where( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default.is_(True), Recommendation.already_installed.is_(False), ) - .all() ) - data = [ + rows: Sequence[Tuple[Recommendation, Optional[int], Optional[str]]] = ( + self.session.execute(stmt).tuples().all() + ) + + data: List[Dict[str, Any]] = [ { **{ - col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns + col.name: getattr(rec_model, col.name) + for col in Recommendation.__table__.columns.values() }, - "scenario_id": r.scenario_id, - "plan_name": r.name, + "scenario_id": scenario_id, + "plan_name": plan_name, } - for r in recs_query + for rec_model, scenario_id, plan_name in rows ] return pd.DataFrame(data) @@ -185,12 +192,14 @@ class DbMethods: recommendations_df["materials"] = [] return recommendations_df - rec_ids = recommendations_df["id"].tolist() + rec_ids: List[int] = [int(x) for x in recommendations_df["id"].tolist()] - materials_query = ( - self.session.query(RecommendationMaterials) - .filter(RecommendationMaterials.recommendation_id.in_(rec_ids)) - .all() + stmt = select(RecommendationMaterials).where( + RecommendationMaterials.recommendation_id.in_(rec_ids) + ) + + materials_query: Sequence[RecommendationMaterials] = ( + self.session.scalars(stmt).all() ) materials_map: Dict[int, List[Dict[str, Any]]] = defaultdict(list) @@ -206,8 +215,8 @@ class DbMethods: } ) - recommendations_df["materials"] = recommendations_df["id"].apply( - lambda x: materials_map.get(x, []) + recommendations_df["materials"] = recommendations_df["id"].astype(int).apply( + lambda x: materials_map.get(int(x), []) ) return recommendations_df From add53a194901ae161db6f60f712756e0ef060e36 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 26 Feb 2026 12:58:33 +0000 Subject: [PATCH 275/340] Correct maths and prepare to trigger sqs from api --- backend/app/db/functions/tasks/Tasks.py | 8 +- backend/app/plan/router.py | 75 +++++---- backend/app/plan/utils.py | 126 +++++++++------ .../categorisation_trigger_request.py | 1 - backend/categorisation/handler/handler.py | 4 +- .../categorisation/handler/requirements.txt | 4 +- backend/categorisation/processor.py | 144 ++++++++++-------- 7 files changed, 218 insertions(+), 144 deletions(-) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index 13229447..0f987f3b 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -61,8 +61,12 @@ class SubTaskInterface: # UPDATE STATUS (in progress, complete, failed) # -------------------------------------------------------- def update_subtask_status( - self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None - ): + self, + subtask_id: UUID, + status: str, + outputs: Optional[Dict[str, str]] = None, + cloud_logs_url: Optional[str] = None, + ) -> SubTask: """ Update the status of a subtask, and recalculate the parent task progress. :param subtask_id: UUID of the subtask to update diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index f45daea3..1ecd1f40 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -52,18 +52,20 @@ async def trigger_categorisation( logger.info("API triggered with body: %s", payload) - property_ids: List[int] = get_property_ids(payload.portfolio_id) + property_ids: list[int] = get_property_ids(payload.portfolio_id) property_ids.sort() num_scenarios: int = get_scenarios_count_by_portfolio_id(payload.portfolio_id) - batch_size: int = ( - math.ceil(1000 / num_scenarios) if num_scenarios > 1000 else num_scenarios - ) - num_property_buckets: int = max(1, math.ceil(len(property_ids) / batch_size)) + total_plans_to_update: int = len(property_ids) * num_scenarios - print("num_scenarios", num_scenarios) - print("batch_size", batch_size) - print("num_property_buckets", num_property_buckets) + max_writes_per_batch: int = 1000 + properties_per_batch: int = max(1, max_writes_per_batch // num_scenarios) + + num_property_batches: int = math.ceil(len(property_ids) / properties_per_batch) + + print("total_plans_to_update", total_plans_to_update) + print("properties_per_batch", properties_per_batch) + print("num_property_buckets", num_property_batches) # Create task # task_id, _ = TasksInterface.create_task( @@ -76,32 +78,43 @@ async def trigger_categorisation( # Dispatch requests to lambdas # subtask_interface = SubTaskInterface() - # for bucket_index in range(num_property_buckets): - # bucket_property_ids: List[int] = [ - # pid for pid in property_ids if pid % num_property_buckets == bucket_index - # ] - # bucket_request: CategorisationTriggerRequest = CategorisationTriggerRequest( - # portfolio_id=payload.portfolio_id, - # scenarios_to_consider=payload.scenarios_to_consider, - # scenario_priority_order=payload.scenario_priority_order, - # min_property_id=min(bucket_property_ids), - # max_property_id=max(bucket_property_ids), - # ) - # # Create sub-task for each - # subtask_id: UUID = subtask_interface.create_subtask( - # task_id=task_id, inputs=bucket_request.model_dump() - # ) + for batch_index in range(num_property_batches): - # response = sqs_client.send_message( - # QueueUrl="categorisation-queue-dev", - # MessageBody=bucket_request.model_dump_json(), - # ) + start: int = batch_index * properties_per_batch + end: int = start + properties_per_batch - # logger.info( - # f"Chunk {bucket_index} sent to SQS. Property IDs {min(bucket_property_ids)}–{max(bucket_property_ids)}. Message ID: {response.get('MessageId')}" - # ) + batch_property_ids: List[int] = property_ids[start:end] - # await asyncio.sleep(0.05) # Small delay to avoid SQS throttling + if not batch_property_ids: + continue + + # bucket_property_ids: List[int] = [ + # pid for pid in property_ids if pid % num_buckets == bucket_index + # ] + # bucket_request: CategorisationTriggerRequest = CategorisationTriggerRequest( + # portfolio_id=payload.portfolio_id, + # scenarios_to_consider=payload.scenarios_to_consider, + # scenario_priority_order=payload.scenario_priority_order, + # min_property_id=min(bucket_property_ids), + # max_property_id=max(bucket_property_ids), + # ) + # # Create sub-task for each + # subtask_id: UUID = subtask_interface.create_subtask( + # task_id=task_id, inputs=bucket_request.model_dump() + # ) + # bucket_request.subtask_id = str(subtask_id) + + # response = sqs_client.send_message( + # QueueUrl="categorisation-queue-dev", + # MessageBody=bucket_request.model_dump_json(), + # ) + + logger.info( + # f"Chunk {bucket_index} sent to SQS. Property IDs {min(bucket_property_ids)}–{max(bucket_property_ids)}. Message ID: {response.get('MessageId')}" + f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}" + ) + + await asyncio.sleep(0.05) # Small delay to avoid SQS throttling return {"message": "Categorisation jobs distributed"} diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 10d7fb06..2237c38e 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,5 +1,6 @@ import ast import os +from typing import Optional import msgpack from uuid import UUID from utils.s3 import read_from_s3 @@ -24,7 +25,7 @@ def get_cleaned(): cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name=get_settings().DATA_BUCKET + bucket_name=get_settings().DATA_BUCKET, ) cleaned = msgpack.unpackb(cleaned, raw=False) @@ -56,32 +57,45 @@ def extract_property_request_data( ): patch_has_uprn = "uprn" in patches[0] if patches else True if patch_has_uprn: - patch = next(( - x for x in patches if str(x["uprn"]) == str(address.uprn) - ), {}) + patch = next((x for x in patches if str(x["uprn"]) == str(address.uprn)), {}) else: - patch = next(( - x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), {}) + patch = next( + ( + x + for x in patches + if (x["address"] == address.address) + and (x["postcode"] == address.postcode) + ), + {}, + ) # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn - has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False + has_uprn = ( + "uprn" in non_invasive_recommendations[0] + if non_invasive_recommendations + else False + ) if has_uprn: has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None] if has_uprn: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (str(x["uprn"]) == str(uprn)) - ), {}) + property_non_invasive_recommendations = next( + (x for x in non_invasive_recommendations if (str(x["uprn"]) == str(uprn))), + {}, + ) # We patch the non-invasive recs that are ['cavity_extract_and_refill'] else: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), {}) + property_non_invasive_recommendations = next( + ( + x + for x in non_invasive_recommendations + if (x["address"] == address.address) + and (x["postcode"] == address.postcode) + ), + {}, + ) if isinstance(property_non_invasive_recommendations.get("recommendations"), str): property_non_invasive_recommendations["recommendations"] = ast.literal_eval( @@ -90,7 +104,11 @@ def extract_property_request_data( transformed = [] for rec in property_non_invasive_recommendations["recommendations"]: if isinstance(rec, str): - transformed.append({"type": rec, }) + transformed.append( + { + "type": rec, + } + ) else: transformed.append(rec) @@ -102,26 +120,36 @@ def extract_property_request_data( valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None] if valuation_has_uprn: - property_valuation = next(( - float(x["valuation"]) for x in valuation_data if - (str(x["uprn"]) == str(uprn)) - ), None) + property_valuation = next( + ( + float(x["valuation"]) + for x in valuation_data + if (str(x["uprn"]) == str(uprn)) + ), + None, + ) else: - property_valuation = next(( - float(x["valuation"]) for x in valuation_data if - (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), None) + property_valuation = next( + ( + float(x["valuation"]) + for x in valuation_data + if (x["address"] == address.address) + and (x["postcode"] == address.postcode) + ), + None, + ) # Return data class to give a structured format return PropertyRequestData( patch=patch, non_invasive_recommendations=property_non_invasive_recommendations, - valuation=property_valuation + valuation=property_valuation, ) -def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[ - None, None, None, list]: +def parse_eco_packages( + addr: Address, prepared_epc +) -> tuple[list[str], int, str, list[str]] | tuple[None, None, None, list]: solar_identification = addr.solar_reason cavity_identification = addr.cavity_reason if not solar_identification and not cavity_identification: @@ -140,47 +168,51 @@ def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str "Solar Eligible": { "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], "target_sap": 86, # High B - "plan_type": "solar_eco4" + "plan_type": "solar_eco4", }, "Solar Eligible, Solid Wall Uninsulated, EPC E or Below": { "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], "target_sap": 86, # High B - "plan_type": "solar_eco4" + "plan_type": "solar_eco4", }, "Solar Eligible, Needs Heating Upgrade": { - "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters", - "mechanical_ventilation"], + "measures": [ + "solar_pv", + "loft_insulation", + "high_heat_retention_storage_heaters", + "mechanical_ventilation", + ], "target_sap": 86, # High B - "plan_type": "solar_hhrsh_eco4" + "plan_type": "solar_hhrsh_eco4", }, "Non-Intrusive Data Shows Empty Cavity": { "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "empty_cavity_eco" + "plan_type": "empty_cavity_eco", }, - 'Non-Intrusive Data Shows Empty Cavity, built after 2002': { + "Non-Intrusive Data Shows Empty Cavity, built after 2002": { "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "empty_cavity_eco" + "plan_type": "empty_cavity_eco", }, "EPC Shows Empty Cavity, inspections show retro drilled": { # EPC Indicates it's empty, so we simulate a fill "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "extraction_eco" + "plan_type": "extraction_eco", }, "EPC Shows Empty Cavity, inspections show filled at build": { # EPC Indicates it's empty, so we simulate a fill "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "extraction_eco" + "plan_type": "extraction_eco", }, "EPC Shows Empty Cavity": { # EPC Indicates it's empty, so we simulate a fill "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "empty_cavity_eco" - } + "plan_type": "empty_cavity_eco", + }, } # Always prioritise solar @@ -232,15 +264,21 @@ def build_cloudwatch_log_url(start_ms: int) -> str: ) -def handle_error(msg, e, subtask_id, status=500, start_ms=None): +def handle_error( + msg: str, + exception: Exception, + subtask_id: str, + status_code: int = 500, + start_ms: Optional[int] = None, +): # When the pipeline fails, handles error process cloud_logs_url = build_cloudwatch_log_url(start_ms) SubTaskInterface().update_subtask_status( subtask_id=UUID(subtask_id), status="failed", - outputs=str(e), - cloud_logs_url=cloud_logs_url + outputs=str(exception), + cloud_logs_url=cloud_logs_url, ) logger.error(msg, exc_info=True) - return Response(status_code=status, content=msg) + return Response(status_code=status_code, content=msg) diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py index 17a5d916..62879b5d 100644 --- a/backend/categorisation/categorisation_trigger_request.py +++ b/backend/categorisation/categorisation_trigger_request.py @@ -11,7 +11,6 @@ class CategorisationTriggerRequest(BaseModel): min_property_id: Optional[int] = None max_property_id: Optional[int] = None - task_id: Optional[str] = None subtask_id: Optional[str] = None diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py index eb532624..a1f69ea6 100644 --- a/backend/categorisation/handler/handler.py +++ b/backend/categorisation/handler/handler.py @@ -1,6 +1,9 @@ import json +import time from typing import Any, Mapping +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from backend.app.plan.utils import build_cloudwatch_log_url from backend.categorisation.categorisation_trigger_request import ( CategorisationTriggerRequest, ) @@ -26,7 +29,6 @@ def handler(event: Mapping[str, Any], context: Any) -> None: logger.debug("Successfully validated request body") process_portfolio(payload) - except Exception as e: logger.info("Handler exception") logger.error(f"Failed to process record: {e}") diff --git a/backend/categorisation/handler/requirements.txt b/backend/categorisation/handler/requirements.txt index cbc2687a..6e737772 100644 --- a/backend/categorisation/handler/requirements.txt +++ b/backend/categorisation/handler/requirements.txt @@ -1,10 +1,10 @@ sqlmodel pydantic-settings psycopg2-binary==2.9.10 +starlette # Not used but needed to satisfy imports pytz==2024.2 msgpack==1.1.0 numpy<2 -pandas==2.2.3 -starlette \ No newline at end of file +pandas==2.2.3 \ No newline at end of file diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index 7a7d48ca..a212aac9 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -2,6 +2,7 @@ import time from collections import defaultdict from typing import Dict, List, Optional from uuid import UUID +from starlette.responses import Response from backend.app.db.functions.recommendations_functions import ( bulk_update_plans, @@ -14,7 +15,7 @@ from backend.app.db.functions.tasks.Tasks import SubTaskInterface from backend.app.db.models.recommendations import PlanModel, ScenarioModel from backend.app.domain.classes.plan import Plan from backend.app.domain.classes.scenario import Scenario -from backend.app.plan.utils import build_cloudwatch_log_url +from backend.app.plan.utils import build_cloudwatch_log_url, handle_error from backend.categorisation.categorisation_trigger_request import ( CategorisationTriggerRequest, ) @@ -25,7 +26,7 @@ logger = setup_logger() def process_portfolio( body: CategorisationTriggerRequest, -) -> None: # TODO: make this a class +) -> Response: # TODO: make this a class portfolio_id: int = body.portfolio_id scenarios_to_consider: Optional[List[int]] = body.scenarios_to_consider scenario_priority_order: Optional[List[int]] = body.scenario_priority_order @@ -36,74 +37,91 @@ def process_portfolio( logger.info(f"Processing portfolio {portfolio_id}") start_ms = int(time.time() * 1000) - all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id) - plans_by_id: Dict[int, Plan] = {} # TODO: make this an in-memory repository class + try: - if scenarios_to_consider: - if len(scenarios_to_consider) < 2: - raise ValueError( - "Cannot run auto categorisation for fewer than 2 scenarios" - ) + all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id) + plans_by_id: Dict[int, Plan] = ( + {} + ) # TODO: make this an in-memory repository class - # first get all plans that we're interested in - plans_for_consideration: List[Plan] = _load_plans_for_portfolio( - portfolio_id, - all_scenarios, - scenarios_to_consider, - min_property_id, - max_property_id, - ) - for plan in plans_for_consideration: - if plan.id is not None: # just in case - plans_by_id[plan.id] = plan + if scenarios_to_consider: + if len(scenarios_to_consider) < 2: + raise ValueError( + "Cannot run auto categorisation for fewer than 2 scenarios" + ) - # then unset existing defaults on domain objects regardless of whether they're under consideration or not - default_plans: List[Plan] = _get_default_plans( - portfolio_id, all_scenarios, min_property_id, max_property_id - ) - for plan in default_plans: - plan.set_default(False) - if plan.id is not None: # just in case - plans_by_id[plan.id] = plan - - logger.info(f"Successfully unset {len(default_plans)} default plan(s)") - - # then set new defaults on domain objects under consideration - plans_for_consideration_by_property: Dict[int, List[Plan]] = ( - _group_plans_by_property(plans_for_consideration) - ) - - for property_id, property_plans in plans_for_consideration_by_property.items(): - if not property_plans: - raise ValueError(f"No plans for property {property_id}") - - try: - cheapest_plan = choose_cheapest_relevant_plan( - property_plans, scenario_priority_order - ) - except Exception: - logger.error(f"Failed to find cheapest plan for property {property_id}") - raise - - property_plans = _update_plan_objects(property_plans, cheapest_plan) - for plan in property_plans: + # first get all plans that we're interested in + plans_for_consideration: List[Plan] = _load_plans_for_portfolio( + portfolio_id, + all_scenarios, + scenarios_to_consider, + min_property_id, + max_property_id, + ) + for plan in plans_for_consideration: if plan.id is not None: # just in case plans_by_id[plan.id] = plan - logger.info("Successfully set defaults on Plan objects in memory") - - # then pass all domain objects to database to update (regardless of whether they've changed) - _update_plans_in_db(list(plans_by_id.values())) - logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") - - # Mark the subtask as successful - if subtask_id: - cloud_logs_url = build_cloudwatch_log_url(start_ms) - SubTaskInterface().update_subtask_status( - subtask_id=UUID(subtask_id), - status="complete", - cloud_logs_url=cloud_logs_url, + # then unset existing defaults on domain objects regardless of whether they're under consideration or not + default_plans: List[Plan] = _get_default_plans( + portfolio_id, all_scenarios, min_property_id, max_property_id ) + for plan in default_plans: + plan.set_default(False) + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan + + logger.info(f"Successfully unset {len(default_plans)} default plan(s)") + + # then set new defaults on domain objects under consideration + plans_for_consideration_by_property: Dict[int, List[Plan]] = ( + _group_plans_by_property(plans_for_consideration) + ) + + for property_id, property_plans in plans_for_consideration_by_property.items(): + if not property_plans: + raise ValueError(f"No plans for property {property_id}") + + try: + cheapest_plan = choose_cheapest_relevant_plan( + property_plans, scenario_priority_order + ) + except Exception: + logger.error(f"Failed to find cheapest plan for property {property_id}") + raise + + property_plans = _update_plan_objects(property_plans, cheapest_plan) + for plan in property_plans: + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan + + logger.info("Successfully set defaults on Plan objects in memory") + + # then pass all domain objects to database to update (regardless of whether they've changed) + _update_plans_in_db(list(plans_by_id.values())) + + # Mark the subtask as successful + if body.subtask_id: + cloud_logs_url = build_cloudwatch_log_url(start_ms) + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="complete", + cloud_logs_url=cloud_logs_url, + ) + logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") + + return Response(status_code=200) + except Exception as e: + if subtask_id: + return handle_error( + "Exception during Categorisation processing.", + e, + subtask_id, + 500, + start_ms, + ) + + raise def choose_cheapest_relevant_plan( From 432f050e69751ea27db6b26aefccc4a719bd3e69 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 26 Feb 2026 14:15:34 +0000 Subject: [PATCH 276/340] trigger lambda and subtasks from api --- .devcontainer/backend/devcontainer.json | 3 +- backend/app/plan/router.py | 56 ++++++++++++------------- 2 files changed, 30 insertions(+), 29 deletions(-) diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index 3727d8a3..ac654ac1 100644 --- a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -6,7 +6,8 @@ "workspaceFolder": "/workspaces/model", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ - "source=${localEnv:HOME},target=/home/vscode,type=bind" + // "source=${localEnv:HOME},target=/home/vscode,type=bind", + "source=${localEnv:HOME}/.aws,target=/home/vscode/.aws,type=bind,consistency=cached" ], "customizations": { "vscode": { diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 1ecd1f40..08138ff1 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -68,15 +68,15 @@ async def trigger_categorisation( print("num_property_buckets", num_property_batches) # Create task - # task_id, _ = TasksInterface.create_task( - # task_source="backend/plan/router.py:trigger_categorisation", - # service="plan_engine", - # inputs=payload.model_dump(), - # task_only=True, - # ) + task_id, _ = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_categorisation", + service="plan_engine", + inputs=payload.model_dump(), + task_only=True, + ) # Dispatch requests to lambdas - # subtask_interface = SubTaskInterface() + subtask_interface = SubTaskInterface() for batch_index in range(num_property_batches): @@ -88,30 +88,30 @@ async def trigger_categorisation( if not batch_property_ids: continue - # bucket_property_ids: List[int] = [ - # pid for pid in property_ids if pid % num_buckets == bucket_index - # ] - # bucket_request: CategorisationTriggerRequest = CategorisationTriggerRequest( - # portfolio_id=payload.portfolio_id, - # scenarios_to_consider=payload.scenarios_to_consider, - # scenario_priority_order=payload.scenario_priority_order, - # min_property_id=min(bucket_property_ids), - # max_property_id=max(bucket_property_ids), - # ) - # # Create sub-task for each - # subtask_id: UUID = subtask_interface.create_subtask( - # task_id=task_id, inputs=bucket_request.model_dump() - # ) - # bucket_request.subtask_id = str(subtask_id) + batch_property_ids: List[int] = [ + pid for pid in property_ids if pid % num_property_batches == batch_index + ] + batch_request: CategorisationTriggerRequest = CategorisationTriggerRequest( + portfolio_id=payload.portfolio_id, + scenarios_to_consider=payload.scenarios_to_consider, + scenario_priority_order=payload.scenario_priority_order, + min_property_id=min(batch_property_ids), + max_property_id=max(batch_property_ids), + ) + # Create sub-task for each + subtask_id: UUID = subtask_interface.create_subtask( + task_id=task_id, inputs=batch_request.model_dump() + ) + batch_request.subtask_id = str(subtask_id) - # response = sqs_client.send_message( - # QueueUrl="categorisation-queue-dev", - # MessageBody=bucket_request.model_dump_json(), - # ) + response = sqs_client.send_message( + QueueUrl="categorisation-queue-dev", + MessageBody=batch_request.model_dump_json(), + ) logger.info( - # f"Chunk {bucket_index} sent to SQS. Property IDs {min(bucket_property_ids)}–{max(bucket_property_ids)}. Message ID: {response.get('MessageId')}" - f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}" + f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}. Message ID: {response.get('MessageId')}" + # f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}" ) await asyncio.sleep(0.05) # Small delay to avoid SQS throttling From b7f7c40552e9bda485e0a9a45fa0fd874031fdd4 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 26 Feb 2026 14:27:52 +0000 Subject: [PATCH 277/340] remove accidental redefinition of variable. add extra information to log --- backend/app/plan/router.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 08138ff1..6c691cc1 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -88,9 +88,6 @@ async def trigger_categorisation( if not batch_property_ids: continue - batch_property_ids: List[int] = [ - pid for pid in property_ids if pid % num_property_batches == batch_index - ] batch_request: CategorisationTriggerRequest = CategorisationTriggerRequest( portfolio_id=payload.portfolio_id, scenarios_to_consider=payload.scenarios_to_consider, @@ -110,7 +107,7 @@ async def trigger_categorisation( ) logger.info( - f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}. Message ID: {response.get('MessageId')}" + f"Chunk {batch_index} sent to SQS. {len(batch_property_ids)} Property IDs in batch (total {len(property_ids)}). Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}. Message ID: {response.get('MessageId')}" # f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}" ) From bb51d16f7f41d3c7064deb55beffa60eb08fa278 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 26 Feb 2026 16:38:44 +0000 Subject: [PATCH 278/340] get the task statuses to update --- backend/app/plan/router.py | 1 - backend/app/plan/utils.py | 4 ++++ .../local_handler/invoke_local_lambda.py | 7 ++++--- backend/categorisation/processor.py | 11 +++++++++-- 4 files changed, 17 insertions(+), 6 deletions(-) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 6c691cc1..0e6b2aa3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -108,7 +108,6 @@ async def trigger_categorisation( logger.info( f"Chunk {batch_index} sent to SQS. {len(batch_property_ids)} Property IDs in batch (total {len(property_ids)}). Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}. Message ID: {response.get('MessageId')}" - # f"Chunk {batch_index} sent to SQS. Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}" ) await asyncio.sleep(0.05) # Small delay to avoid SQS throttling diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 2237c38e..7dfe5538 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -246,9 +246,13 @@ def build_cloudwatch_log_url(start_ms: int) -> str: Build a CloudWatch Logs URL for the current Lambda invocation, including timestamp window from start_ms to end_ms (epoch ms). """ + logger.info("Building cloudwatch logs URL") region = os.environ["AWS_REGION"] + logger.info("Building cloudwatch logs URL: Got AWS region") log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"] + logger.info("Building cloudwatch logs URL: Got lambda log group name") log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"] + logger.info("Building cloudwatch logs URL: Got lambda log stream name") # CloudWatch console requires / encoded as $252F encoded_group = log_group.replace("/", "$252F") diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py index 8504ff55..0897e7f2 100644 --- a/backend/categorisation/local_handler/invoke_local_lambda.py +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -9,11 +9,12 @@ payload = { { "body": json.dumps( { - "portfolio_id": 556, + "portfolio_id": 569, "scenarios_to_consider": [], "scenario_priority_order": [], - "min_property_id": 653150, - "max_property_id": 653150, + "min_property_id": 660418, + "max_property_id": 660917, + "subtask_id": "6a0bcbac-ddab-435f-8708-8acd4662b067", } ) } diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py index a212aac9..88bc121e 100644 --- a/backend/categorisation/processor.py +++ b/backend/categorisation/processor.py @@ -36,6 +36,14 @@ def process_portfolio( logger.info(f"Processing portfolio {portfolio_id}") start_ms = int(time.time() * 1000) + cloud_logs_url = build_cloudwatch_log_url(start_ms) + + if body.subtask_id: + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="in progress", + cloud_logs_url=cloud_logs_url, + ) try: @@ -101,14 +109,13 @@ def process_portfolio( _update_plans_in_db(list(plans_by_id.values())) # Mark the subtask as successful + logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") if body.subtask_id: - cloud_logs_url = build_cloudwatch_log_url(start_ms) SubTaskInterface().update_subtask_status( subtask_id=UUID(subtask_id), status="complete", cloud_logs_url=cloud_logs_url, ) - logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") return Response(status_code=200) except Exception as e: From 1df4fb781547caf89c2c1cd581d7044adcb2128c Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 26 Feb 2026 16:53:23 +0000 Subject: [PATCH 279/340] working on export issues --- .idea/watcherTasks.xml | 2 +- backend/export/property_scenarios/main.py | 1 + backend/export/tests/test_export.py | 42 +++++++++++++++++------ sfr/principal_pitch/2_export_data.py | 15 ++++---- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/.idea/watcherTasks.xml b/.idea/watcherTasks.xml index 2a14ba99..60d7e26a 100644 --- a/.idea/watcherTasks.xml +++ b/.idea/watcherTasks.xml @@ -1,7 +1,7 @@ - +