diff --git a/.devcontainer/asset_list/devcontainer.json b/.devcontainer/asset_list/devcontainer.json index 4834d559..945dcd88 100644 --- a/.devcontainer/asset_list/devcontainer.json +++ b/.devcontainer/asset_list/devcontainer.json @@ -22,7 +22,9 @@ "jgclark.vscode-todo-highlight", "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", - "ms-python.black-formatter" + "ms-python.black-formatter", + "GrapeCity.gc-excelviewer", + "jakobhoeg.vscode-pokemon" ], "settings": { "files.defaultWorkspace": "/workspaces/model", diff --git a/.devcontainer/backend/Dockerfile b/.devcontainer/backend/Dockerfile index 4c5d16f5..662f53b0 100644 --- a/.devcontainer/backend/Dockerfile +++ b/.devcontainer/backend/Dockerfile @@ -43,4 +43,24 @@ WORKDIR /workspaces/model # 6) Make Python find your package # Add project root to PYTHONPATH for all processes -ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} \ No newline at end of file +ENV PYTHONPATH=/workspaces/model:${PYTHONPATH} + + +# Install terraform +RUN apt-get update && sudo apt-get install -y gnupg software-properties-common +RUN wget -O- https://apt.releases.hashicorp.com/gpg | \ +gpg --dearmor | \ +sudo tee /usr/share/keyrings/hashicorp-archive-keyring.gpg > /dev/null +RUN echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] \ +https://apt.releases.hashicorp.com $(lsb_release -cs) main" | \ +tee /etc/apt/sources.list.d/hashicorp.list +RUN apt update +RUN apt-get install terraform +RUN terraform -install-autocomplete + +# Install postgres +RUN apt install -y wget gnupg2 lsb-release +RUN echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" | sudo tee /etc/apt/sources.list.d/pgdg.list +RUN wget -qO - https://www.postgresql.org/media/keys/ACCC4CF8.asc | sudo apt-key add - +RUN apt update +RUN apt install -y postgresql-14 \ No newline at end of file diff --git a/.devcontainer/backend/devcontainer.json b/.devcontainer/backend/devcontainer.json index c672b1bf..ac654ac1 100644 --- 
a/.devcontainer/backend/devcontainer.json +++ b/.devcontainer/backend/devcontainer.json @@ -6,7 +6,8 @@ "workspaceFolder": "/workspaces/model", "postStartCommand": "bash .devcontainer/backend/post-install.sh", "mounts": [ - "source=${localEnv:HOME},target=/workspaces/home,type=bind" + // "source=${localEnv:HOME},target=/home/vscode,type=bind", + "source=${localEnv:HOME}/.aws,target=/home/vscode/.aws,type=bind,consistency=cached" ], "customizations": { "vscode": { @@ -22,7 +23,11 @@ "corentinartaud.pdfpreview", "ms-python.vscode-python-envs", "ms-python.black-formatter", - "waderyan.gitblame" + "waderyan.gitblame", + "GrapeCity.gc-excelviewer", + "jakobhoeg.vscode-pokemon", + "github.vscode-github-actions", + "me-dutour-mathieu.vscode-github-actions" ], "settings": { "files.defaultWorkspace": "/workspaces/model", @@ -38,3 +43,4 @@ "PYTHONFLAGS": "-Xfrozen_modules=off" } } + \ No newline at end of file diff --git a/.devcontainer/backend/requirements.txt b/.devcontainer/backend/requirements.txt index 9562aa6a..5cd40ced 100644 --- a/.devcontainer/backend/requirements.txt +++ b/.devcontainer/backend/requirements.txt @@ -9,7 +9,7 @@ mangum==0.19.0 # AWS boto3==1.35.44 # Data -openpyxl==3.1.2 +openpyxl==3.1.5 # Basic pytz uvicorn[standard] @@ -18,5 +18,9 @@ sqlmodel pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 +dotenv +psycopg[binary] +pytest-postgresql # Formatting -black==26.1.0 \ No newline at end of file +black==26.1.0 +boto3-stubs \ No newline at end of file diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml index 641e31f9..3435c92d 100644 --- a/.github/workflows/_build_image.yml +++ b/.github/workflows/_build_image.yml @@ -38,6 +38,8 @@ on: required: false DEV_DB_NAME: required: false + EPC_AUTH_TOKEN: + required: false jobs: build: @@ -47,6 +49,7 @@ jobs: DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + EPC_AUTH_TOKEN: ${{ secrets.EPC_AUTH_TOKEN }} 
outputs: image_digest: ${{ steps.digest.outputs.image_digest }} @@ -87,14 +90,17 @@ jobs: temp=$(eval echo "$line") BUILD_ARGS="$BUILD_ARGS --build-arg $temp" done <<< "${{ inputs.build_args }}" - - docker build \ + + docker buildx build \ + --no-cache \ + --platform linux/amd64 \ + --provenance=false \ + --sbom=false \ + --push \ -f ${{ inputs.dockerfile_path }} \ $BUILD_ARGS \ -t $IMAGE_URI \ ${{ inputs.build_context }} - - docker push $IMAGE_URI - name: Resolve image digest id: digest diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml index 3612ab43..ce1a0e77 100644 --- a/.github/workflows/_deploy_lambda.yml +++ b/.github/workflows/_deploy_lambda.yml @@ -42,6 +42,22 @@ on: required: true AWS_REGION: required: true + TF_VAR_db_host: + required: false + TF_VAR_db_name: + required: false + TF_VAR_db_port: + required: false + TF_VAR_api_key: + required: false + TF_VAR_secret_key: + required: false + TF_VAR_domain_name: + required: false + TF_VAR_epc_auth_token: + required: false + TF_VAR_google_solar_api_key: + required: false jobs: deploy: @@ -90,6 +106,15 @@ jobs: - name: Terraform Plan working-directory: ${{ inputs.lambda_path }} + env: + TF_VAR_db_host: ${{ secrets.TF_VAR_db_host }} + TF_VAR_db_name: ${{ secrets.TF_VAR_db_name }} + TF_VAR_db_port: ${{ secrets.TF_VAR_db_port }} + TF_VAR_api_key: ${{ secrets.TF_VAR_api_key }} + TF_VAR_secret_key: ${{ secrets.TF_VAR_secret_key }} + TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }} + TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }} + TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }} run: | terraform plan \ -var="stage=${{ inputs.stage }}" \ @@ -106,4 +131,18 @@ jobs: - name: Terraform Destroy if: inputs.terraform_destroy == 'true' && inputs.terraform_apply != 'true' working-directory: ${{ inputs.lambda_path }} - run: terraform destroy -auto-approve \ No newline at end of file + env: + TF_VAR_db_host: ${{ secrets.TF_VAR_db_host }} + 
TF_VAR_db_name: ${{ secrets.TF_VAR_db_name }} + TF_VAR_db_port: ${{ secrets.TF_VAR_db_port }} + TF_VAR_api_key: ${{ secrets.TF_VAR_api_key }} + TF_VAR_secret_key: ${{ secrets.TF_VAR_secret_key }} + TF_VAR_domain_name: ${{ secrets.TF_VAR_domain_name }} + TF_VAR_epc_auth_token: ${{ secrets.TF_VAR_epc_auth_token }} + TF_VAR_google_solar_api_key: ${{ secrets.TF_VAR_google_solar_api_key }} + run: | + terraform destroy -auto-approve \ + -var="stage=${{ inputs.stage }}" \ + -var="lambda_name=${{ inputs.lambda_name }}" \ + -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \ + -var="image_digest=${{ inputs.image_digest }}" diff --git a/.github/workflows/deploy_fastapi_backend.yml b/.github/workflows/deploy_fastapi_backend.yml index 32e30bfa..7b00d3f2 100644 --- a/.github/workflows/deploy_fastapi_backend.yml +++ b/.github/workflows/deploy_fastapi_backend.yml @@ -87,7 +87,13 @@ jobs: - name: Build Docker Image For Engine run: | - docker build -t fastapi-lambda-image:${{ github.sha }} -f backend/docker/engine.Dockerfile . --load + docker buildx build \ + --platform linux/amd64 \ + --provenance=false \ + --output=type=docker \ + -t fastapi-lambda-image:${{ github.sha }} \ + -f backend/docker/engine.Dockerfile \ + . 
- name: Login to ECR run: | @@ -135,3 +141,4 @@ jobs: # Deploy to AWS Lambda via Serverless sls deploy --stage ${{ github.ref_name }} --verbose + diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 71e2ad9d..c360aadf 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -3,12 +3,9 @@ name: Deploy infrastructure on: push: branches: - - "**" - paths: - - 'infrastructure/terraform/**' - - '.github/workflows/deploy_terraform.yml' - - '.github/workflows/_build_image.yml' - - '.github/workflows/_deploy_lambda.yml' + - "dev" + - "prod" + workflow_dispatch: jobs: determine_stage: @@ -51,6 +48,7 @@ jobs: runs-on: ubuntu-latest env: STAGE: ${{ needs.determine_stage.outputs.stage }} + TERRAFORM_APPLY: ${{ needs.determine_stage.outputs.terraform_apply }} steps: - uses: actions/checkout@v4 @@ -76,10 +74,10 @@ jobs: run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - name: Terraform Apply - if: env.STAGE == 'prod' + if: env.TERRAFORM_APPLY == 'true' working-directory: infrastructure/terraform/shared run: terraform apply -auto-approve tfplan - + # ============================================================ # 2️⃣ Build Address 2 UPRN image and Push # ============================================================ @@ -90,10 +88,19 @@ jobs: ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} dockerfile_path: backend/address2UPRN/handler/Dockerfile build_context: . 
+ build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + EPC_AUTH_TOKEN=$EPC_AUTH_TOKEN secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + EPC_AUTH_TOKEN: ${{ secrets.DEV_EPC_AUTH_TOKEN }} # ============================================================ # 3️⃣ Deploy Address 2 UPRN Lambda @@ -140,7 +147,7 @@ jobs: # 3️⃣ Deploy Postcode Splitter Lambda # ============================================================ postcodeSplitter_lambda: - needs: [postcodeSplitter_image, determine_stage] + needs: [postcodeSplitter_image, determine_stage, address2uprn_lambda] uses: ./.github/workflows/_deploy_lambda.yml with: lambda_name: postcodeSplitter @@ -192,4 +199,85 @@ jobs: secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - AWS_REGION: ${{ secrets.DEV_AWS_REGION }} \ No newline at end of file + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # Categorisation image and Push + # ============================================================ + categorisation_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/categorisation/handler/Dockerfile + build_context: . 
+ build_args: | + DEV_DB_HOST=$DEV_DB_HOST + DEV_DB_PORT=$DEV_DB_PORT + DEV_DB_NAME=$DEV_DB_NAME + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }} + DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }} + DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }} + + # ============================================================ + # Deploy Categorisation Lambda + # ============================================================ + categorisation_lambda: + needs: [categorisation_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: categorisation + lambda_path: infrastructure/terraform/lambda/categorisation + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: categorisation-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.categorisation_image.outputs.image_digest }} + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # Ara Engine image and Push + # ============================================================ + ara_engine_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: engine-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/docker/engine.Dockerfile + build_context: . 
+ secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # Deploy Categorisation Lambda + # ============================================================ + ara_engine_lambda: + needs: [ara_engine_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: ara_engine + lambda_path: infrastructure/terraform/lambda/engine + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: engine-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.ara_engine_image.outputs.image_digest }} + terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }} + TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }} + TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }} + TF_VAR_api_key: ${{ secrets.DEV_API_KEY }} + TF_VAR_secret_key: ${{ secrets.DEV_SECRET_KEY }} + TF_VAR_domain_name: ${{ secrets.DEV_DOMAIN_NAME }} + TF_VAR_epc_auth_token: ${{ secrets.DEV_EPC_AUTH_TOKEN }} + TF_VAR_google_solar_api_key: ${{ secrets.DEV_GOOGLE_SOLAR_API_KEY }} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 6268360b..68e66052 100644 --- a/.gitignore +++ b/.gitignore @@ -279,4 +279,7 @@ cache/ *.png *.pptx -local_data* \ No newline at end of file +local_data* + +# pyright local config +pyrightconfig.json \ No newline at end of file diff --git a/.idea/Model.iml b/.idea/Model.iml index 09f2e496..0b8ab409 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -10,4 +10,7 @@ + + \ No newline at end of file diff --git a/.idea/watcherTasks.xml b/.idea/watcherTasks.xml new file mode 100644 index 00000000..60d7e26a --- /dev/null +++ 
b/.idea/watcherTasks.xml @@ -0,0 +1,25 @@ + + + + + + + + \ No newline at end of file diff --git a/.vscode/settings.json b/.vscode/settings.json index 3d4c6b42..b294c736 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -9,12 +9,14 @@ "path": "/bin/bash" } }, -<<<<<<< HEAD -======= "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true, - "python.testing.pytestArgs": ["-s", "-q", "--no-cov"] ->>>>>>> 11b482838efcf46f376fd3ecbf2c1bb0be6d097d + "python.testing.pytestArgs": ["-s", "-q", "--no-cov"], + + "python.languageServer": "Pylance", + "python.analysis.typeCheckingMode": "strict", + "python.analysis.autoSearchPaths": true, + "python.analysis.extraPaths": ["./src"] // Hot reload setting that needs to be in user settings // "jupyter.runStartupCommands": [ diff --git a/asset_list/AssetList.py b/asset_list/AssetList.py index ea4d8b34..dede3162 100644 --- a/asset_list/AssetList.py +++ b/asset_list/AssetList.py @@ -25,17 +25,19 @@ import asset_list.mappings.outcomes as outcomes_mappings from recommendations.recommendation_utils import ( estimate_perimeter, estimate_external_wall_area, - estimate_number_of_floors + estimate_number_of_floors, ) from etl.epc_clean.epc_attributes.RoofAttributes import RoofAttributes from etl.epc_clean.epc_attributes.WallAttributes import WallAttributes +from dotenv import load_dotenv + logger = setup_logger() +load_dotenv(dotenv_path="../backend/.env") # OpenAI API Key (set this in your environment variables for security) -OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "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") - +OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY") class DataRemapper: @@ -61,7 +63,9 @@ class DataRemapper: self.max_tokens = max_tokens # Limit for OpenAI API # Memoization for AI calls - self.ai_cache = {} # {tuple(unmapped_values): {original_value: 
standardized_value}} + self.ai_cache = ( + {} + ) # {tuple(unmapped_values): {original_value: standardized_value}} # Capture the reponse for debugging self.ai_response = None @@ -79,14 +83,16 @@ class DataRemapper: if not isinstance(text, str): return None text = text.strip().lower() - text = re.sub(r'[^\w\s]', '', text) # Remove punctuation + text = re.sub(r"[^\w\s]", "", text) # Remove punctuation # Replace double strings - text = re.sub(r'\s+', ' ', text) + text = re.sub(r"\s+", " ", text) return text def fuzzy_match(self, text): """Use fuzzy matching to find the closest standard value.""" - match, score = process.extractOne(text, self.standard_values) if text else (None, 0) + match, score = ( + process.extractOne(text, self.standard_values) if text else (None, 0) + ) return match if score >= self.fuzzy_threshold else None def count_tokens(self, text): @@ -98,7 +104,9 @@ class DataRemapper: if not unmapped_values: return {} - unmapped_tuple = tuple(sorted(unmapped_values)) # Ensure consistency for memoization + unmapped_tuple = tuple( + sorted(unmapped_values) + ) # Ensure consistency for memoization if unmapped_tuple in self.ai_cache: return self.ai_cache[unmapped_tuple] # Return memoized result @@ -180,7 +188,9 @@ class DataRemapper: # Rule-Based Check (Predefined Mapping) if cleaned_value in self.standard_map or value in self.standard_map: self.remap_dict[value] = ( - self.standard_map[cleaned_value] if cleaned_value in self.standard_map else self.standard_map[value] + self.standard_map[cleaned_value] + if cleaned_value in self.standard_map + else self.standard_map[value] ) continue @@ -237,22 +247,22 @@ class AssetList: "roof-description": "epc_roof_construction", "floor-description": "epc_floor_construction", "mainheat-description": "epc_heating_type", - 'mainheatcont-description': "epc_heating_controls", + "mainheatcont-description": "epc_heating_controls", "secondheat-description": "epc_secondary_heating", "transaction-type": "epc_reason", 
"energy-consumption-current": "epc_heat_demand", "photo-supply": "epc_photo_supply", - "estimated": "estimated" + "estimated": "estimated", } FIND_EPC_DATA_NAMES = { "heating_text": "epc_estiamted_heating_kwh", "hot_water_text": "epc_estimated_hotwater_kwh", - 'Assessor’s name': "epc_assessor_name", + "Assessor’s name": "epc_assessor_name", "Assessor's Telephone": "epc_assessor_telephone", "Assessor's Email": "epc_assessor_email", "Accreditation scheme": "epc_assessor_accreditation", "Assessor’s ID": "epc_assessor_id", - "Solar photovoltaics": "epc_solar_pv" + "Solar photovoltaics": "epc_solar_pv", } DATETIME_REMAP = { @@ -286,44 +296,69 @@ class AssetList: DOMNA_PROPERTY_ID = "domna_property_id" # Regular expression for identifying if the address might point to multiple units - MULTI_UNIT_REGEX = re.compile(r'\b([A-Za-z0-9]+)-([A-Za-z0-9]+)\b') + MULTI_UNIT_REGEX = re.compile(r"\b([A-Za-z0-9]+)-([A-Za-z0-9]+)\b") # List of columns relating to the non-intrusive data NON_INTRUSIVES_COLNAMES = [ - "Archetype", "Construction", "Insulated", "Material", "CIGA Check Required", - "PV, ACCESS ISSUE, SEE NOTES", "OFF GAS - ROOF ORIENTATION", - "Any further surveyor notes", 'Surveyors Name' + "Archetype", + "Construction", + "Insulated", + "Material", + "CIGA Check Required", + "PV, ACCESS ISSUE, SEE NOTES", + "OFF GAS - ROOF ORIENTATION", + "Any further surveyor notes", + "Surveyors Name", ] NON_INTRUSIVES_NEW_FORMAT_COLNAMES = [ - "Has the property been re-walled?", "Is the property tile hung?", "Does the property have a render?", - "Does the property have cladding?", "Gable Wall Obstructions", + "Has the property been re-walled?", + "Is the property tile hung?", + "Does the property have a render?", + "Does the property have cladding?", + "Gable Wall Obstructions", "Does the property have foliage that needs removal?", - "Potential unsafe environment", "Date of Inspection", "Borescoped?" 
+ "Potential unsafe environment", + "Date of Inspection", + "Borescoped?", ] # Another version of non-intrusives: NON_INTRUSIVES_NEW_FORMAT_COLNAMES_V2 = [ - 'Archetype', 'Archetype 2', 'Construction', 'Insulated', 'Material', 'Borescoped?', - 'CIGA Check Required', 'ROOF ORIENTATION', 'TILE HUNG', 'RENDERED', - 'CLADDING', 'ACCESS ISSUES', 'FURTHER SURVEYOR NOTES', 'DATE', - 'NAME OF SURVEYOR' + "Archetype", + "Archetype 2", + "Construction", + "Insulated", + "Material", + "Borescoped?", + "CIGA Check Required", + "ROOF ORIENTATION", + "TILE HUNG", + "RENDERED", + "CLADDING", + "ACCESS ISSUES", + "FURTHER SURVEYOR NOTES", + "DATE", + "NAME OF SURVEYOR", ] # Solar non-intrusive fields NON_INTRUSIVES_SOLAR_COLNAMES = [ - 'PV, ACCESS ISSUE, SEE NOTES', 'ROOF ORIENTATION', - 'AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ', 'SHADING', - 'Roof Tiles - CONCRETE/SLATE/ROSEMARY', - 'NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)', - 'SCAFFOLD REQUIRED? IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE', - 'IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW', - 'DATE', 'NAME OF SURVEYOR' + "PV, ACCESS ISSUE, SEE NOTES", + "ROOF ORIENTATION", + "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED ", + "SHADING", + "Roof Tiles - CONCRETE/SLATE/ROSEMARY", + "NO. OF PANELS (Typical size of 420W panel is 1mx1.7m and need 30cm all the way around panels)", + "SCAFFOLD REQUIRED? 
IF YES, ARE THERE ANY SURROUNDING ACCESS ISSUES - PLEASE DESCRIBE", + "IF PANELS ARE GOING ON REAR PLEASE CHECK FOR SPACE FOR SCAFFOLDING - DESCRIBE ANY ISSUES BELOW", + "DATE", + "NAME OF SURVEYOR", ] NON_INTRUSIVES_ELIGIBILITY_COLUMN = "Eligibility (Red/Yellow/Green)" - OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ['WFT Findings', 'ECO Eligibility'] + OLD_FORMAT_NON_INTRUSIVE_COLNAMES = ["WFT Findings", "ECO Eligibility"] # This SAP threshold is a key search criteria for properties that may be eligible for extraction FILLED_CAVITY_SAP_THRESHOLD = 75 @@ -341,7 +376,9 @@ class AssetList: ATTRIBUTE_ESTIMATED_PERIMETER = "attribute_est_perimter" ATTRIBUTE_HEAT_LOSS_AREA = "attribute_heat_loss_area" ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS = "attribute_epc_roof_insulation_thickness" - ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = f"sap_rating_{FILLED_CAVITY_SAP_THRESHOLD}_and_below" + ATTRIBUTE_SAP_THRESHOLD_AND_BELOW = ( + f"sap_rating_{FILLED_CAVITY_SAP_THRESHOLD}_and_below" + ) ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD = f"epc_is_pre_{EPC_YEAR_THRESHOLD}" # These are the descriptions that we look for in the EPC data that are indicative of no insulation @@ -354,12 +391,17 @@ class AssetList: # List of strings that we look for in the EPC data, where substrings indicate that the wall is insulated EPC_INSULATED_WALLS_SUBSTRINGS = [ - ", insulated", "with external insulation", "with internal insulation", "filled cavity" + ", insulated", + "with external insulation", + "with internal insulation", + "filled cavity", ] # List of strings that we look for in the EPC data, where substrings indicate that the roof is insulated EPC_INSULATED_ROOF_SUBSTRINGS = [ - "(another dwelling above)", ", insulated", ", insulated (assumed) ", + "(another dwelling above)", + ", insulated", + ", insulated (assumed) ", ", ceiling insulated", ] @@ -374,35 +416,69 @@ class AssetList: # Work type prefixes: # Empties EMPTY_CAVITY_NON_INTRUSIVE = "Non-Intrusive Data Shows Empty Cavity" - EMPTY_CAVITY_NON_INTRUSIVE_YEAR = 
'Non-Intrusive Data Shows Empty Cavity, built after 2002' - EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = "EPC Shows Empty Cavity, inspections show retro drilled" - EPC_EMPTY_INSPECTIONS_FILLED = "EPC Shows Empty Cavity, inspections show filled or other" - EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = "EPC Shows Empty Cavity, inspections show filled at build" - EPC_EMPTY_INSPECTIONS_NON_CAVITY = "EPC Shows Empty Cavity, inspections show non-cavity build" + EMPTY_CAVITY_NON_INTRUSIVE_YEAR = ( + "Non-Intrusive Data Shows Empty Cavity, built after 2002" + ) + EPC_EMPTY_INSPECTIONS_RETRO_DRILLED = ( + "EPC Shows Empty Cavity, inspections show retro drilled" + ) + EPC_EMPTY_INSPECTIONS_FILLED = ( + "EPC Shows Empty Cavity, inspections show filled or other" + ) + EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD = ( + "EPC Shows Empty Cavity, inspections show filled at build" + ) + EPC_EMPTY_INSPECTIONS_NON_CAVITY = ( + "EPC Shows Empty Cavity, inspections show non-cavity build" + ) EPC_EMPTY = "EPC Shows Empty Cavity" - LANDLORD_EMPTY_INSPECTIONS_OTHER = ("Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " - "Non-cavity") + LANDLORD_EMPTY_INSPECTIONS_OTHER = ( + "Landlord Data Shows Empty Cavity, EPC & Inspections Shows Filled or " + "Non-cavity" + ) # Extraction EXTRACTION_NON_INTRUSIVE = "Non-Intrusive Data Shows Cavity Extraction" # Solar SOLAR_ELIGIBLE = "Solar Eligible" - SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED = ( + "Solar Eligible, Solid Wall Uninsulated, EPC E or Below" + ) SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE = "Solar Eligible, Needs Heating Upgrade" CRM_HISTORICAL_CAVITY_PRODUCT = { - "id": 156989182176, "unit_price": 0, "name": "Historical ECO Cavity" + "id": 156989182176, + "unit_price": 0, + "name": "Historical ECO Cavity", } CRM_PRODUCTS = { - "Empty Cavity - ECO4": {"id": 82733738177, "unit_price": 1000, "name": "Empty Cavity - ECO4"}, - "Extract & Fill - ECO4": 
{"id": 100307905778, "unit_price": 500, "name": "Extract & Fill - ECO4"}, - "Solar PV - ECO4": {"id": 82623589564, "unit_price": 1608, "name": "Solar PV - ECO4"}, - "Solar PV + HHRSH - ECO4": {"id": 155529972924, "unit_price": 1608, "name": "Solar PV + HHRSH - ECO4"}, - "Solar PV + Heating Upgrade - ECO4": { - "id": 109265426665, "unit_price": 1608, "name": "Solar PV + Heating Upgrade - ECO4" + "Empty Cavity - ECO4": { + "id": 82733738177, + "unit_price": 1000, + "name": "Empty Cavity - ECO4", }, - "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT + "Extract & Fill - ECO4": { + "id": 100307905778, + "unit_price": 500, + "name": "Extract & Fill - ECO4", + }, + "Solar PV - ECO4": { + "id": 82623589564, + "unit_price": 1608, + "name": "Solar PV - ECO4", + }, + "Solar PV + HHRSH - ECO4": { + "id": 155529972924, + "unit_price": 1608, + "name": "Solar PV + HHRSH - ECO4", + }, + "Solar PV + Heating Upgrade - ECO4": { + "id": 109265426665, + "unit_price": 1608, + "name": "Solar PV + Heating Upgrade - ECO4", + }, + "Historical ECO Cavity": CRM_HISTORICAL_CAVITY_PRODUCT, } def __init__( @@ -427,13 +503,15 @@ class AssetList: landlord_sap=None, landlord_block_reference=None, phase=False, - header=0 + header=0, ): self.local_filepath = local_filepath self.sheet_name = sheet_name # Read in the data if local_filepath.endswith(".xlsx"): - self.raw_asset_list = pd.read_excel(local_filepath, header=header, sheet_name=sheet_name) + self.raw_asset_list = pd.read_excel( + local_filepath, header=header, sheet_name=sheet_name + ) else: self.raw_asset_list = pd.read_csv(local_filepath) self.standardised_asset_list = self.raw_asset_list.copy() @@ -459,21 +537,31 @@ class AssetList: self.phase = phase # We detect the presence of the non-intrusive columns - self.non_intrusives_present = "CIGA Check Required" in self.raw_asset_list.columns + self.non_intrusives_present = ( + "CIGA Check Required" in self.raw_asset_list.columns + ) # We detect if we have the old format of non-intruvies - 
self.old_format_non_intrusives_present = "WFT Findings" in self.raw_asset_list.columns + self.old_format_non_intrusives_present = ( + "WFT Findings" in self.raw_asset_list.columns + ) if self.old_format_non_intrusives_present: self.non_intrusives_present = False - self.non_intrusives_eligibility = "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + self.non_intrusives_eligibility = ( + "Eligibility (Red/Yellow/Green)" in self.raw_asset_list.columns + ) self.new_format_non_insturives_present = ( "Has the property been re-walled?" in self.raw_asset_list.columns ) - self.new_format_non_insturives_present_v2 = 'TILE HUNG' in self.raw_asset_list.columns + self.new_format_non_insturives_present_v2 = ( + "TILE HUNG" in self.raw_asset_list.columns + ) - self.solar_non_intrusives_present = "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + self.solar_non_intrusives_present = ( + "AREA (m²) OF ROOF WHERE PV WILL BE SITUATED" in self.raw_asset_list.columns + ) # Names of columns self.landlord_property_id = landlord_property_id @@ -500,7 +588,7 @@ class AssetList: "property_type": None, "wall_construction": None, "heating_system": None, - "existing_pv": None + "existing_pv": None, } self.variable_mappings = {} @@ -510,8 +598,12 @@ class AssetList: self.keep_variables = [] # Finally, we handle the case where the landlord's property ID is actually the OS UPRN - if (self.landlord_uprn == self.landlord_property_id) and (self.landlord_property_id is not None): - self.standardised_asset_list[self.STANDARD_UPRN] = self.standardised_asset_list[self.landlord_uprn].copy() + if (self.landlord_uprn == self.landlord_property_id) and ( + self.landlord_property_id is not None + ): + self.standardised_asset_list[self.STANDARD_UPRN] = ( + self.standardised_asset_list[self.landlord_uprn].copy() + ) # Update the reference to landlord UPRn self.landlord_uprn = self.STANDARD_UPRN @@ -558,41 +650,63 @@ class AssetList: self.prefixes_to_products = { # Empty 
self.EMPTY_CAVITY_NON_INTRUSIVE: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], self.EPC_EMPTY_INSPECTIONS_FILLED: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], + self.EPC_EMPTY_INSPECTIONS_NON_CAVITY: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], self.EPC_EMPTY: self.CRM_PRODUCTS["Empty Cavity - ECO4"], - self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS["Empty Cavity - ECO4"], + self.LANDLORD_EMPTY_INSPECTIONS_OTHER: self.CRM_PRODUCTS[ + "Empty Cavity - ECO4" + ], # Extraction self.EXTRACTION_NON_INTRUSIVE: self.CRM_PRODUCTS["Extract & Fill - ECO4"], # Solar self.SOLAR_ELIGIBLE: self.CRM_PRODUCTS["Solar PV - ECO4"], - self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS["Solar PV - ECO4"], - self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS["Solar PV + Heating Upgrade - ECO4"], + self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED: self.CRM_PRODUCTS[ + "Solar PV - ECO4" + ], + self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE: self.CRM_PRODUCTS[ + "Solar PV + Heating Upgrade - ECO4" + ], } - def _extract_address1(self, asset_list, full_address_col, postcode_col, method="first_two_words"): + def _extract_address1( + self, asset_list, full_address_col, postcode_col, method="first_two_words" + ): if method not in self.ADDRESS_1_CLEANING_METHODS: raise ValueError(f"Method {method} for producing address1 not recognized") if method == "first_two_words": - asset_list[self.address1_colname] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") + asset_list[self.address1_colname] = ( + 
asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") + ) return asset_list if method == "first_word": - asset_list[self.address1_colname] = asset_list[full_address_col].str.split(" ").str[0] + asset_list[self.address1_colname] = ( + asset_list[full_address_col].str.split(" ").str[0] + ) return asset_list if method == "house_number_extraction": asset_list[self.address1_colname] = asset_list.apply( - lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]), - axis=1 + lambda x: SearchEpc.get_house_number( + address=x[full_address_col], postcode=x[postcode_col] + ), + axis=1, ) for _, x in asset_list.iterrows(): - SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]) + SearchEpc.get_house_number( + address=x[full_address_col], postcode=x[postcode_col] + ) return asset_list raise ValueError(f"Method {method} not recognized") @@ -622,9 +736,16 @@ class AssetList: # Apply transformation self.standardised_asset_list[self.DOMNA_PROPERTY_ID] = ( - self.standardised_asset_list[self.full_address_colname] + - self.standardised_asset_list[self.postcode_colname] - ).str.strip().str.replace(r"[^\w\s]", "", regex=True).str.replace(" ", "").str.lower().apply(_make_hash) + ( + self.standardised_asset_list[self.full_address_colname] + + self.standardised_asset_list[self.postcode_colname] + ) + .str.strip() + .str.replace(r"[^\w\s]", "", regex=True) + .str.replace(" ", "") + .str.lower() + .apply(_make_hash) + ) @staticmethod def _strip_postcode_from_full_address(full_address, postcode): @@ -666,9 +787,7 @@ class AssetList: postcode = postcode.replace(" ", " ") if " " not in postcode: # Restructure it - return " ".join( - [postcode[:-3], postcode[-3:]] - ) + return " ".join([postcode[:-3], postcode[-3:]]) return postcode @@ -680,52 +799,72 @@ class AssetList: # Remove rows without a postcode if self.postcode_colname is not None: - self.standardised_asset_list = 
self.standardised_asset_list.dropna(subset=[self.postcode_colname]) + self.standardised_asset_list = self.standardised_asset_list.dropna( + subset=[self.postcode_colname] + ) # We also clean postcode columns where if there is not space, we create one - self.standardised_asset_list[self.postcode_colname] = self.standardised_asset_list[ - self.postcode_colname - ].apply(self._clean_postcode) + self.standardised_asset_list[self.postcode_colname] = ( + self.standardised_asset_list[self.postcode_colname].apply( + self._clean_postcode + ) + ) # We clean up portential non-breaking spaces, and double spaces for col in [ - c for c in [self.postcode_colname, self.full_address_colname, self.address1_colname] if - c is not None + c + for c in [ + self.postcode_colname, + self.full_address_colname, + self.address1_colname, + ] + if c is not None ]: - self.standardised_asset_list[col] = self.standardised_asset_list[col].astype(str) - self.standardised_asset_list[col] = self.standardised_asset_list[col].str.replace('\xa0', ' ', regex=False) - self.standardised_asset_list[col] = self.standardised_asset_list[col].str.replace(' ', ' ', regex=False) + self.standardised_asset_list[col] = self.standardised_asset_list[ + col + ].astype(str) + self.standardised_asset_list[col] = self.standardised_asset_list[ + col + ].str.replace("\xa0", " ", regex=False) + self.standardised_asset_list[col] = self.standardised_asset_list[ + col + ].str.replace(" ", " ", regex=False) if self.address1_colname is None: if self.address1_extraction_method is None: - raise ValueError("Missing address 1 - please specify an extraction method") + raise ValueError( + "Missing address 1 - please specify an extraction method" + ) self.address1_colname = self.STANDARD_ADDRESS_1 # If we do not have this, we produce it self.standardised_asset_list = self._extract_address1( asset_list=self.standardised_asset_list, full_address_col=self.full_address_colname, postcode_col=self.postcode_colname, - 
method=self.address1_extraction_method + method=self.address1_extraction_method, ) if self.full_address_colname is None: if not self.full_address_cols_to_concat: - raise ValueError("Missing full address - please specify columns to concatenate") + raise ValueError( + "Missing full address - please specify columns to concatenate" + ) self.full_address_colname = self.STANDARD_FULL_ADDRESS self.standardised_asset_list[self.full_address_colname] = ( self.standardised_asset_list[self.full_address_cols_to_concat].apply( - lambda x: ", ".join([y for y in x if not pd.isnull(y)]), - axis=1 + lambda x: ", ".join([y for y in x if not pd.isnull(y)]), axis=1 ) ) else: # Make sure to strip the postcode out of the full address - self.standardised_asset_list[self.full_address_colname] = self.standardised_asset_list.apply( - lambda x: self._strip_postcode_from_full_address( - full_address=x[self.full_address_colname], - postcode=x[self.postcode_colname] - ), - axis=1 + self.standardised_asset_list[self.full_address_colname] = ( + self.standardised_asset_list.apply( + lambda x: self._strip_postcode_from_full_address( + full_address=x[self.full_address_colname], + postcode=x[self.postcode_colname], + ), + axis=1, + ) ) # We create the domna property id @@ -734,7 +873,9 @@ class AssetList: # Clean up the UPRN column, if the landlord has provided them if self.landlord_uprn is not None: self.standardised_asset_list[self.landlord_uprn] = ( - self.standardised_asset_list[self.landlord_uprn].apply(self._convert_uprn) + self.standardised_asset_list[self.landlord_uprn].apply( + self._convert_uprn + ) ) # We keep just the columns we care about and will work through the various columns and standardise @@ -771,12 +912,15 @@ class AssetList: self.landlord_heating_system: self.STANDARD_HEATING_SYSTEM, self.landlord_existing_pv: self.STANDARD_EXISTING_PV, self.landlord_sap: self.STANDARD_SAP, - self.landlord_block_reference: self.STANDARD_BLOCK_REFERENCE + self.landlord_block_reference: 
self.STANDARD_BLOCK_REFERENCE, } self.rename_map = {k: v for k, v in self.rename_map.items() if k is not None} non_intrusive_columns = [] - if self.non_intrusives_present and not self.new_format_non_insturives_present_v2: + if ( + self.non_intrusives_present + and not self.new_format_non_insturives_present_v2 + ): non_intrusive_columns = self.NON_INTRUSIVES_COLNAMES if self.non_intrusives_eligibility: @@ -794,7 +938,9 @@ class AssetList: if self.old_format_non_intrusives_present: # We check if we have the ECO Eligibility column, which we might not have non_intrusive_columns = [ - c for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES if c in self.standardised_asset_list.columns + c + for c in self.OLD_FORMAT_NON_INTRUSIVE_COLNAMES + if c in self.standardised_asset_list.columns ] if "Warmfront Finding" in self.standardised_asset_list.columns: @@ -805,8 +951,11 @@ class AssetList: self.rename_map = { **self.rename_map, **dict( - zip(non_intrusive_columns, ["non-intrusives: " + c for c in non_intrusive_columns]) - ) + zip( + non_intrusive_columns, + ["non-intrusives: " + c for c in non_intrusive_columns], + ) + ), } # We idenfiy addresses which are likely to be multi-addresses (i.g are rooms x-y) @@ -818,11 +967,12 @@ class AssetList: # we see instances of "average thermal transmittance" in the description if self.landlord_wall_construction is not None: self.standardised_asset_list[self.landlord_wall_construction] = np.where( - self.standardised_asset_list[self.landlord_wall_construction].str.lower().str.contains( - "average thermal transmittance" - ) == True, - "new build - average thermal transmittance", self.standardised_asset_list[self.landlord_wall_construction] + .str.lower() + .str.contains("average thermal transmittance") + == True, + "new build - average thermal transmittance", + self.standardised_asset_list[self.landlord_wall_construction], ) else: # We want to make sure that we have a column for wall construction @@ -837,15 +987,21 @@ class AssetList: # We 
attempt to process the year built column if self.landlord_year_built is not None: # We check if we have a datetime - year built has not been renamed - if isinstance(self.standardised_asset_list[self.landlord_year_built].iloc[0], datetime): + if isinstance( + self.standardised_asset_list[self.landlord_year_built].iloc[0], datetime + ): # We treat any string columns - with common values we see self.standardised_asset_list[self.landlord_year_built] = ( - self.standardised_asset_list[self.landlord_year_built].replace(self.DATETIME_REMAP) + self.standardised_asset_list[self.landlord_year_built].replace( + self.DATETIME_REMAP + ) ) no_data_codes = {"No Data": None} self.standardised_asset_list[self.landlord_year_built] = ( - self.standardised_asset_list[self.landlord_year_built].replace(no_data_codes) + self.standardised_asset_list[self.landlord_year_built].replace( + no_data_codes + ) ) self.standardised_asset_list[self.landlord_year_built] = pd.to_datetime( @@ -866,7 +1022,7 @@ class AssetList: "UNKNOWN", "This cell has an external reference that can't be shown or edited. 
Editing this cell will " "remove the external reference.", - 0 + 0, } if pd.isnull(date_str) or date_str in known_errors: @@ -889,7 +1045,9 @@ class AssetList: return int(match.group(1)) # Find all 4-digit years in string - years = [int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str)] + years = [ + int(y) for y in re.findall(r"\b(?:19|20)\d{2}\b", date_str) + ] if years: return max(years) # Return most recent year @@ -898,38 +1056,42 @@ class AssetList: if len(numeric_str) == 4 and numeric_str.isdigit(): return int(numeric_str) - raise NotImplementedError(f"Unhandled format for year built, value is {date_str} - implement me") + raise NotImplementedError( + f"Unhandled format for year built, value is {date_str} - implement me" + ) - self.standardised_asset_list[self.landlord_year_built] = self.standardised_asset_list[ - self.landlord_year_built - ].apply(extract_year) + self.standardised_asset_list[self.landlord_year_built] = ( + self.standardised_asset_list[self.landlord_year_built].apply( + extract_year + ) + ) # We now create standard lookups to_remap = { self.landlord_property_type: { "standard_values": property_type_mappings.STANDARD_PROPERTY_TYPES, - "standard_map": property_type_mappings.PROPERTY_MAPPING + "standard_map": property_type_mappings.PROPERTY_MAPPING, }, self.landlord_built_form: { "standard_values": built_form_mappings.STANDARD_BUILT_FORMS, - "standard_map": built_form_mappings.BUILT_FORM_MAPPINGS + "standard_map": built_form_mappings.BUILT_FORM_MAPPINGS, }, self.landlord_wall_construction: { "standard_values": walls_mappings.STANDARD_WALL_CONSTRUCTIONS, - "standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS + "standard_map": walls_mappings.WALL_CONSTRUCTION_MAPPINGS, }, self.landlord_heating_system: { "standard_values": heating_mappings.STANDARD_HEATING_SYSTEMS, - "standard_map": heating_mappings.HEATING_MAPPINGS + "standard_map": heating_mappings.HEATING_MAPPINGS, }, self.landlord_existing_pv: { "standard_values": 
existing_pv_mappings.STANDARD_EXISTING_PV, - "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS + "standard_map": existing_pv_mappings.EXISTING_PV_MAPPINGS, }, self.landlord_roof_construction: { "standard_values": roof_mappings.STANDARD_ROOF_CONSTRUCTIONS, - "standard_map": roof_mappings.ROOF_CONSTRUCTION_MAPPINGS - } + "standard_map": roof_mappings.ROOF_CONSTRUCTION_MAPPINGS, + }, } # Keep just entries where the key is not None to_remap = {k: v for k, v in to_remap.items() if k is not None} @@ -937,11 +1099,18 @@ class AssetList: for variable, config in to_remap.items(): logger.info("Standardising variable: %s", variable) # Strip each of these columns - self.standardised_asset_list[variable] = self.standardised_asset_list[variable].str.strip() + self.standardised_asset_list[variable] = self.standardised_asset_list[ + variable + ].str.strip() values_to_remap = self.standardised_asset_list[variable].unique() # We want to map this to our standardised list of property types we're interested in - remapper = DataRemapper(standard_values=config["standard_values"], standard_map=config["standard_map"]) - remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist()) + remapper = DataRemapper( + standard_values=config["standard_values"], + standard_map=config["standard_map"], + ) + remap_dictionary = remapper.standardize_list( + values_to_remap=values_to_remap.tolist() + ) self.variable_mappings[variable] = remap_dictionary # We now print out the variable mappings, which can be reviewed by the user, before the final standardised @@ -963,9 +1132,12 @@ class AssetList: if self.phase: # We filter on just the properties that have had an inspection - if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: + if ( + self.new_format_non_insturives_present_v2 + or self.solar_non_intrusives_present + ): self.standardised_asset_list = self.standardised_asset_list[ - ~self.standardised_asset_list['NAME OF SURVEYOR'].isin( + 
~self.standardised_asset_list["NAME OF SURVEYOR"].isin( ["YET TO BE SURVEYED", "", None] ) ] @@ -974,7 +1146,9 @@ class AssetList: ] else: self.standardised_asset_list = self.standardised_asset_list[ - ~self.standardised_asset_list['Surveyors Name'].isin(["YET TO BE SURVEYED"]) + ~self.standardised_asset_list["Surveyors Name"].isin( + ["YET TO BE SURVEYED"] + ) ] if not self.variable_mappings and not override_empty_mappings: @@ -986,7 +1160,9 @@ class AssetList: self.standardised_asset_list[variable + "_original_from_landlord"] = ( self.standardised_asset_list[variable].copy() ) - self.standardised_asset_list[variable] = self.standardised_asset_list[variable].map(mapping) + self.standardised_asset_list[variable] = self.standardised_asset_list[ + variable + ].map(mapping) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): # Drop the dupes @@ -998,13 +1174,28 @@ class AssetList: # Keep a record of duplicates self.duplicated_addresses = self.standardised_asset_list[ self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated() - ][[self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, self.postcode_colname]].copy() + ][ + [ + self.DOMNA_PROPERTY_ID, + self.full_address_colname, + self.address1_colname, + self.postcode_colname, + ] + ].copy() df = self.standardised_asset_list[ self.standardised_asset_list[self.DOMNA_PROPERTY_ID].isin( - self.duplicated_addresses[self.DOMNA_PROPERTY_ID]) - ][[self.landlord_property_id, self.DOMNA_PROPERTY_ID, self.full_address_colname, self.address1_colname, - self.postcode_colname]].copy() + self.duplicated_addresses[self.DOMNA_PROPERTY_ID] + ) + ][ + [ + self.landlord_property_id, + self.DOMNA_PROPERTY_ID, + self.full_address_colname, + self.address1_colname, + self.postcode_colname, + ] + ].copy() df = df.sort_values(by=[self.DOMNA_PROPERTY_ID]) @@ -1020,13 +1211,14 @@ class AssetList: k + "_original_from_landlord" for k in self.variable_mappings.keys() ] - self.standardised_asset_list 
= self.standardised_asset_list[self.keep_variables].rename( - columns=self.rename_map - ) + self.standardised_asset_list = self.standardised_asset_list[ + self.keep_variables + ].rename(columns=self.rename_map) # We fill any standard columns that are not in the data because they were not provided by the landlord missing_variables = [ - v for v in [ + v + for v in [ self.STANDARD_EXISTING_PV, self.STANDARD_HEATING_SYSTEM, self.STANDARD_UPRN, @@ -1035,7 +1227,8 @@ class AssetList: self.STANDARD_WALL_CONSTRUCTION, self.STANDARD_HEATING_SYSTEM, self.STANDARD_BLOCK_REFERENCE, - ] if v not in self.standardised_asset_list.columns + ] + if v not in self.standardised_asset_list.columns ] for v in missing_variables: self.standardised_asset_list[v] = None @@ -1050,13 +1243,13 @@ class AssetList: self.standardised_asset_list[self.STANDARD_SAP] = ( self.standardised_asset_list[self.STANDARD_SAP] .astype(str) - .str.replace('\xa0', ' ', regex=False) + .str.replace("\xa0", " ", regex=False) .str.strip() ) self.standardised_asset_list[self.STANDARD_SAP] = np.where( self.standardised_asset_list[self.STANDARD_SAP] == "", None, - self.standardised_asset_list[self.STANDARD_SAP] + self.standardised_asset_list[self.STANDARD_SAP], ) self.standardised_asset_list[self.STANDARD_SAP] = ( self.standardised_asset_list[self.STANDARD_SAP].astype(float) @@ -1065,10 +1258,13 @@ class AssetList: self.standardised_asset_list[self.STANDARD_SAP] = np.where( self.standardised_asset_list[self.STANDARD_SAP] == 0, None, - self.standardised_asset_list[self.STANDARD_SAP] + self.standardised_asset_list[self.STANDARD_SAP], ) - has_blocks_of_flats = (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats").sum() + has_blocks_of_flats = ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" + ).sum() # Perform block splitting, ahead of fetching the EPC data # If we blocks of flats, without a landlord block reference, we create this @@ -1083,13 +1279,12 @@ class 
AssetList: :return: """ if self.DOMNA_PROPERTY_ID not in df.columns: - raise ValueError(f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}") + raise ValueError( + f"Dataframe must contain the column {self.DOMNA_PROPERTY_ID}" + ) if df[self.DOMNA_PROPERTY_ID].duplicated().sum(): - df = df.drop_duplicates( - subset=[self.DOMNA_PROPERTY_ID], - keep="first" - ) + df = df.drop_duplicates(subset=[self.DOMNA_PROPERTY_ID], keep="first") self.standardised_asset_list = self.standardised_asset_list.merge( df, how="left", on=self.DOMNA_PROPERTY_ID @@ -1098,9 +1293,14 @@ class AssetList: def extract_attributes(self, pull_epc=True): # Used to extracty the typical attributes that we use to identify viable work - self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR] = ( - self.standardised_asset_list[self.FIND_EPC_DATA_NAMES["Solar photovoltaics"]] | - ~self.standardised_asset_list[self.EPC_API_DATA_NAMES["photo-supply"]].isin(["0.0", 0, None, "", np.nan]) + self.standardised_asset_list[ + self.ATTRIBUTE_HAS_SOLAR + ] = self.standardised_asset_list[ + self.FIND_EPC_DATA_NAMES["Solar photovoltaics"] + ] | ~self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["photo-supply"] + ].isin( + ["0.0", 0, None, "", np.nan] ) accepted_epc_property_types = ["House", "Flat", "Bungalow", "Maisonette"] @@ -1109,83 +1309,127 @@ class AssetList: # 1) Take the property type provided by the HA themselves # 2) In absence of that, take the EPC property type # 3) Otherwise use None - self.standardised_asset_list[self.ATTRIBUTE_NUMBER_OF_FLOORS] = self.standardised_asset_list.apply( - lambda x: estimate_number_of_floors( - property_type=( - str(x[self.STANDARD_PROPERTY_TYPE]).title() if - str(x[self.STANDARD_PROPERTY_TYPE]).title() in accepted_epc_property_types else ( - x[self.EPC_API_DATA_NAMES["property-type"]] if not - pd.isnull(x[self.EPC_API_DATA_NAMES["property-type"]]) else None + self.standardised_asset_list[self.ATTRIBUTE_NUMBER_OF_FLOORS] = ( + 
self.standardised_asset_list.apply( + lambda x: estimate_number_of_floors( + property_type=( + str(x[self.STANDARD_PROPERTY_TYPE]).title() + if str(x[self.STANDARD_PROPERTY_TYPE]).title() + in accepted_epc_property_types + else ( + x[self.EPC_API_DATA_NAMES["property-type"]] + if not pd.isnull( + x[self.EPC_API_DATA_NAMES["property-type"]] + ) + else None + ) ) - ) - ), - axis=1 + ), + axis=1, + ) ) self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]].astype(float) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ].astype(float) ) # Replace "" value with None - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]].replace("", None) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ].replace( + "", None ) - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["number-habitable-rooms"]].astype(float) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ] = self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["number-habitable-rooms"] + ].astype( + float ) # Estimate the perimeter # Handle funky edge case - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = np.where( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] == 0), - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]].mean(), - self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] - ) - - self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = self.standardised_asset_list.apply( - lambda x: estimate_perimeter( - 
floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], - num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], - ), axis=1 - ) - - self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = self.standardised_asset_list.apply( - lambda x: estimate_external_wall_area( - num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS], - floor_height=( - float(x[self.EPC_API_DATA_NAMES["floor-height"]]) if - not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) else 2.5 + self.standardised_asset_list[self.EPC_API_DATA_NAMES["total-floor-area"]] = ( + np.where( + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ] + == 0 ), - perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER], - built_form=x[self.EPC_API_DATA_NAMES["built-form"]] - ), - axis=1 + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ].mean(), + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["total-floor-area"] + ], + ) ) - + + self.standardised_asset_list[self.ATTRIBUTE_ESTIMATED_PERIMETER] = ( + self.standardised_asset_list.apply( + lambda x: estimate_perimeter( + floor_area=x[self.EPC_API_DATA_NAMES["total-floor-area"]] + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + num_rooms=x[self.EPC_API_DATA_NAMES["number-habitable-rooms"]] + / x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + ), + axis=1, + ) + ) + + self.standardised_asset_list[self.ATTRIBUTE_HEAT_LOSS_AREA] = ( + self.standardised_asset_list.apply( + lambda x: estimate_external_wall_area( + num_floors=x[self.ATTRIBUTE_NUMBER_OF_FLOORS], + floor_height=( + float(x[self.EPC_API_DATA_NAMES["floor-height"]]) + if not pd.isnull(x[self.EPC_API_DATA_NAMES["floor-height"]]) + else 2.5 + ), + perimeter=x[self.ATTRIBUTE_ESTIMATED_PERIMETER], + built_form=x[self.EPC_API_DATA_NAMES["built-form"]], + ), + axis=1, + ) + ) + col = self.EPC_API_DATA_NAMES["roof-description"] - 
self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = self.standardised_asset_list.apply( - lambda x: RoofAttributes(description=x[col]).process()[ - "insulation_thickness"] if not pd.isnull( - x[col]) else None, - axis=1 + self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = ( + self.standardised_asset_list.apply( + lambda x: ( + RoofAttributes(description=x[col]).process()["insulation_thickness"] + if not pd.isnull(x[col]) + else None + ), + axis=1, + ) ) - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS] = ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].str.replace("+", "") + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].str.replace("+", "") ) # We produce some additional fields # 1) Is the SAP rating below C75 self.standardised_asset_list[self.ATTRIBUTE_SAP_THRESHOLD_AND_BELOW] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]].astype(float) <= - self.FILLED_CAVITY_SAP_THRESHOLD + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ].astype(float) + <= self.FILLED_CAVITY_SAP_THRESHOLD ) # 2) Flag anything where the EPC is older than 5 years self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] = ( pd.to_datetime( self.standardised_asset_list[self.EPC_API_DATA_NAMES["inspection-date"]] - ).dt.year < self.EPC_YEAR_THRESHOLD + ).dt.year + < self.EPC_YEAR_THRESHOLD ) self.process_age_band() @@ -1195,30 +1439,37 @@ class AssetList: for _, x in self.standardised_asset_list.iterrows(): if pd.isnull(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) or ( - x[self.EPC_API_DATA_NAMES["construction-age-band"]] in Definitions.DATA_ANOMALY_MATCHES + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + in Definitions.DATA_ANOMALY_MATCHES ): processed_age_band.append( { self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": None, 
"epc_year_upper_bound": None, - "does_age_band_match_epc_age_band": "No EPC Age Band" + "does_age_band_match_epc_age_band": "No EPC Age Band", } ) continue # We exatract the upper and lower bounds if x[self.EPC_API_DATA_NAMES["construction-age-band"]] in [ - "England and Wales: 2007 onwards", "England and Wales: 2012 onwards" + "England and Wales: 2007 onwards", + "England and Wales: 2012 onwards", ]: - year_lower_bound = 2007 if x[self.EPC_API_DATA_NAMES[ - "construction-age-band"]] == "England and Wales: 2007 onwards" else 2012 + year_lower_bound = ( + 2007 + if x[self.EPC_API_DATA_NAMES["construction-age-band"]] + == "England and Wales: 2007 onwards" + else 2012 + ) if pd.isnull(x[self.STANDARD_YEAR_BUILT]): age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] >= year_lower_bound + "EPC Age Band Matches Year Built" + if x[self.STANDARD_YEAR_BUILT] >= year_lower_bound else "EPC Age Band is older than Year Built" ) @@ -1227,18 +1478,22 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": year_lower_bound, "epc_year_upper_bound": None, - "does_age_band_match_epc_age_band": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches, } ) continue - if x[self.EPC_API_DATA_NAMES["construction-age-band"]] == "England and Wales: before 1900": + if ( + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + == "England and Wales: before 1900" + ): if pd.isnull(x[self.STANDARD_YEAR_BUILT]): age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] < 1900 + "EPC Age Band Matches Year Built" + if x[self.STANDARD_YEAR_BUILT] < 1900 else "EPC Age Band is newer than Year Built" ) @@ -1247,7 +1502,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": None, "epc_year_upper_bound": 1899, - 
"does_age_band_match_epc_age_band": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches, } ) continue @@ -1258,35 +1513,44 @@ class AssetList: age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if x[self.STANDARD_YEAR_BUILT] == int( - x[self.EPC_API_DATA_NAMES["construction-age-band"]] - ) + "EPC Age Band Matches Year Built" + if x[self.STANDARD_YEAR_BUILT] + == int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]) else "EPC Age Band is different from Year Built" ) processed_age_band.append( { self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], - "epc_year_lower_bound": int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]), - "epc_year_upper_bound": int(x[self.EPC_API_DATA_NAMES["construction-age-band"]]), - "does_age_band_match_epc_age_band": age_band_matches + "epc_year_lower_bound": int( + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + ), + "epc_year_upper_bound": int( + x[self.EPC_API_DATA_NAMES["construction-age-band"]] + ), + "does_age_band_match_epc_age_band": age_band_matches, } ) continue # Oherwise, we extract the upper and lower bounds - age_band = x[self.EPC_API_DATA_NAMES["construction-age-band"]].split(": ")[1] + age_band = x[self.EPC_API_DATA_NAMES["construction-age-band"]].split(": ")[ + 1 + ] lower_date, upper_date = age_band.split("-") if not x[self.STANDARD_YEAR_BUILT]: age_band_matches = "No Year Built From Landlord" else: age_band_matches = ( - "EPC Age Band Matches Year Built" if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) and ( - x[self.STANDARD_YEAR_BUILT] <= float(upper_date) + "EPC Age Band Matches Year Built" + if (x[self.STANDARD_YEAR_BUILT] >= float(lower_date)) + and (x[self.STANDARD_YEAR_BUILT] <= float(upper_date)) + else ( + "EPC Age Band is older than Year Built" + if x[self.STANDARD_YEAR_BUILT] > float(upper_date) + else "EPC Age Band is newer than Year Built" ) - else "EPC Age Band is older than Year Built" if 
x[self.STANDARD_YEAR_BUILT] > float(upper_date) - else "EPC Age Band is newer than Year Built" ) processed_age_band.append( @@ -1294,7 +1558,7 @@ class AssetList: self.DOMNA_PROPERTY_ID: x[self.DOMNA_PROPERTY_ID], "epc_year_lower_bound": int(lower_date), "epc_year_upper_bound": int(upper_date), - "does_age_band_match_epc_age_band": age_band_matches + "does_age_band_match_epc_age_band": age_band_matches, } ) @@ -1310,34 +1574,54 @@ class AssetList: # We add a SAP category for all work type identification self.standardised_asset_list["SAP Category"] = np.where( ( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54) | - (self.standardised_asset_list[self.STANDARD_SAP] <= 54) + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 54 + ) + | (self.standardised_asset_list[self.STANDARD_SAP] <= 54) ), "SAP Rating 54 or less", np.where( ( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68) | - (self.standardised_asset_list[self.STANDARD_SAP] <= 68) + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 68 + ) + | (self.standardised_asset_list[self.STANDARD_SAP] <= 68) ), "SAP Rating 55-68", np.where( ( ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= - self.EMPTY_CAVITY_SAP_THRESHOLD - ) | (self.standardised_asset_list[self.STANDARD_SAP] <= self.EMPTY_CAVITY_SAP_THRESHOLD) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) + | ( + self.standardised_asset_list[self.STANDARD_SAP] + <= self.EMPTY_CAVITY_SAP_THRESHOLD + ) ), f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", - f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more", ), - ) + ), ) self.standardised_asset_list["SAP Category"] = np.where( - 
pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) & - pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + pd.isnull(self.standardised_asset_list[self.STANDARD_SAP]) + & pd.isnull( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + ), "SAP Unknown", - self.standardised_asset_list["SAP Category"] + self.standardised_asset_list["SAP Category"], ) else: @@ -1345,55 +1629,81 @@ class AssetList: # We break into 4 categories (54 or less, 55-68, 69-74, 75 or more) self.standardised_asset_list["SAP Category"] = np.where( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 54), + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 54 + ), "SAP Rating 54 or less", np.where( - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 68), + ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 68 + ), "SAP Rating 55-68", np.where( ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= - self.EMPTY_CAVITY_SAP_THRESHOLD + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= self.EMPTY_CAVITY_SAP_THRESHOLD ), f"SAP Rating 69-{self.EMPTY_CAVITY_SAP_THRESHOLD}", - f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more" + f"SAP Rating {self.EMPTY_CAVITY_SAP_THRESHOLD + 1} or more", ), - ) + ), ) self.standardised_asset_list["SAP Category"] = np.where( - pd.isnull(self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]]), + pd.isnull( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + ), "SAP Unknown", - self.standardised_asset_list["SAP Category"] + self.standardised_asset_list["SAP Category"], ) # Before we being, we identify if a property has solar already as we use this # for identifying 
cavity jobs if self.non_intrusives_present and not self.old_format_non_intrusives_present: - if self.new_format_non_insturives_present_v2 or self.solar_non_intrusives_present: + if ( + self.new_format_non_insturives_present_v2 + or self.solar_non_intrusives_present + ): existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"].str.strip().isin( - ["ALREADY HAS SOLAR PV", "ALREADY HAS PV"] - ) + self.standardised_asset_list["non-intrusives: ROOF ORIENTATION"] + .str.strip() + .isin(["ALREADY HAS SOLAR PV", "ALREADY HAS PV"]) ) else: existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: PV, ACCESS ISSUE, SEE NOTES"] == "SOLAR PV ON ROOF" + self.standardised_asset_list[ + "non-intrusives: PV, ACCESS ISSUE, SEE NOTES" + ] + == "SOLAR PV ON ROOF" ) elif self.old_format_non_intrusives_present: existing_solar_non_intrusives_check = ( - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( - ["solar pv on roof"] - ) + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .isin(["solar pv on roof"]) ) else: # We don't have an indication existing_solar_non_intrusives_check = False self.standardised_asset_list["property_has_solar"] = ( - (self.standardised_asset_list[self.STANDARD_EXISTING_PV] == "already has PV") | - existing_solar_non_intrusives_check | - (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) + ( + self.standardised_asset_list[self.STANDARD_EXISTING_PV] + == "already has PV" + ) + | existing_solar_non_intrusives_check + | (self.standardised_asset_list[self.ATTRIBUTE_HAS_SOLAR]) ) # If we have non-intrusives completed, we can use this to identify work types @@ -1407,38 +1717,60 @@ class AssetList: if self.non_intrusives_present: if self.new_format_non_insturives_present_v2: non_intrusives_wall_filter = ( - (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - 
self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL", "EMPTY CAVITY"]) + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + ["EMPTY", "PARTIAL", "EMPTY CAVITY"] ) else: non_intrusives_wall_filter = ( - (self.standardised_asset_list['non-intrusives: Construction'] == "CAVITY") & - self.standardised_asset_list['non-intrusives: Insulated'].isin(["EMPTY", "PARTIAL"]) + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) & self.standardised_asset_list["non-intrusives: Insulated"].isin( + ["EMPTY", "PARTIAL"] ) elif self.old_format_non_intrusives_present: - non_intrusives_wall_filter = ( - self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( - ["empty cavity", "partial fill", "empty", "EMPTY CAVITY 70MM", "partial", "empty cav"] - ) | ( - ( - self.standardised_asset_list['non-intrusives: WFT Findings'] - .str.lower().str.strip().str.contains("empty cavity|partial fill") & - ~self.standardised_asset_list['non-intrusives: WFT Findings'] - .astype(str).str.lower().str.strip().str.contains("major access issues") - ) - ) - ) + non_intrusives_wall_filter = self.standardised_asset_list[ + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( + [ + "empty cavity", + "partial fill", + "empty", + "EMPTY CAVITY 70MM", + "partial", + "empty cav", + ] + ) | ( + ( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .str.contains("empty cavity|partial fill") + & ~self.standardised_asset_list["non-intrusives: WFT Findings"] + .astype(str) + .str.lower() + .str.strip() + .str.contains("major access issues") + ) + ) else: # We set the filter to False, as we have no non-intrusives non_intrusives_wall_filter = False if self.landlord_year_built is None: - year_built_filter = self.standardised_asset_list["epc_year_upper_bound"] <= 
self.EMPTY_CAVITY_YEAR_THRESHOLD + year_built_filter = ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) else: year_built_filter = ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | - (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) - ) + self.standardised_asset_list[self.STANDARD_YEAR_BUILT] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) | ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) # Criteria: # The property isn't a bedsit @@ -1446,74 +1778,118 @@ class AssetList: # The EPC year is before 2002 # We also flag where the property has solar on the roof, because this is a signal of a high EPC rating self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] = ( - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - year_built_filter & ( - ~self.standardised_asset_list["property_has_solar"] + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) + & non_intrusives_wall_filter + & year_built_filter + & (~self.standardised_asset_list["property_has_solar"]) ) - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] = ( - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter & - year_built_filter & - ( + self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity_has_solar" + ] = ( + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) + ) + & non_intrusives_wall_filter + & year_built_filter + & ( # If the property has solar, there's a chance it won't qualify self.standardised_asset_list["property_has_solar"] ) ) # We also 
add a filter on anything that was generally identified by the non-intrusives - self.standardised_asset_list["non_intrusive_indicates_empty_cavity_no_year_filter"] = ( - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity_has_solar"] & - (~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"])) & - non_intrusives_wall_filter + self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity_no_year_filter" + ] = ( + ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity_has_solar" + ] + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) + ) + & non_intrusives_wall_filter ) - if (not self.non_intrusives_eligibility) and (not self.old_format_non_intrusives_present): + if (not self.non_intrusives_eligibility) and ( + not self.old_format_non_intrusives_present + ): # If we have NO inspections data, we capture all of the wall types and don't filter on age of the EPC self.standardised_asset_list["epc_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ] + .str.lower() + .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) + & ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) ) else: self.standardised_asset_list["epc_indicates_empty_cavity"] = ( - 
self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) & ( - self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD - ) & ( - ~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD] - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["walls-description"] + ] + .str.lower() + .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) + & ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + & (~self.standardised_asset_list[self.ATTRIBUTE_EPC_PRE_YEAR_THRESHOLD]) + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) ) self.standardised_asset_list["landlord_data_indicates_empty_cavity"] = ( - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) & - ( - (self.standardised_asset_list[self.STANDARD_YEAR_BUILT] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) | - (self.standardised_asset_list["epc_year_upper_bound"] <= self.EMPTY_CAVITY_YEAR_THRESHOLD) - ) & ( - ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin(["bedsit"]) + self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + ["uninsulated cavity"] + ) + & ( + ( + self.standardised_asset_list[self.STANDARD_YEAR_BUILT] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + | ( + self.standardised_asset_list["epc_year_upper_bound"] + <= self.EMPTY_CAVITY_YEAR_THRESHOLD + ) + ) + & ( + ~self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE].isin( + ["bedsit"] + ) ) ) # Finally, we create a flag to indicate that the cavity is empty, based on the criteria above self.standardised_asset_list["cavity_is_empty"] = ( - non_intrusives_wall_filter | - self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]].str.lower().isin( - self.EPC_NO_WALL_INSULATION_DESCRIPTIONS - ) | - 
self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin(["uninsulated cavity"]) + non_intrusives_wall_filter + | self.standardised_asset_list[self.EPC_API_DATA_NAMES["walls-description"]] + .str.lower() + .isin(self.EPC_NO_WALL_INSULATION_DESCRIPTIONS) + | self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + ["uninsulated cavity"] + ) ) ###################################################### @@ -1524,127 +1900,211 @@ class AssetList: if self.non_intrusives_present: extraction_wall_filter = ( - (self.standardised_asset_list["non-intrusives: Construction"] == "CAVITY") & - (self.standardised_asset_list["non-intrusives: Insulated"].isin(["RETRO DRILLED", "FILLED AT BUILD"])) & - (~self.standardised_asset_list['non-intrusives: Material'].isin( - ["GREY LOOSE BEAD", "COMPACTED BEAD", "FIBRE BATT NO CAVITY", "EMPTY NARROW BELOW 30mm"] - )) + ( + self.standardised_asset_list["non-intrusives: Construction"] + == "CAVITY" + ) + & ( + self.standardised_asset_list["non-intrusives: Insulated"].isin( + ["RETRO DRILLED", "FILLED AT BUILD"] + ) + ) + & ( + ~self.standardised_asset_list["non-intrusives: Material"].isin( + [ + "GREY LOOSE BEAD", + "COMPACTED BEAD", + "FIBRE BATT NO CAVITY", + "EMPTY NARROW BELOW 30mm", + ] + ) + ) ) if self.non_intrusives_eligibility: # If we have the eligibility column, we check if the wall is eligible extraction_wall_filter = ( - extraction_wall_filter & - ~self.standardised_asset_list["non-intrusives: Eligibility (Red/Yellow/Green)"].isin( - ["RED"] - ) + extraction_wall_filter + & ~self.standardised_asset_list[ + "non-intrusives: Eligibility (Red/Yellow/Green)" + ].isin(["RED"]) ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - extraction_wall_filter & year_built_filter - ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = ( - extraction_wall_filter & ~year_built_filter - ) + self.standardised_asset_list[ + 
"non_intrusive_indicates_cavity_extraction" + ] = (extraction_wall_filter & year_built_filter) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] = (extraction_wall_filter & ~year_built_filter) elif self.old_format_non_intrusives_present: print("Review these categories!!!!") extraction_wall_filter = ( - self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .isin( [ - 'blown in yellow wool', 'retro drilled & filled', 'white fibre from build', - 'foam filled from build', 'retro drilled gas in block', 'block in rock wool', 'rdf / tilehung', - 'fibre from build', 'blown in rock wool', 'rdf / tile hung', 'retro drilled', - 'rock wool from build', 'part rendered retro drilled', 'white fibtr from build.', - 'retro drilled and filled', 'blown in white wool', 'blown in yellow fibre from build', 'rdf', - 'polybead', 'foam filled', 'blown in white bead from build', 'blown in yellow fibre', - 'retro drilled det', 'blown in rockwool', 'retro drilled det empty cav', 'retro drilled end', - 'retro filled extension', 'retro filled', 'foam' + "blown in yellow wool", + "retro drilled & filled", + "white fibre from build", + "foam filled from build", + "retro drilled gas in block", + "block in rock wool", + "rdf / tilehung", + "fibre from build", + "blown in rock wool", + "rdf / tile hung", + "retro drilled", + "rock wool from build", + "part rendered retro drilled", + "white fibtr from build.", + "retro drilled and filled", + "blown in white wool", + "blown in yellow fibre from build", + "rdf", + "polybead", + "foam filled", + "blown in white bead from build", + "blown in yellow fibre", + "retro drilled det", + "blown in rockwool", + "retro drilled det empty cav", + "retro drilled end", + "retro filled extension", + "retro filled", + "foam", ] ) ) - 
self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - extraction_wall_filter - ) - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] = extraction_wall_filter + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] = False else: - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = False - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] = False + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] = False + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] = False ###################################################### # Solar ###################################################### # Criteria: # Check 1: Does the property have a valid heating system? - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] = ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - [ - "air source heat pump", - "ground source heat pump", - "high heat retention storage heaters", - "electric boiler" - ] - ) + self.standardised_asset_list[ + "solar_landlord_data_indicates_correct_heating_system" + ] = self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + [ + "air source heat pump", + "ground source heat pump", + "high heat retention storage heaters", + "electric boiler", + ] ) - self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] = ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( - ["electric storage heaters", "room heaters", "electric radiators", "no heating", "electric fuel"] - ) + self.standardised_asset_list[ + "solar_landlord_data_indicates_needs_heating_upgrade" + ] = self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( + [ + "electric 
storage heaters", + "room heaters", + "electric radiators", + "no heating", + "electric fuel", + ] ) - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] - .str.lower().str.contains("air source heat pump|ground source heat pump|boiler and radiators, electric") + self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] = ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains( + "air source heat pump|ground source heat pump|boiler and radiators, electric" + ) ) | ( self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters" - ) & ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES[ - "mainheatcont-description"]] == "Controls for high heat retention storage heaters" + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains("electric storage heaters") + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheatcont-description"] + ] + == "Controls for high heat retention storage heaters" ) ) - ) # If the landlord has given us the heating system, we default to that on heating upgrades. Because of the # poor heating in place, if the EPC indicates that this property had a low efficiency heating system but the # landlord data suggests otherwise (e.g. 
there's a gas boiler), we default to what the landlord has told us - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]].str.lower().str.contains( - "electric storage heaters|room heaters" - ) & ( + self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] = ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .str.contains("electric storage heaters|room heaters") + & ( self.standardised_asset_list[ self.EPC_API_DATA_NAMES["mainheatcont-description"] - ] != "Controls for high heat retention storage heaters" + ] + != "Controls for high heat retention storage heaters" ) ) & ( ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].isin( ["district heating", "communal heating", "communal gas boiler"] - ) & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM].astype(str).str.contains("gas ") + ) + & ~self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] + .astype(str) + .str.contains("gas ") ) - ) # Basic check - both of the previous two shouldn't be true simultaneously if ( - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] & - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] + & self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] ).sum(): - logger.info("We have an example of both heating system checks being true - checking known cases") - known_edge_cases = ['Ground source heat pump, radiators, electric, Electric storage heaters'] + logger.info( + "We have an example of both heating system checks being true - checking known cases" + ) + known_edge_cases = [ + "Ground source heat pump, radiators, electric, Electric storage heaters" + ] error_cases = 
self.standardised_asset_list[ ( - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] & - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] + & self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] ) ] - if all(error_cases[self.EPC_API_DATA_NAMES["mainheat-description"]].isin(known_edge_cases)): + if all( + error_cases[self.EPC_API_DATA_NAMES["mainheat-description"]].isin( + known_edge_cases + ) + ): logger.info("Within known edge cases") else: - raise ValueError("Both heating system checks are true - this should not be possible") + raise ValueError( + "Both heating system checks are true - this should not be possible" + ) # Check 3: Does the property meet the fabric condition # Solar PV installs are subject to the minimum insulation requirements which means: @@ -1663,19 +2123,19 @@ class AssetList: # With this in mind, we look for 2 clases # 1) The property is fully insulated apart from the loft (<200mm insulation) # 2) THe property is fully insulated - self.standardised_asset_list["solar_landlord_walls_insulated"] = ( - self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( - [ - "filled cavity", - "insulated solid brick", - "insulated timber frame", - "uninsulated cavity", - "insulated system built", - "insulated granite or whinstone", - "insulated sandstone or limestone", - "new build - average thermal transmittance" - ] - ) + self.standardised_asset_list[ + "solar_landlord_walls_insulated" + ] = self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( + [ + "filled cavity", + "insulated solid brick", + "insulated timber frame", + "uninsulated cavity", + "insulated system built", + "insulated granite or whinstone", + "insulated sandstone or limestone", + "new build - average thermal transmittance", + ] ) if self.non_intrusives_present: @@ -1685,34 +2145,51 @@ 
class AssetList: ) ) elif self.old_format_non_intrusives_present: - self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = ( - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().isin( - [ - "retro drilled", "retro filled", "ewi", "retro drilled/ solid", "retro drilled and filled", - ] - ) | - self.standardised_asset_list["non-intrusives: WFT Findings"].str.lower().str.strip().str.contains( - "retro drilled" - ) + self.standardised_asset_list[ + "solar_non_intrusives_walls_insulated" + ] = self.standardised_asset_list[ + "non-intrusives: WFT Findings" + ].str.lower().str.strip().isin( + [ + "retro drilled", + "retro filled", + "ewi", + "retro drilled/ solid", + "retro drilled and filled", + ] + ) | self.standardised_asset_list[ + "non-intrusives: WFT Findings" + ].str.lower().str.strip().str.contains( + "retro drilled" ) else: self.standardised_asset_list["solar_non_intrusives_walls_insulated"] = False self.standardised_asset_list["walls_u_value"] = self.standardised_asset_list[ self.EPC_API_DATA_NAMES["walls-description"] - ].apply(lambda x: WallAttributes(x).process()["thermal_transmittance"] if not pd.isnull(x) else None) - - self.standardised_asset_list["solar_epc_walls_insulated"] = ( - ( - self.standardised_asset_list[ - self.EPC_API_DATA_NAMES["walls-description"]].str.lower().str.contains( - "|".join(self.EPC_INSULATED_WALLS_SUBSTRINGS) - ) - ) | ( - self.standardised_asset_list["walls_u_value"].apply(lambda x: x <= 0.7 if not pd.isnull(x) else False) + ].apply( + lambda x: ( + WallAttributes(x).process()["thermal_transmittance"] + if not pd.isnull(x) + else None ) ) + self.standardised_asset_list["solar_epc_walls_insulated"] = ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES[ + "walls-description"]] + .str.lower() + .str.contains("|".join( + self.EPC_INSULATED_WALLS_SUBSTRINGS)) + ) | ( + self.standardised_asset_list[ + "walls_u_value"].apply( + lambda x: x <= 0.7 if not pd.isnull( + x) else False 
+ ) + ) + roof_data = [] for desc in self.standardised_asset_list[ self.EPC_API_DATA_NAMES["roof-description"] @@ -1722,7 +2199,7 @@ class AssetList: roof_data.append( { self.EPC_API_DATA_NAMES["roof-description"]: desc, - **RoofAttributes(desc).process() + **RoofAttributes(desc).process(), } ) roof_data = pd.DataFrame(roof_data) @@ -1733,33 +2210,40 @@ class AssetList: # If the u-value of a roof is less than 0.7 we consider it insulated self.standardised_asset_list["solar_epc_roof_insulated"] = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]].str.lower().str.contains( + self.standardised_asset_list[self.EPC_API_DATA_NAMES["roof-description"]] + .str.lower() + .str.contains( "|".join(self.EPC_INSULATED_ROOF_SUBSTRINGS), - ) | ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( - lambda x: int(x) >= 200 if str(x).isdigit() else False - ) - ) | ( + ) + | ( + self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].apply(lambda x: int(x) >= 200 if str(x).isdigit() else False) + ) + | ( self.standardised_asset_list["roof_u_value"].apply( lambda x: x <= 0.7 if not pd.isnull(x) else False ) ) ) - self.standardised_asset_list["solar_epc_loft_needs_topup"] = ( - self.standardised_asset_list[ - self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].apply( - lambda x: int(x) < 200 if str(x).isdigit() else False - ) | ( + self.standardised_asset_list[ + "solar_epc_loft_needs_topup" + ] = self.standardised_asset_list[ + self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].apply( + lambda x: int(x) < 200 if str(x).isdigit() else False + ) | ( ( - self.standardised_asset_list["is_loft"] | self.standardised_asset_list["is_pitched"] - ) & ( - self.standardised_asset_list[self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS].isin( - ["below average", "none"] - ) + self.standardised_asset_list["is_loft"] + | self.standardised_asset_list["is_pitched"] + ) + & ( + self.standardised_asset_list[ + 
self.ATTRIBUTE_EPC_ROOF_INSULATION_THICKNESS + ].isin(["below average", "none"]) ) ) - ) self.standardised_asset_list["epc_has_floor_recommendation"] = ( self.standardised_asset_list["epc_has_floor_recommendation"].fillna(False) @@ -1768,14 +2252,16 @@ class AssetList: # Check if the boiler is electric # We check if it contains both the terms boiler & electric self.standardised_asset_list["has_electric_boiler"] = ( - ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["mainheat-description"]] - .str.lower().isin( - ["boiler and radiators, electric"]) - ) | ( - self.standardised_asset_list[self.STANDARD_HEATING_SYSTEM] == "electric boiler" - ) - ) + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["mainheat-description"] + ] + .str.lower() + .isin(["boiler and radiators, electric"]) + ) | ( + self.standardised_asset_list[ + self.STANDARD_HEATING_SYSTEM] + == "electric boiler" + ) #################################### # Check solar eligibility @@ -1783,14 +2269,22 @@ class AssetList: # Set up the filters to stop repetition correct_heating_system = ( - self.standardised_asset_list["solar_landlord_data_indicates_correct_heating_system"] | - self.standardised_asset_list["solar_epc_data_indicates_correct_heating_system"] | - self.standardised_asset_list["has_electric_boiler"] + self.standardised_asset_list[ + "solar_landlord_data_indicates_correct_heating_system" + ] + | self.standardised_asset_list[ + "solar_epc_data_indicates_correct_heating_system" + ] + | self.standardised_asset_list["has_electric_boiler"] ) needs_heating_upgrade = ( - self.standardised_asset_list["solar_landlord_data_indicates_needs_heating_upgrade"] | - self.standardised_asset_list["solar_epc_data_indicates_requires_heating_upgrade"] + self.standardised_asset_list[ + "solar_landlord_data_indicates_needs_heating_upgrade" + ] + | self.standardised_asset_list[ + "solar_epc_data_indicates_requires_heating_upgrade" + ] ) # The requirements for walls are: @@ -1799,13 +2293,17 @@ class 
AssetList: walls_meet_solar_requirements = ( # The landlord is saying the walls are insulated - self.standardised_asset_list["solar_landlord_walls_insulated"] | + self.standardised_asset_list["solar_landlord_walls_insulated"] + | # EPC data is saying the walls are insulated - self.standardised_asset_list["solar_epc_walls_insulated"] | + self.standardised_asset_list["solar_epc_walls_insulated"] + | # Non-intrusives are saying the walls are insulated - self.standardised_asset_list["solar_non_intrusives_walls_insulated"] | + self.standardised_asset_list["solar_non_intrusives_walls_insulated"] + | # It's empty cavity - self.standardised_asset_list["cavity_is_empty"] | + self.standardised_asset_list["cavity_is_empty"] + | # It's a cavity wall self.standardised_asset_list[self.STANDARD_WALL_CONSTRUCTION].isin( ["filled cavity", "partial insulated cavity"] @@ -1816,7 +2314,8 @@ class AssetList: if all(self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "unknown"): # Use EPC not_a_flat = ( - self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] != "Flat" + self.standardised_asset_list[self.EPC_API_DATA_NAMES["property-type"]] + != "Flat" ) else: not_a_flat = ( @@ -1824,32 +2323,40 @@ class AssetList: ) solar_roof_meets_criteria = ( - self.standardised_asset_list["solar_epc_roof_insulated"] | - self.standardised_asset_list["solar_epc_loft_needs_topup"] + self.standardised_asset_list["solar_epc_roof_insulated"] + | self.standardised_asset_list["solar_epc_loft_needs_topup"] ) self.standardised_asset_list["solar_eligible"] = ( # Property isn't a flag - not_a_flat & + not_a_flat + & # Landlord data or EPC data indicates the heating system is appropriate - correct_heating_system & + correct_heating_system + & # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & + ~self.standardised_asset_list["property_has_solar"] + & # The walls are insulated - walls_meet_solar_requirements & + 
walls_meet_solar_requirements + & # Roof meets criteria solar_roof_meets_criteria ) # With heating upgrade self.standardised_asset_list["solar_eligible_needs_heating_upgrade"] = ( - not_a_flat & + not_a_flat + & # Needs heating upgrade - needs_heating_upgrade & + needs_heating_upgrade + & # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & + ~self.standardised_asset_list["property_has_solar"] + & # The walls are insulated - walls_meet_solar_requirements & + walls_meet_solar_requirements + & # Roof meets criteria solar_roof_meets_criteria ) @@ -1857,15 +2364,23 @@ class AssetList: # We check for a specific sub-set of properties which are uninsulated solid wall properties that are EPC E # or below (we'll use 57 as a threshold) - These are for a pilot with Net Zero Renewables self.standardised_asset_list["solar_eligible_solid_wall_uninsulated"] = ( - not_a_flat & + not_a_flat + & # Landlord data or EPC data indicates the heating system is appropriate - in this case, we can also take # electric boilers - correct_heating_system & + correct_heating_system + & # The property doesn't currently have solar - ~self.standardised_asset_list["property_has_solar"] & + ~self.standardised_asset_list["property_has_solar"] + & # The walls are uninsulated solid - ~walls_meet_solar_requirements & - (self.standardised_asset_list[self.EPC_API_DATA_NAMES["current-energy-efficiency"]] <= 57) + ~walls_meet_solar_requirements + & ( + self.standardised_asset_list[ + self.EPC_API_DATA_NAMES["current-energy-efficiency"] + ] + <= 57 + ) ) # Drop anything we don't need @@ -1875,100 +2390,128 @@ class AssetList: # Adjust flagged extraction jobs to remove anything for solar self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] = ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - ~self.standardised_asset_list["solar_eligible"] + self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] 
+ & ~self.standardised_asset_list["solar_eligible"] ) # Finally, we note why each property has been flagged self.standardised_asset_list["cavity_reason"] = None empty_cavity_map = { - "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + ": ", + "non_intrusive_indicates_empty_cavity": self.EMPTY_CAVITY_NON_INTRUSIVE + + ": ", "non_intrusive_indicates_empty_cavity_has_solar": f"{self.EMPTY_CAVITY_NON_INTRUSIVE} - property " "already has solar: ", "non_intrusive_indicates_empty_cavity_no_year_filter": f"{self.EMPTY_CAVITY_NON_INTRUSIVE}, " f"built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: ", - } for variable, description in empty_cavity_map.items(): self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list[variable] & - pd.isnull(self.standardised_asset_list["cavity_reason"]), + self.standardised_asset_list[variable] + & pd.isnull(self.standardised_asset_list["cavity_reason"]), description + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) # We break the cavity reason into a few different categories, when the EPC is different from inspections if self.old_format_non_intrusives_present: self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (self.standardised_asset_list['non-intrusives: WFT Findings'].str.lower().str.strip().isin( - [ - "retro drilled and filled", "retro drilled", "retro filled", "retro drilled & filled", - ] - )) & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & ( + self.standardised_asset_list["non-intrusives: WFT Findings"] + .str.lower() + .str.strip() + .isin( + [ + "retro drilled and filled", + "retro 
drilled", + "retro filled", + "retro drilled & filled", + ] + ) + ) + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ - "SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - self.standardised_asset_list['non_intrusive_indicates_cavity_extraction'] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + self.standardised_asset_list[ - "SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_FILLED}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) elif self.non_intrusives_present: self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (self.standardised_asset_list['non-intrusives: Insulated'] == "RETRO DRILLED") & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & ( + self.standardised_asset_list["non-intrusives: Insulated"] + == "RETRO DRILLED" + ) + & 
pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + self.standardised_asset_list[ - "SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_RETRO_DRILLED}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - (self.standardised_asset_list['non-intrusives: Insulated'] == "FILLED AT BUILD") & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & ( + self.standardised_asset_list["non-intrusives: Insulated"] + == "FILLED AT BUILD" + ) + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_FILLED_AT_BUILD}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) else: self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list[ + "non_intrusive_indicates_empty_cavity" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), f"{self.EPC_EMPTY}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - 
self.standardised_asset_list["epc_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["epc_indicates_empty_cavity"] + & ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EPC_EMPTY_INSPECTIONS_NON_CAVITY}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) # Work type prefixes @@ -1977,34 +2520,39 @@ class AssetList: # inspections show filled self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["landlord_data_indicates_empty_cavity"] & - ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] & - ~self.standardised_asset_list["epc_indicates_empty_cavity"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list["landlord_data_indicates_empty_cavity"] + & ~self.standardised_asset_list["non_intrusive_indicates_empty_cavity"] + & ~self.standardised_asset_list["epc_indicates_empty_cavity"] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + - self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.LANDLORD_EMPTY_INSPECTIONS_OTHER}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) # Flag extraction self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction" + ] + & 
pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EXTRACTION_NON_INTRUSIVE}: " + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EXTRACTION_NON_INTRUSIVE}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) self.standardised_asset_list["cavity_reason"] = np.where( ( - self.standardised_asset_list["non_intrusive_indicates_cavity_extraction_no_year_filter"] & - pd.isnull(self.standardised_asset_list["cavity_reason"]) + self.standardised_asset_list[ + "non_intrusive_indicates_cavity_extraction_no_year_filter" + ] + & pd.isnull(self.standardised_asset_list["cavity_reason"]) ), - f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + - self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["cavity_reason"] + f"{self.EXTRACTION_NON_INTRUSIVE}, built after {self.EMPTY_CAVITY_YEAR_THRESHOLD}: " + + self.standardised_asset_list["SAP Category"], + self.standardised_asset_list["cavity_reason"], ) ###################################################### @@ -2017,75 +2565,81 @@ class AssetList: solar_reason_map = { "solar_eligible": f"{self.SOLAR_ELIGIBLE}: ", "solar_eligible_solid_wall_uninsulated": f"{self.SOLAR_ELIGIBLE_SOLID_WALL_UNINSULATED}: ", - "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: " + "solar_eligible_needs_heating_upgrade": f"{self.SOLAR_ELIGIBLE_NEEDS_HEATING_UPGRADE}: ", } for variable, reason in solar_reason_map.items(): self.standardised_asset_list["solar_reason"] = np.where( - self.standardised_asset_list[variable] & pd.isnull(self.standardised_asset_list["solar_reason"]), + self.standardised_asset_list[variable] + & pd.isnull(self.standardised_asset_list["solar_reason"]), reason + self.standardised_asset_list["SAP Category"], - self.standardised_asset_list["solar_reason"] + self.standardised_asset_list["solar_reason"], ) # Finally, 
anything flagged for solar should not be flagged for cavity - make them None self.standardised_asset_list["cavity_reason"] = np.where( ( - ~pd.isnull(self.standardised_asset_list["solar_reason"]) & - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) + ~pd.isnull(self.standardised_asset_list["solar_reason"]) + & ~pd.isnull(self.standardised_asset_list["cavity_reason"]) ), None, - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) # Flag anything that has existing outcomes - if (self.outcomes is not None) and ("surveyed" in self.standardised_asset_list.columns): + if (self.outcomes is not None) and ( + "surveyed" in self.standardised_asset_list.columns + ): if "installer refusal" not in self.standardised_asset_list.columns: self.standardised_asset_list["cavity_reason"] = np.where( - ( - (self.standardised_asset_list["surveyed"] > 0) - ), + ((self.standardised_asset_list["surveyed"] > 0)), None, - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) else: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (self.standardised_asset_list["surveyed"] > 0) | - (self.standardised_asset_list["installer refusal"] > 0) + (self.standardised_asset_list["surveyed"] > 0) + | (self.standardised_asset_list["installer refusal"] > 0) ), None, - self.standardised_asset_list[col] + self.standardised_asset_list[col], ) if self.master_surveyed is not None: for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( - ( - (~pd.isnull(self.standardised_asset_list["submission_status"])) - ), + ((~pd.isnull(self.standardised_asset_list["submission_status"]))), None, - self.standardised_asset_list[col] + self.standardised_asset_list[col], ) - if self.ecosurv is not None and "ecosurv_install_status" in self.standardised_asset_list.columns: + if ( + self.ecosurv is not None + and "ecosurv_install_status" in 
self.standardised_asset_list.columns + ): # If we didn't match anything to ecosurv, the ecosurv_install_status won't exist for col in ["cavity_reason", "solar_reason"]: self.standardised_asset_list[col] = np.where( ( - (~pd.isnull(self.standardised_asset_list["ecosurv_install_status"])) + ( + ~pd.isnull( + self.standardised_asset_list["ecosurv_install_status"] + ) + ) ), None, - self.standardised_asset_list[col] + self.standardised_asset_list[col], ) # We prepare outcomes for output if self.outcomes is not None: logger.info("Preparing outcomes for output") identified_work = self.standardised_asset_list[ - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | - ~pd.isnull(self.standardised_asset_list["solar_reason"]) + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) + | ~pd.isnull(self.standardised_asset_list["solar_reason"]) ][self.DOMNA_PROPERTY_ID].values if self.DOMNA_PROPERTY_ID in self.outcomes.columns: @@ -2096,37 +2650,49 @@ class AssetList: # Finally, direct operations feedback has suggested that if a property is a flat that has a SAP rating of # 76 or above, we should exclude it because it's likely not going to be eligible for anyting self.standardised_asset_list["cavity_reason"] = np.where( - (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") & - (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), + (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "flat") + & (self.standardised_asset_list["SAP Category"] == "SAP Rating 76 or more"), self.standardised_asset_list["cavity_reason"] + " - (unlikely to quality)", - self.standardised_asset_list["cavity_reason"] + self.standardised_asset_list["cavity_reason"], ) # Split cavity_reason on the colon and check if the first part is equal to one of the two options above # that indicates empties self.standardised_asset_list["identified_empty_cavity"] = ( - self.standardised_asset_list["cavity_reason"].str.split(":").str[0].isin( - 
[self.EMPTY_CAVITY_NON_INTRUSIVE, self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, self.EPC_EMPTY] + self.standardised_asset_list["cavity_reason"] + .str.split(":") + .str[0] + .isin( + [ + self.EMPTY_CAVITY_NON_INTRUSIVE, + self.EMPTY_CAVITY_NON_INTRUSIVE_YEAR, + self.EPC_EMPTY, + ] ) ) def get_work_figures(self): blocks_of_flats = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" ] non_blocks_of_flats = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of flats" + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + != "block of flats" ] # Produce some aggregate figures self.work_type_figures = { **non_blocks_of_flats["cavity_reason"].value_counts().to_dict(), **{ - k + " (Block of flats)": v for k, v in - blocks_of_flats["solar_reason"].value_counts().to_dict().items() + k + " (Block of flats)": v + for k, v in blocks_of_flats["solar_reason"] + .value_counts() + .to_dict() + .items() }, - **self.standardised_asset_list["solar_reason"].value_counts().to_dict() + **self.standardised_asset_list["solar_reason"].value_counts().to_dict(), } pprint(self.work_type_figures) @@ -2136,12 +2702,15 @@ class AssetList: # If we have blocks of flats, we fill the landlord_block_reference field with address 1 + postcode self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] = np.where( - (self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats") & ( - pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) - ), - self.standardised_asset_list[self.STANDARD_ADDRESS_1] + " " + - self.standardised_asset_list[self.STANDARD_POSTCODE], - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + ( + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" + ) + & (pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])), 
+ self.standardised_asset_list[self.STANDARD_ADDRESS_1] + + " " + + self.standardised_asset_list[self.STANDARD_POSTCODE], + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE], ) def split_blocks(self): @@ -2152,16 +2721,21 @@ class AssetList: """ blocks = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] == "block of flats" + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + == "block of flats" ].copy() if blocks.empty: return - RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b') - NUM_RE = re.compile(r'\b\d+[A-Za-z]?\b') # captures 12, 12A, etc. - TO_RANGE_RE = re.compile(r'\b(\d+[A-Za-z]?)\s+(?:to|To|TO)\s+(\d+[A-Za-z]?)\b') # captures "13 to 15" - LETTER_RANGE_RE = re.compile(r'\b(\d+)([A-Za-z]?)\s*[-–]\s*(\d+)([A-Za-z]?)\b') # captures "1A-3B" + RANGE_RE = re.compile(r"\b(\d+[A-Za-z]?)\s*[-–]\s*(\d+[A-Za-z]?)\b") + NUM_RE = re.compile(r"\b\d+[A-Za-z]?\b") # captures 12, 12A, etc. + TO_RANGE_RE = re.compile( + r"\b(\d+[A-Za-z]?)\s+(?:to|To|TO)\s+(\d+[A-Za-z]?)\b" + ) # captures "13 to 15" + LETTER_RANGE_RE = re.compile( + r"\b(\d+)([A-Za-z]?)\s*[-–]\s*(\d+)([A-Za-z]?)\b" + ) # captures "1A-3B" expanded_rows = [] @@ -2172,16 +2746,16 @@ class AssetList: # We also look for terms like "Odd", "even", "all" in the address to indicate if it should be just # the odds, evens or all of the numbers has_odd = ( - "(odd)" in addr.lower() or - "(odd)" in full_addr.lower() or - "(odds)" in addr.lower() or - "(odds)" in full_addr.lower() + "(odd)" in addr.lower() + or "(odd)" in full_addr.lower() + or "(odds)" in addr.lower() + or "(odds)" in full_addr.lower() ) has_even = ( - "(even)" in addr.lower() or - "(even)" in full_addr.lower() or - "(evens)" in addr.lower() or - "(evens)" in full_addr.lower() + "(even)" in addr.lower() + or "(even)" in full_addr.lower() + or "(evens)" in addr.lower() + or "(evens)" in full_addr.lower() ) # 1 ─ Range (e.g. 
1-7) @@ -2190,7 +2764,9 @@ class AssetList: if m_range or to_range: start, end = m_range.groups() if m_range else to_range.groups() - start, end = int(re.match(r'\d+', start)[0]), int(re.match(r'\d+', end)[0]) + start, end = int(re.match(r"\d+", start)[0]), int( + re.match(r"\d+", end)[0] + ) if start > end or (end - start) > 200: raise ValueError(f"Suspicious range '{addr}'") @@ -2217,18 +2793,26 @@ class AssetList: new["is_expended_block"] = True # We update the full address - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + new[self.DOMNA_PROPERTY_ID] = ( + f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + ) expanded_rows.append(new.to_dict()) continue # 2 ─ Explicit list (e.g. 1, 2, 5 Block) or split by an ampersand (e.g. 1 & 2 Block) nums = NUM_RE.findall(addr) - if len(nums) > 1 and (',' in addr or '&' in addr or ' and ' in addr.lower()): + if len(nums) > 1 and ( + "," in addr or "&" in addr or " and " in addr.lower() + ): for n in nums: new = row.copy() - new_addr = re.sub(NUM_RE, n, addr, count=1) # replace the first number only + new_addr = re.sub( + NUM_RE, n, addr, count=1 + ) # replace the first number only new[self.STANDARD_ADDRESS_1] = new_addr - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + new[self.DOMNA_PROPERTY_ID] = ( + f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + ) expanded_rows.append(new.to_dict()) continue @@ -2252,7 +2836,9 @@ class AssetList: new = row.copy() new_addr = f"{n}{chr(letter)}" new[self.STANDARD_ADDRESS_1] = new_addr - new[self.DOMNA_PROPERTY_ID] = f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + new[self.DOMNA_PROPERTY_ID] = ( + f"{row[self.DOMNA_PROPERTY_ID]}-{new_addr}" + ) expanded_rows.append(new.to_dict()) continue @@ -2272,18 +2858,19 @@ class AssetList: # We drop the blocks from the standardised asset list and append on the expanded blocks self.standardised_asset_list = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] != "block of 
flats" + self.standardised_asset_list[self.STANDARD_PROPERTY_TYPE] + != "block of flats" ] self.standardised_asset_list = pd.concat( - [self.standardised_asset_list, expanded_blocks], - ignore_index=True + [self.standardised_asset_list, expanded_blocks], ignore_index=True ) # As a final clean up, for any blocks that are size 1, we don't includr a project code sizes = ( - expanded_blocks - .groupby(self.STANDARD_BLOCK_REFERENCE)[self.DOMNA_PROPERTY_ID] + expanded_blocks.groupby(self.STANDARD_BLOCK_REFERENCE)[ + self.DOMNA_PROPERTY_ID + ] .nunique() .reset_index() ) @@ -2294,7 +2881,7 @@ class AssetList: size_1[self.STANDARD_BLOCK_REFERENCE].values ), None, - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE] + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE], ) def label_property_status(self): @@ -2307,10 +2894,10 @@ class AssetList: # For anything that is ready to go, that gets set to ready to be scheduled self.standardised_asset_list["hubspot_status"] = np.where( - ~pd.isnull(self.standardised_asset_list["cavity_reason"]) | - ~pd.isnull(self.standardised_asset_list["solar_reason"]), + ~pd.isnull(self.standardised_asset_list["cavity_reason"]) + | ~pd.isnull(self.standardised_asset_list["solar_reason"]), hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label, - None + None, ) # we step through the process of flagging completed surveys @@ -2321,43 +2908,56 @@ class AssetList: def get_max_status_from_columns(row): status_candidates = [] - for col in ["submission_status", "ecosurv_install_status", "outcome_status"]: + for col in [ + "submission_status", + "ecosurv_install_status", + "outcome_status", + ]: label = row.get(col) if label in label_to_enum: status_candidates.append(label_to_enum[label]) if not status_candidates: - return row["hubspot_status"] # fallback to existing status if no updates + return row[ + "hubspot_status" + ] # fallback to existing status if no updates return max(status_candidates).label - 
self.standardised_asset_list["hubspot_status"] = self.standardised_asset_list.apply( - get_max_status_from_columns, axis=1 + self.standardised_asset_list["hubspot_status"] = ( + self.standardised_asset_list.apply(get_max_status_from_columns, axis=1) ) self.standardised_asset_list["project_code"] = None # if we have any blocks, where work is eligible, we flag them now # These blocks may be refecence via the landlord_block_reference field, or by property types being # blocks of flats - has_landlord_block_reference = sum(~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE])) + has_landlord_block_reference = sum( + ~pd.isnull(self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE]) + ) if has_landlord_block_reference: # For blocks that have a 50% allocation, we create project codes self.block_analysis() # find any block refs with more than 50% emptires viable_empty_blocks = self.block_analysis_df[ - self.block_analysis_df['Percentage of Empties'] >= 0.50 + self.block_analysis_df["Percentage of Empties"] >= 0.50 ] if not viable_empty_blocks.empty: project_code_lookup = viable_empty_blocks[["Block Reference"]].copy() self.standardised_asset_list = self.standardised_asset_list.merge( - project_code_lookup, how="left", left_on=self.STANDARD_BLOCK_REFERENCE, right_on="Block Reference" + project_code_lookup, + how="left", + left_on=self.STANDARD_BLOCK_REFERENCE, + right_on="Block Reference", ) self.standardised_asset_list["project_code"] = np.where( ~pd.isnull(self.standardised_asset_list["Block Reference"]), self.standardised_asset_list["Block Reference"], - self.standardised_asset_list["project_code"] + self.standardised_asset_list["project_code"], + ) + self.standardised_asset_list = self.standardised_asset_list.drop( + columns=["Block Reference"] ) - self.standardised_asset_list = self.standardised_asset_list.drop(columns=["Block Reference"]) def analyse_geographies(self): cavity_programme = ( @@ -2379,13 +2979,15 @@ class AssetList: .reset_index() 
.rename(columns={"landlord_property_id": "n_properties"}) ) - geographical_areas = postcodes.merge(cavity_programme, how="left", on="domna_postcode").merge( - solar_programme, how="left", on="domna_postcode" - ).fillna(0) + geographical_areas = ( + postcodes.merge(cavity_programme, how="left", on="domna_postcode") + .merge(solar_programme, how="left", on="domna_postcode") + .fillna(0) + ) geographical_areas["coverage"] = ( - ( - geographical_areas["solar_reason"] + geographical_areas["cavity_reason"] - ) / geographical_areas["n_properties"] * 100 + (geographical_areas["solar_reason"] + geographical_areas["cavity_reason"]) + / geographical_areas["n_properties"] + * 100 ) geographical_areas = geographical_areas.sort_values("coverage", ascending=False) @@ -2397,34 +2999,55 @@ class AssetList: LABEL_TO_ENUM = {e.label: e for e in hubspot_config.HubspotProcessStatus} # Threshold status - anything that is at this stage or beyond is considered surveyed - threshold = hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value + threshold = ( + hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.value + ) block_analysis = [] - for block_reference, group in self.standardised_asset_list.groupby(self.STANDARD_BLOCK_REFERENCE): + for block_reference, group in self.standardised_asset_list.groupby( + self.STANDARD_BLOCK_REFERENCE + ): - cavity_breakdown = group["cavity_reason"].fillna("No Eligibility").value_counts(normalize=True) * 100 + cavity_breakdown = ( + group["cavity_reason"] + .fillna("No Eligibility") + .value_counts(normalize=True) + * 100 + ) if all(cavity_breakdown.index == "No Eligibility"): continue # We check the % of empty vs not empty as right now, we're focused on empty n_empties = ( - (group["identified_empty_cavity"] == True) & - (~pd.isnull(group["cavity_reason"])) & - (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + (group["identified_empty_cavity"] == True) + & 
(~pd.isnull(group["cavity_reason"])) + & ( + ~group["cavity_reason"].str.contains( + "(unlikely to quality)", case=False, na=False, regex=False + ) + ) ).sum() n_empties_high_confidence = ( - (group["identified_empty_cavity"] == True) & - (~group["SAP Category"].isin(["SAP Rating 69-75", "SAP Rating 76 or more"])) & - (~pd.isnull(group["cavity_reason"])) & - (~group["cavity_reason"].str.contains("(unlikely to quality)", case=False, na=False, regex=False)) + (group["identified_empty_cavity"] == True) + & ( + ~group["SAP Category"].isin( + ["SAP Rating 69-75", "SAP Rating 76 or more"] + ) + ) + & (~pd.isnull(group["cavity_reason"])) + & ( + ~group["cavity_reason"].str.contains( + "(unlikely to quality)", case=False, na=False, regex=False + ) + ) ).sum() # Average age of the EPCs group["time_since_epc"] = ( - pd.to_datetime("now") - pd.to_datetime( - group[self.EPC_API_DATA_NAMES["inspection-date"]]) + pd.to_datetime("now") + - pd.to_datetime(group[self.EPC_API_DATA_NAMES["inspection-date"]]) ).dt.days average_age_of_epc = group["time_since_epc"].mean() @@ -2456,21 +3079,26 @@ class AssetList: block_analysis["Eligible for Works"] = ( block_analysis["Percentage of Empties"] >= 0.50 ) - block_analysis = block_analysis.sort_values("Percentage of Empties", ascending=False) + block_analysis = block_analysis.sort_values( + "Percentage of Empties", ascending=False + ) # For properties that are NOT eligible, we should update the cavity reason - ineligible_blocks = block_analysis[ - ~block_analysis["Eligible for Works"] - ]["Block Reference"].values + ineligible_blocks = block_analysis[~block_analysis["Eligible for Works"]][ + "Block Reference" + ].values - eligible_blocks = block_analysis[ - block_analysis["Eligible for Works"] - ]["Block Reference"].values + eligible_blocks = block_analysis[block_analysis["Eligible for Works"]][ + "Block Reference" + ].values self.standardised_asset_list["cavity_reason"] = np.where( - 
self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(ineligible_blocks), - self.standardised_asset_list["cavity_reason"] + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + ineligible_blocks + ), self.standardised_asset_list["cavity_reason"] + + " (Flat in block with less than 50% eligible)", + self.standardised_asset_list["cavity_reason"], ) # if the property is in a block of flats that eligible, but the property itself is not eligible, we flag this @@ -2478,10 +3106,13 @@ class AssetList: # =The property should be in a block of flats self.standardised_asset_list["cavity_reason"] = np.where( - self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin(eligible_blocks), - self.standardised_asset_list["cavity_reason"] - + " " + "(Flat in block with more than 50% eligible)", + self.standardised_asset_list[self.STANDARD_BLOCK_REFERENCE].isin( + eligible_blocks + ), self.standardised_asset_list["cavity_reason"] + + " " + + "(Flat in block with more than 50% eligible)", + self.standardised_asset_list["cavity_reason"], ) self.block_analysis_df = block_analysis @@ -2513,7 +3144,7 @@ class AssetList: email_column=None, fullname_column=None, firstname_column=None, - lastname_column=None + lastname_column=None, ): self.contact_detail_fields = { @@ -2524,12 +3155,16 @@ class AssetList: "email": email_column, "fullname": fullname_column, "firstname": firstname_column, - "lastname": lastname_column + "lastname": lastname_column, } details_colnames = [ - phone_number_column, secondary_phone_number_column, email_column, fullname_column, firstname_column, - lastname_column + phone_number_column, + secondary_phone_number_column, + email_column, + fullname_column, + firstname_column, + lastname_column, ] # We'll fill them none_details = [x for x in details_colnames if x is None] @@ -2537,23 +3172,29 @@ class AssetList: if local_filepath is None: # Create an empty DataFrame based on the fields in 
self.contact_detail_fields - self.contact_details = pd.DataFrame(columns=list(self.contact_detail_fields.keys())) + self.contact_details = pd.DataFrame( + columns=list(self.contact_detail_fields.keys()) + ) return - contact_details = pd.read_excel( - local_filepath, sheet_name=sheet_name - )[[self.contact_detail_fields["landlord_property_id"]] + details_colnames] + contact_details = pd.read_excel(local_filepath, sheet_name=sheet_name)[ + [self.contact_detail_fields["landlord_property_id"]] + details_colnames + ] contact_details = contact_details[ - ~pd.isnull(contact_details[self.contact_detail_fields["landlord_property_id"]]) + ~pd.isnull( + contact_details[self.contact_detail_fields["landlord_property_id"]] + ) ] # Fill anything we don't have for detail in none_details: contact_details[detail] = None if fullname_column and not (firstname_column and lastname_column): - contact_details["title"], contact_details["first_name"], contact_details["last_name"] = zip( - *contact_details[fullname_column].apply(self.split_full_name) - ) + ( + contact_details["title"], + contact_details["first_name"], + contact_details["last_name"], + ) = zip(*contact_details[fullname_column].apply(self.split_full_name)) else: contact_details["title"] = None @@ -2588,11 +3229,13 @@ class AssetList: landlord_sap=cls.STANDARD_SAP, landlord_block_reference=cls.STANDARD_BLOCK_REFERENCE, phase=False, - header=header + header=header, ) return instance - def prepare_for_crm(self, company_domain, installer_name, reconcile_programme=False): + def prepare_for_crm( + self, company_domain, installer_name, reconcile_programme=False + ): """ This function prepares the data for upload into Hubspot :param company_domain: The company domain name to be used in the CRM @@ -2603,10 +3246,14 @@ class AssetList: """ # This maps the opportunities as we reference them, to the product data as stored in Hubspot if not hubspot_config.Installer.is_valid_value(installer_name): - raise ValueError(f"Installer name 
{installer_name} is not valid. Please check the installer name.") + raise ValueError( + f"Installer name {installer_name} is not valid. Please check the installer name." + ) # We check if all products are covered in the lookup table - cavity_products = self.standardised_asset_list["cavity_reason"].unique().tolist() + cavity_products = ( + self.standardised_asset_list["cavity_reason"].unique().tolist() + ) cavity_products = [x for x in cavity_products if not pd.isnull(x)] solar_products = self.standardised_asset_list["solar_reason"].unique().tolist() solar_products = [x for x in solar_products if not pd.isnull(x)] @@ -2627,20 +3274,25 @@ class AssetList: programme_data = self.standardised_asset_list.copy() programme_data["domna_full_address"] = ( - programme_data["domna_full_address"].str.replace(";", ", ", regex=False).str.replace(" ", "") + programme_data["domna_full_address"] + .str.replace(";", ", ", regex=False) + .str.replace(" ", "") ) # Format the two date columns - programme_data["survey_date"] = pd.to_datetime(programme_data["survey_date"], errors="coerce") + programme_data["survey_date"] = pd.to_datetime( + programme_data["survey_date"], errors="coerce" + ) programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = pd.to_datetime( - programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], - errors="coerce" + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]], errors="coerce" ) # Convert to dd/mm/yyyy format - programme_data["survey_date"] = programme_data["survey_date"].dt.strftime("%d/%m/%Y") - programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = ( - programme_data[self.EPC_API_DATA_NAMES["inspection-date"]].dt.strftime("%d/%m/%Y") + programme_data["survey_date"] = programme_data["survey_date"].dt.strftime( + "%d/%m/%Y" ) + programme_data[self.EPC_API_DATA_NAMES["inspection-date"]] = programme_data[ + self.EPC_API_DATA_NAMES["inspection-date"] + ].dt.strftime("%d/%m/%Y") # We take rows that have a survyor and a date for the survey # 
We include properties under 2 circumstances: @@ -2653,12 +3305,13 @@ class AssetList: else: if programme_data["hubspot_status"].nunique() > 1: - logger.info("Multiple hubspot_status found - are you sure you don't want to reconcile the programme?") + logger.info( + "Multiple hubspot_status found - are you sure you don't want to reconcile the programme?" + ) ready_to_be_scheduled = ( - ( - programme_data["hubspot_status"] == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label - ) + programme_data["hubspot_status"] + == hubspot_config.HubspotProcessStatus.READY_TO_BE_SCHEDULED.label ) # completed_works = ( # (programme_data["hubspot_status"] != @@ -2685,8 +3338,14 @@ class AssetList: ) # We check if we have any missings - cavity_missing = pd.isnull(programme_data[~pd.isnull(programme_data["cavity_reason"])]["cavity_product"]).sum() - solar_missing = pd.isnull(programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"]).sum() + cavity_missing = pd.isnull( + programme_data[~pd.isnull(programme_data["cavity_reason"])][ + "cavity_product" + ] + ).sum() + solar_missing = pd.isnull( + programme_data[~pd.isnull(programme_data["solar_reason"])]["solar_product"] + ).sum() if cavity_missing > 0 or solar_missing > 0: raise ValueError( @@ -2698,7 +3357,7 @@ class AssetList: programme_data["domna_product"] = np.where( pd.isnull(programme_data["domna_product"]), programme_data["cavity_product"], - programme_data["domna_product"] + programme_data["domna_product"], ) # We filter just on rows where we have a product if reconcile_programme: @@ -2715,33 +3374,41 @@ class AssetList: if pd.isnull(programme_data["domna_product"]).sum(): raise ValueError("Missing products") - programme_data = programme_data.drop(columns=["solar_product", "cavity_product"]) + programme_data = programme_data.drop( + columns=["solar_product", "cavity_product"] + ) product_df = ( - pd.DataFrame(self.CRM_PRODUCTS).T[["name", "id", "unit_price"]] + pd.DataFrame(self.CRM_PRODUCTS) + 
.T[["name", "id", "unit_price"]] .reset_index() .rename( columns={ "name": "Name ", - "id": 'Product ID ', - "unit_price": 'Unit price ', - "index": "domna_product" + "id": "Product ID ", + "unit_price": "Unit price ", + "index": "domna_product", } ) ) - product_df['Quantity '] = 1 + product_df["Quantity "] = 1 # Append on the product data - programme_data = programme_data.merge(product_df, how="left", on="domna_product") + programme_data = programme_data.merge( + product_df, how="left", on="domna_product" + ) # Add in deal and pipeline information programme_data["dealname"] = ( - programme_data[self.STANDARD_FULL_ADDRESS] + ", " + - programme_data[self.STANDARD_POSTCODE] + " : " + programme_data["domna_product"] + programme_data[self.STANDARD_FULL_ADDRESS] + + ", " + + programme_data[self.STANDARD_POSTCODE] + + " : " + + programme_data["domna_product"] ) - programme_data['Pipeline '] = hubspot_config.CRM_PIPELINE_NAME - programme_data['Associations: Listing'] = "Property Owner" + programme_data["Pipeline "] = hubspot_config.CRM_PIPELINE_NAME + programme_data["Associations: Listing"] = "Property Owner" # We determine which column we should use for the UPRN if self.STANDARD_UPRN not in programme_data.columns: @@ -2761,20 +3428,25 @@ class AssetList: programme_data[uprn_column] = np.where( programme_data["estimated"].isin([1, True]), None, - programme_data[uprn_column] + programme_data[uprn_column], ) # Add in some columns if we have them date_of_inspections = ( - "Non-Intrusives: Date of Inspection" if - "Non-Intrusives: Date of Inspection" in programme_data.columns else None + "Non-Intrusives: Date of Inspection" + if "Non-Intrusives: Date of Inspection" in programme_data.columns + else None ) # Ammend the property type and built form columns - programme_data["hubspot_property_type"] = programme_data[self.STANDARD_PROPERTY_TYPE].copy() + programme_data["hubspot_property_type"] = programme_data[ + self.STANDARD_PROPERTY_TYPE + ].copy() # We don't already have this 
if self.STANDARD_BUILT_FORM in programme_data.columns: - programme_data["hubspot_built_form"] = programme_data[self.STANDARD_BUILT_FORM].copy() + programme_data["hubspot_built_form"] = programme_data[ + self.STANDARD_BUILT_FORM + ].copy() else: programme_data["hubspot_built_form"] = None @@ -2787,23 +3459,30 @@ class AssetList: valid_values = ["house", "bungalow", "flat", "maisonette"] epc_fill_col = "property-type" elif column_name == "hubspot_built_form": - valid_values = ["detached", "semi-detached", "mid-terrace", "end-terrace"] + valid_values = [ + "detached", + "semi-detached", + "mid-terrace", + "end-terrace", + ] epc_fill_col = "built-form" else: - raise ValueError(f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " - f"'hubspot_built_form'.") + raise ValueError( + f"Invalid column name: {column_name}. Must be 'hubspot_property_type' or " + f"'hubspot_built_form'." + ) # Any vakue that is not house, bungalow, flat or maisonette is set to None programme_data[column_name] = np.where( ~programme_data[column_name].isin(valid_values), None, - programme_data[column_name] + programme_data[column_name], ) # We fill with the EPC property type programme_data[column_name] = np.where( pd.isnull(programme_data[column_name]), programme_data[self.EPC_API_DATA_NAMES[epc_fill_col]], - programme_data[column_name] + programme_data[column_name], ) programme_data[column_name] = programme_data[column_name].fillna("unknown") @@ -2811,8 +3490,12 @@ class AssetList: return programme_data # Clean up the property type and built form columns - programme_data = _replace_property_description_data(programme_data, "hubspot_property_type") - programme_data = _replace_property_description_data(programme_data, "hubspot_built_form") + programme_data = _replace_property_description_data( + programme_data, "hubspot_property_type" + ) + programme_data = _replace_property_description_data( + programme_data, "hubspot_built_form" + ) # We accomodate the old vs new inspections 
format if "non-intrusives: WFT Findings" in programme_data.columns: @@ -2826,97 +3509,136 @@ class AssetList: non_intrusives_roof_orientation = None non_intrusives_surveyor_name = None else: - non_intrusives_surveyor_notes = 'non-intrusives: Any further surveyor notes' + non_intrusives_surveyor_notes = "non-intrusives: Any further surveyor notes" non_intrusives_construction = "non-intrusives: Construction" non_intrusives_insulated = "non-intrusives: Insulated" non_intrusives_insulation_material = "non-intrusives: Material" - non_intrusives_ciga_check_required = 'non-intrusives: CIGA Check Required' - non_intrusives_pv_access = 'non-intrusives: PV, ACCESS ISSUE, SEE NOTES' - non_intrusives_roof_orientation = 'non-intrusives: OFF GAS - ROOF ORIENTATION' - non_intrusives_surveyor_name = 'non-intrusives: Surveyors Name' + non_intrusives_ciga_check_required = "non-intrusives: CIGA Check Required" + non_intrusives_pv_access = "non-intrusives: PV, ACCESS ISSUE, SEE NOTES" + non_intrusives_roof_orientation = ( + "non-intrusives: OFF GAS - ROOF ORIENTATION" + ) + non_intrusives_surveyor_name = "non-intrusives: Surveyors Name" # This maps the hubspot schema to the template. 
Anything that is not covered in this will be flagged schema_mappings = { - 'Company Domain Name ': 'Company Domain Name ', - 'Email ': ( - self.contact_detail_fields["email"] if self.contact_detail_fields["email"] else None + "Company Domain Name ": "Company Domain Name ", + "Email ": ( + self.contact_detail_fields["email"] + if self.contact_detail_fields["email"] + else None ), # TODO: Review - 'First Name ': ( - self.contact_detail_fields["firstname"] if self.contact_detail_fields["firstname"] else None + "First Name ": ( + self.contact_detail_fields["firstname"] + if self.contact_detail_fields["firstname"] + else None ), # TODO: Review - 'Last Name ': ( - self.contact_detail_fields["lastname"] if self.contact_detail_fields["lastname"] else None + "Last Name ": ( + self.contact_detail_fields["lastname"] + if self.contact_detail_fields["lastname"] + else None ), # TODO: Review - 'Phone ': ( - self.contact_detail_fields["phone_number"] if self.contact_detail_fields["phone_number"] else None + "Phone ": ( + self.contact_detail_fields["phone_number"] + if self.contact_detail_fields["phone_number"] + else None ), # TODO: Review - 'Secondary Phone ': ( - self.contact_detail_fields["secondary_phone_number"] if - self.contact_detail_fields["secondary_phone_number"] else None + "Secondary Phone ": ( + self.contact_detail_fields["secondary_phone_number"] + if self.contact_detail_fields["secondary_phone_number"] + else None ), "Secondary Contact Full Name ": ( - self.contact_detail_fields["secondary_contact_full_name"] if - self.contact_detail_fields["secondary_contact_full_name"] else None + self.contact_detail_fields["secondary_contact_full_name"] + if self.contact_detail_fields["secondary_contact_full_name"] + else None ), - 'Full Address ': self.STANDARD_FULL_ADDRESS, - 'Address 1 ': self.STANDARD_ADDRESS_1, - 'Address 2 ': None, # TODO: Don't have this for the moment - 'Postcode ': self.STANDARD_POSTCODE, - 'Property Type ': "hubspot_property_type", - 'Property Sub 
Type ': "hubspot_built_form", - 'Bedroom(s) ': None, # TODO: Don't have this for the moment - 'Domna Property ID ': self.DOMNA_PROPERTY_ID, + "Full Address ": self.STANDARD_FULL_ADDRESS, + "Address 1 ": self.STANDARD_ADDRESS_1, + "Address 2 ": None, # TODO: Don't have this for the moment + "Postcode ": self.STANDARD_POSTCODE, + "Property Type ": "hubspot_property_type", + "Property Sub Type ": "hubspot_built_form", + "Bedroom(s) ": None, # TODO: Don't have this for the moment + "Domna Property ID ": self.DOMNA_PROPERTY_ID, # We populate this with the column that we have - 'National UPRN ': uprn_column, - 'Owner Property ID ': self.STANDARD_LANDLORD_PROPERTY_ID, - 'Wall Construction ': self.STANDARD_WALL_CONSTRUCTION, - 'Heating System ': self.STANDARD_HEATING_SYSTEM, - 'Year Built ': self.STANDARD_YEAR_BUILT, - 'Boiler Make ': None, # TODO: Don't have this for the moment - 'Boiler Model ': None, # TODO: Don't have this for the moment - 'Non-Intrusives: Date Checked ': date_of_inspections, - 'Non-Intrusives: Wall Type ': non_intrusives_construction, - 'Non-intrusives: Insulation ': non_intrusives_insulated, - 'Non-intrusives: Insulation Material ': + "National UPRN ": uprn_column, + "Owner Property ID ": self.STANDARD_LANDLORD_PROPERTY_ID, + "Wall Construction ": self.STANDARD_WALL_CONSTRUCTION, + "Heating System ": self.STANDARD_HEATING_SYSTEM, + "Year Built ": self.STANDARD_YEAR_BUILT, + "Boiler Make ": None, # TODO: Don't have this for the moment + "Boiler Model ": None, # TODO: Don't have this for the moment + "Non-Intrusives: Date Checked ": date_of_inspections, + "Non-Intrusives: Wall Type ": non_intrusives_construction, + "Non-intrusives: Insulation ": non_intrusives_insulated, + "Non-intrusives: Insulation Material ": non_intrusives_insulation_material, - 'Non-Intrusives: CIGA Check Required ': + "Non-Intrusives: CIGA Check Required ": non_intrusives_ciga_check_required, - 'Non-Intrusives: PV Access Issues ': non_intrusives_pv_access, - 'Non-Intrusives: Roof 
Orientation ': + "Non-Intrusives: PV Access Issues ": non_intrusives_pv_access, + "Non-Intrusives: Roof Orientation ": non_intrusives_roof_orientation, - 'Non-Intrusives: Surveyor Notes ': non_intrusives_surveyor_notes, - 'Non-Intrusives: Surveyor Name ': non_intrusives_surveyor_name, - 'CIGA: Date Requested ': None, # TODO: Don't have this for the moment - 'CIGA: Cavity Guarantee Found ': None, - 'Last EPC: Is Estimated ': self.EPC_API_DATA_NAMES["estimated"], - 'Last EPC: EPC Rating ': self.EPC_API_DATA_NAMES["current-energy-rating"], - 'Last EPC: SAP Rating ': self.EPC_API_DATA_NAMES["current-energy-efficiency"], - 'Last EPC: Main Heating Description ': self.EPC_API_DATA_NAMES[ - "mainheat-description"], - 'Last EPC: Heating Controls ': self.EPC_API_DATA_NAMES[ - "mainheatcont-description"], - 'Last EPC: Lodgement Date ': self.EPC_API_DATA_NAMES["inspection-date"], - 'Last EPC: Floor Area ': self.EPC_API_DATA_NAMES["total-floor-area"], - 'Last EPC: Wall ': self.EPC_API_DATA_NAMES["walls-description"], - 'Last EPC: Roof ': self.EPC_API_DATA_NAMES["roof-description"], - 'Last EPC: Floor ': self.EPC_API_DATA_NAMES["floor-description"], - 'Last EPC: Room Height ': self.EPC_API_DATA_NAMES["floor-height"], - 'Last EPC: Age Band ': self.EPC_API_DATA_NAMES["construction-age-band"], - 'Pipeline ': 'Pipeline ', - 'Expected Commencement Date ': "survey_date", - 'Deal Name ': "dealname", # Need to create this, - 'Product ID ': 'Product ID ', - 'Name ': 'Name ', - 'Unit price ': 'Unit price ', - 'Quantity ': 'Quantity ', - 'Deal Owner': 'surveyor', - 'Project Code ': 'project_code', - 'Associations: Listing': 'Associations: Listing', - 'Deal Stage ': "hubspot_status", + "Non-Intrusives: Surveyor Notes ": non_intrusives_surveyor_notes, + "Non-Intrusives: Surveyor Name ": non_intrusives_surveyor_name, + "CIGA: Date Requested ": None, # TODO: Don't have this for the moment + "CIGA: Cavity Guarantee Found ": None, + "Last EPC: Is Estimated ": self.EPC_API_DATA_NAMES[ + 
"estimated" + ], + "Last EPC: EPC Rating ": self.EPC_API_DATA_NAMES[ + "current-energy-rating" + ], + "Last EPC: SAP Rating ": self.EPC_API_DATA_NAMES[ + "current-energy-efficiency" + ], + "Last EPC: Main Heating Description ": self.EPC_API_DATA_NAMES[ + "mainheat-description" + ], + "Last EPC: Heating Controls ": self.EPC_API_DATA_NAMES[ + "mainheatcont-description" + ], + "Last EPC: Lodgement Date ": self.EPC_API_DATA_NAMES[ + "inspection-date" + ], + "Last EPC: Floor Area ": self.EPC_API_DATA_NAMES[ + "total-floor-area" + ], + "Last EPC: Wall ": self.EPC_API_DATA_NAMES[ + "walls-description" + ], + "Last EPC: Roof ": self.EPC_API_DATA_NAMES[ + "roof-description" + ], + "Last EPC: Floor ": self.EPC_API_DATA_NAMES[ + "floor-description" + ], + "Last EPC: Room Height ": self.EPC_API_DATA_NAMES[ + "floor-height" + ], + "Last EPC: Age Band ": self.EPC_API_DATA_NAMES[ + "construction-age-band" + ], + "Pipeline ": "Pipeline ", + "Expected Commencement Date ": "survey_date", + "Deal Name ": "dealname", # Need to create this, + "Product ID ": "Product ID ", + "Name ": "Name ", + "Unit price ": "Unit price ", + "Quantity ": "Quantity ", + "Deal Owner": "surveyor", + "Project Code ": "project_code", + "Associations: Listing": "Associations: Listing", + "Deal Stage ": "hubspot_status", } # We sometimes columns if the landlord never provided them - missed_mapping_cols = [c for c in schema_mappings.values() if c not in programme_data.columns if c is not None] + missed_mapping_cols = [ + c + for c in schema_mappings.values() + if c not in programme_data.columns + if c is not None + ] for c in missed_mapping_cols: programme_data[c] = None @@ -2934,22 +3656,32 @@ class AssetList: columns={v: k for k, v in schema_mappings.items() if v is not None} ) - programme_data['Postcode '] = programme_data['Postcode '].copy() - programme_data['Installer '] = installer_name - programme_data['Name '] = ( - programme_data['Full Address '] + " ," + programme_data['Postcode '] + 
programme_data["Postcode "] = programme_data[ + "Postcode " + ].copy() + programme_data["Installer "] = installer_name + programme_data["Name "] = ( + programme_data["Full Address "] + + " ," + + programme_data["Postcode "] ) # The listing owner email is the same as the surveyor email (deal owner), so they can see the listing - programme_data['Listing Owner Email '] = programme_data['Deal Owner'] - programme_data['Amount '] = 0 + programme_data["Listing Owner Email "] = ( + programme_data["Deal Owner"] + ) + programme_data["Amount "] = 0 programme_data["Deal Owner"] = np.where( ~pd.isnull(programme_data["Deal Owner"]), programme_data["Deal Owner"].astype(str).str.lower(), - programme_data["Deal Owner"] + programme_data["Deal Owner"], ) # We make sure we have all of the columns that we need - missed_columns = [c for c in hubspot_config.CRM_UPLOAD_COLUMNS if c not in programme_data.columns] + missed_columns = [ + c + for c in hubspot_config.CRM_UPLOAD_COLUMNS + if c not in programme_data.columns + ] if missed_columns: raise ValueError( f"We have the following columns that are not in the programme data: {missed_columns}. 
" @@ -2959,7 +3691,6 @@ class AssetList: self.hubspot_data = programme_data def flag_ecosurv(self, ecosurv_landlords=None, landlords_to_ignore=None): - """ This class will match ecosurv data to the asset list :return: @@ -2968,7 +3699,9 @@ class AssetList: return # TODO: Fetch from Sharepoint - ecosurv_filepath = "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/07.05.2025.csv" + ecosurv_filepath = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Ecosurv/07.05.2025.csv" + ) logger.info("Getting Ecosurv data from %s", ecosurv_filepath) self.ecosurv = pd.read_csv(ecosurv_filepath, encoding="cp437") @@ -2989,12 +3722,16 @@ class AssetList: # Try and match to asset list matched = [] unmatched = [] - for _, row in tqdm(landlord_ecosurv_data.iterrows(), total=landlord_ecosurv_data.shape[0]): + for _, row in tqdm( + landlord_ecosurv_data.iterrows(), total=landlord_ecosurv_data.shape[0] + ): postcode = row["Postcode"].lower() df = self.standardised_asset_list[ ( - self.standardised_asset_list[self.STANDARD_POSTCODE].str.replace(" ", "").str.lower() == - postcode + self.standardised_asset_list[self.STANDARD_POSTCODE] + .str.replace(" ", "") + .str.lower() + == postcode ) ].copy() @@ -3003,25 +3740,28 @@ class AssetList: continue if df.shape[0] > 1: - house_no = SearchEpc.get_house_number(row["Address Line 1"], row["Postcode"]) + house_no = SearchEpc.get_house_number( + row["Address Line 1"], row["Postcode"] + ) df["house_no"] = df.apply( lambda x: SearchEpc.get_house_number( str(x[self.STANDARD_ADDRESS_1]), x[self.STANDARD_POSTCODE] ), - axis=1 + axis=1, ) df = df[df["house_no"] == house_no] if df.shape[0] > 1: # We compare address line 1 to full address if any( - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - row["Address Line 1"].lower(), na=False) + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains(row["Address Line 1"].lower(), na=False) ): df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - row["Address Line 1"].lower(), 
na=False - ) + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains(row["Address Line 1"].lower(), na=False) ] if df.shape[0] > 1: @@ -3030,7 +3770,9 @@ class AssetList: if df.shape[0] == 1: matched.append( { - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], "ecosurv_reference": row["Reference"], "ecosurv_address1": row["Address Line 1"], "ecosurv_postcode": row["Postcode"], @@ -3053,7 +3795,9 @@ class AssetList: # We'll possibly have duplicates here, where properties have been sold twice. Ww de-dupe if matched[self.STANDARD_LANDLORD_PROPERTY_ID].duplicated().sum(): # It doesn't matter too much which record we take - matched = matched.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]) + matched = matched.drop_duplicates( + subset=[self.STANDARD_LANDLORD_PROPERTY_ID] + ) # We merge on the status of the property matched = matched.merge( @@ -3063,12 +3807,16 @@ class AssetList: "Status": "ecosurv_status", "Lead Status": "ecosurv_lead_status", "Tags": "ecosurv_tags", - "Installer": "ecosurv_installer" + "Installer": "ecosurv_installer", } - ), how="left", on="ecosurv_reference" + ), + how="left", + on="ecosurv_reference", ) - matched["ecosurv_install_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + matched["ecosurv_install_status"] = ( + hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + ) # This mapping is ordered by process order, where lodgment is the final step so if we have an indication # that the property is ready for lodgement, we set the status to that. 
We then proceed through the other @@ -3086,7 +3834,7 @@ class AssetList: "Retrofit: Signed off for install": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, "Audit": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, "Accepted": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, - "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER + "Sold": hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER, } def get_max_status(tag_str): @@ -3100,7 +3848,9 @@ class AssetList: return None return max(matched_statuses).label - matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply(get_max_status) + matched["ecosurv_install_status"] = matched["ecosurv_tags"].apply( + get_max_status + ) self.standardised_asset_list = self.standardised_asset_list.merge( matched, @@ -3120,7 +3870,7 @@ class AssetList: outcomes_address, outcomes_postcode, outcomes_houseno, - outcomes_id + outcomes_id, ): if not outcomes_filepaths: return @@ -3129,7 +3879,9 @@ class AssetList: outcomes_no_match = [] lookup = [] for idx, outcomes_filepath in enumerate(outcomes_filepaths): - outcomes = pd.read_excel(outcomes_filepath, sheet_name=outcomes_sheetname[idx]) + outcomes = pd.read_excel( + outcomes_filepath, sheet_name=outcomes_sheetname[idx] + ) outcomes["row_id"] = outcomes.index if outcomes_houseno[idx] is None: @@ -3139,15 +3891,21 @@ class AssetList: ) # We handle an edge case that occured for LHP - if "Notes / Outcomes" in outcomes.columns and "Outcome" not in outcomes.columns: + if ( + "Notes / Outcomes" in outcomes.columns + and "Outcome" not in outcomes.columns + ): # We use the re-mapper to handle this: outcomes["Notes / Outcomes"] = outcomes["Notes / Outcomes"].str.strip() values_to_remap = outcomes["Notes / Outcomes"].unique() # We want to map this to our standardised list of property types we're interested in remapper = DataRemapper( - standard_values=outcomes_mappings.outcomes_values, standard_map=outcomes_mappings.outcomes_map + 
standard_values=outcomes_mappings.outcomes_values, + standard_map=outcomes_mappings.outcomes_map, + ) + remap_dictionary = remapper.standardize_list( + values_to_remap=values_to_remap.tolist() ) - remap_dictionary = remapper.standardize_list(values_to_remap=values_to_remap.tolist()) # Perform the remap outcomes["Outcome"] = outcomes["Notes / Outcomes"].map(remap_dictionary) @@ -3167,80 +3925,109 @@ class AssetList: if oid is not None: matched = self.standardised_asset_list[ - (self.standardised_asset_list[ - self.STANDARD_LANDLORD_PROPERTY_ID - ].str.strip() == oid) + ( + self.standardised_asset_list[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].str.strip() + == oid + ) ] if matched.shape[0] == 1: lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue - address_clean = x[outcomes_address[idx]].lower().replace(",", "").replace(" ", " ") + address_clean = ( + x[outcomes_address[idx]].lower().replace(",", "").replace(" ", " ") + ) matched = self.standardised_asset_list[ - (self.standardised_asset_list[ - self.STANDARD_FULL_ADDRESS - ].str.lower().str.replace(",", "").str.replace(" ", " ") == address_clean) + ( + self.standardised_asset_list[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.replace(",", "") + .str.replace(" ", " ") + == address_clean + ) ] if matched.shape[0] == 1: lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue matched = self.standardised_asset_list[ - (self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() == x[outcomes_postcode[idx]]) + ( + self.standardised_asset_list[self.STANDARD_POSTCODE].str.strip() + == x[outcomes_postcode[idx]] + ) ].copy() if not matched.empty: matched["houseno"] = matched.apply( lambda x: SearchEpc.get_house_number( - 
str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) + str(x[self.STANDARD_ADDRESS_1]), + str(x[self.STANDARD_POSTCODE]), ), - axis=1 + axis=1, ) if pd.isnull(x[outcomes_houseno[idx]]): house_no_to_match = SearchEpc.get_house_number( - str(x[outcomes_address[idx]]), str(x[outcomes_postcode[idx]]) + str(x[outcomes_address[idx]]), + str(x[outcomes_postcode[idx]]), ) if isinstance(house_no_to_match, str): house_no_to_match = house_no_to_match.lower() else: house_no_to_match = str(x[outcomes_houseno[idx]]).strip() - matched = matched[matched["houseno"].astype(str) == house_no_to_match] + matched = matched[ + matched["houseno"].astype(str) == house_no_to_match + ] if matched.shape[0] == 1: lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue elif not matched.empty: # Use levenstein distance to match matched["address"] = ( - matched[self.STANDARD_ADDRESS_1] + " " + matched[self.STANDARD_POSTCODE] + matched[self.STANDARD_ADDRESS_1] + + " " + + matched[self.STANDARD_POSTCODE] ) best_match = process.extractOne( - x[outcomes_address[idx]], matched[self.STANDARD_FULL_ADDRESS].values + x[outcomes_address[idx]], + matched[self.STANDARD_FULL_ADDRESS].values, )[0] - matched = matched[matched[self.STANDARD_FULL_ADDRESS] == best_match] + matched = matched[ + matched[self.STANDARD_FULL_ADDRESS] == best_match + ] lookup_i.append( { "row_id": x["row_id"], - self.DOMNA_PROPERTY_ID: matched[self.DOMNA_PROPERTY_ID].values[0] + self.DOMNA_PROPERTY_ID: matched[ + self.DOMNA_PROPERTY_ID + ].values[0], } ) continue @@ -3290,7 +4077,9 @@ class AssetList: raise NotImplementedError("Invalid notes in outcomes - implement me") lookup = lookup.merge( - self.outcomes[["row_id", "Outcome", notes_col, date_col]], how="left", on="row_id" + self.outcomes[["row_id", "Outcome", notes_col, date_col]], + how="left", + on="row_id", ) visit_counts = ( @@ 
-3305,28 +4094,35 @@ class AssetList: if isinstance(s, str): match = re.search(r"(\d{2}\.\d{2}\.\d{4})", s) if match: - return pd.to_datetime(match.group(1), format="%d.%m.%Y", errors="coerce") + return pd.to_datetime( + match.group(1), format="%d.%m.%Y", errors="coerce" + ) return pd.NaT - lookup['parsed_date'] = lookup[date_col].apply(extract_date) + lookup["parsed_date"] = lookup[date_col].apply(extract_date) def get_latest_note(group): - surveyed = group[group['Outcome'] == 'surveyed'] + surveyed = group[group["Outcome"] == "surveyed"] if not surveyed.empty: - return surveyed.sort_values('parsed_date', ascending=False).iloc[0] + return surveyed.sort_values("parsed_date", ascending=False).iloc[0] else: - return group.sort_values('parsed_date', ascending=False).iloc[0] + return group.sort_values("parsed_date", ascending=False).iloc[0] latest_note = ( - lookup.groupby('domna_property_id', group_keys=False). - apply(get_latest_note). - reset_index(drop=True) + lookup.groupby("domna_property_id", group_keys=False) + .apply(get_latest_note) + .reset_index(drop=True) ) latest_note = latest_note[["domna_property_id", notes_col, "Outcome"]].rename( columns={"Notes": "latest_outcome_note", "Outcome": "latest_outcome"} ) - pivot_df = lookup.groupby(["domna_property_id", "Outcome"]).size().unstack(fill_value=0).reset_index() + pivot_df = ( + lookup.groupby(["domna_property_id", "Outcome"]) + .size() + .unstack(fill_value=0) + .reset_index() + ) pivot_df = pivot_df.merge(visit_counts, how="left", on="domna_property_id") pivot_df = pivot_df.merge(latest_note, how="left", on="domna_property_id") @@ -3336,34 +4132,46 @@ class AssetList: raise Exception("We have duplicated property IDs in the outcomes data") # We merge this data onto outcomes - self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin(lookup["row_id"].values) - self.outcomes = self.outcomes.merge(lookup[["row_id", "domna_property_id"]], how="left", on="row_id") + 
self.outcomes["matched_to_asset_list"] = self.outcomes["row_id"].isin( + lookup["row_id"].values + ) + self.outcomes = self.outcomes.merge( + lookup[["row_id", "domna_property_id"]], how="left", on="row_id" + ) # We flag the outcome status, based on the outcome pivot_df["outcome_status"] = None if "surveyed" in pivot_df.columns: pivot_df["outcome_status"] = np.where( - pivot_df["surveyed"] > 0, hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, - pivot_df["outcome_status"] + pivot_df["surveyed"] > 0, + hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label, + pivot_df["outcome_status"], ) if "installer refusal" in pivot_df.columns: pivot_df["outcome_status"] = np.where( - pivot_df["installer refusal"] > 0, hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, - pivot_df["outcome_status"] + pivot_df["installer refusal"] > 0, + hubspot_config.HubspotProcessStatus.NOT_VIABLE.label, + pivot_df["outcome_status"], ) pivot_df["outcome_status"] = np.where( - pivot_df["latest_outcome"].isin(["see notes"]) & - (pivot_df["outcome_status"] != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label), + pivot_df["latest_outcome"].isin(["see notes"]) + & ( + pivot_df["outcome_status"] + != hubspot_config.HubspotProcessStatus.SURVEYED_COMPLETED_SIGNED_OFF.label + ), hubspot_config.HubspotProcessStatus.SURVEYED_NO_ACCESS_NEEDS_SIGN_OFF.label, - pivot_df["outcome_status"] + pivot_df["outcome_status"], ) # We merge out pivoted outcomes onto the asset list self.standardised_asset_list = self.standardised_asset_list.merge( - pivot_df, how="left", left_on=self.DOMNA_PROPERTY_ID, right_on="domna_property_id" + pivot_df, + how="left", + left_on=self.DOMNA_PROPERTY_ID, + right_on="domna_property_id", ) if self.standardised_asset_list[self.DOMNA_PROPERTY_ID].duplicated().sum(): @@ -3372,10 +4180,7 @@ class AssetList: self.outcomes = self.outcomes.sort_values("domna_property_id", ascending=False) def flag_survey_master( - self, - 
master_filepaths, - master_id_colnames, - master_to_asset_list_filepath=None + self, master_filepaths, master_id_colnames, master_to_asset_list_filepath=None ): # TODO: This probably needs further expansion @@ -3394,26 +4199,26 @@ class AssetList: master_data = pd.read_csv(filepath) # Strip columns master_data.columns = [c.strip() for c in master_data.columns] - master_data.columns = [re.sub(r'\s+', ' ', c) for c in master_data.columns] + master_data.columns = [re.sub(r"\s+", " ", c) for c in master_data.columns] # Drop any unnamed columns unnamed_columns = [c for c in master_data.columns if "Unnamed:" in c] master_data = master_data.drop(columns=unnamed_columns) if not id_map.empty: master_data = master_data.merge( - id_map, how="left", on=['NO.', 'Street / Block Name', 'Post Code'] + id_map, how="left", on=["NO.", "Street / Block Name", "Post Code"] ) if "INSTALLED OR CANCELLED" in master_data.columns: install_col = "INSTALLED OR CANCELLED" elif "INSTALL / CANCELLATION DATE" in master_data.columns: install_col = "INSTALL / CANCELLATION DATE" - elif 'INSTALL/ CANCELLATION DATE' in master_data.columns: - install_col = 'INSTALL/ CANCELLATION DATE' + elif "INSTALL/ CANCELLATION DATE" in master_data.columns: + install_col = "INSTALL/ CANCELLATION DATE" elif "INSTALL/CANCELLATION DATE" in master_data.columns: install_col = "INSTALL/CANCELLATION DATE" - elif 'Measure 1 Install Date' in master_data.columns: - install_col = 'Measure 1 Install Date' + elif "Measure 1 Install Date" in master_data.columns: + install_col = "Measure 1 Install Date" else: raise ValueError("No install or cancellation date") @@ -3428,14 +4233,19 @@ class AssetList: master_data["row_id"] = master_data.index - self.standardised_asset_list["house_no"] = self.standardised_asset_list.apply( - lambda x: SearchEpc.get_house_number( - str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) - ), - axis=1 + self.standardised_asset_list["house_no"] = ( + self.standardised_asset_list.apply( + lambda 
x: SearchEpc.get_house_number( + str(x[self.STANDARD_ADDRESS_1]), str(x[self.STANDARD_POSTCODE]) + ), + axis=1, + ) ) - if "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" in master_data.columns: + if ( + "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" + in master_data.columns + ): scheme_col = "AFFORDABLE WARMTH OR EPC FOR HOUSING ASSOCIATION" elif "AFFORDABLE WARMTH" in master_data.columns: scheme_col = "AFFORDABLE WARMTH" @@ -3446,11 +4256,13 @@ class AssetList: else: scheme_col = "OFFICE USE ONLY" - postcode_col = "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" - if 'NO.' in master_data.columns: - house_no_col = 'NO.' + postcode_col = ( + "POSTCODE" if "POSTCODE" in master_data.columns else "Post Code" + ) + if "NO." in master_data.columns: + house_no_col = "NO." elif "NO" in master_data.columns: - house_no_col = 'NO' + house_no_col = "NO" else: house_no_col = "NUMBER" @@ -3460,8 +4272,8 @@ class AssetList: property_type_col = "PROPERTY TYPE As per table emailed" elif "PROPERTY TYPE" in master_data.columns: property_type_col = "PROPERTY TYPE" - elif 'Property Type' in master_data.columns: - property_type_col = 'Property Type' + elif "Property Type" in master_data.columns: + property_type_col = "Property Type" else: property_type_col = "PROPERTY TYPE (SEE DEEMED SCORES SHEET) Eg. 
3W_Flat_1 (As per Matrix)" @@ -3469,14 +4281,21 @@ class AssetList: installer_notes_col = "INSTALLERS NOTES ; REASONS FOR CANCELLATIONS" elif "INSTALLERS NOTES" in master_data.columns: installer_notes_col = "INSTALLERS NOTES" - elif 'Installers Notes' in master_data.columns: - installer_notes_col = 'Installers Notes' - elif 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' in master_data.columns: - installer_notes_col = 'NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM' - elif ('INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED FROM' in - master_data.columns): - installer_notes_col = ('INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED ' - 'FROM') + elif "Installers Notes" in master_data.columns: + installer_notes_col = "Installers Notes" + elif ( + "NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM" + in master_data.columns + ): + installer_notes_col = "NOTES ; REASONS FOR CANCELLATIONS OR WHERE INSTALL DATE WAS OBTAINED FROM" + elif ( + "INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED FROM" + in master_data.columns + ): + installer_notes_col = ( + "INSTALLERS NOTES / REASONS FOR CANCELLATIONS / WHERE INSTALL DATE WAS RECEIVED " + "FROM" + ) else: raise ValueError("No installer notes column found in master data") @@ -3491,8 +4310,8 @@ class AssetList: if "TOWN" in master_data.columns: town_colname = "TOWN" - elif 'Town/Area' in master_data.columns: - town_colname = 'Town/Area' + elif "Town/Area" in master_data.columns: + town_colname = "Town/Area" else: town_colname = "Town/City" @@ -3511,7 +4330,8 @@ class AssetList: if master_id_colnames[idx] is not None: # Filter the standardised asset list on this df = self.standardised_asset_list[ - self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] == row[master_id_colnames[idx]] + self.standardised_asset_list[self.STANDARD_LANDLORD_PROPERTY_ID] + == 
row[master_id_colnames[idx]] ] if df.shape[0] == 1: matched.append( @@ -3520,7 +4340,9 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], } ) continue @@ -3530,7 +4352,10 @@ class AssetList: df = self.standardised_asset_list[ ( self.standardised_asset_list[self.STANDARD_POSTCODE] - .str.strip().str.lower().str.replace(" ", "") == postcode_no_space + .str.strip() + .str.lower() + .str.replace(" ", "") + == postcode_no_space ) ] @@ -3548,7 +4373,9 @@ class AssetList: df = self.standardised_asset_list[ ( self.standardised_asset_list[self.STANDARD_POSTCODE] - .str.strip().str.lower().str.startswith(postal_region) + .str.strip() + .str.lower() + .str.startswith(postal_region) ) ] @@ -3558,7 +4385,9 @@ class AssetList: df = df[df["house_no"] == house_no] if df.shape[0] > 1: df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains(row["Street / Block Name"].lower()) + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains(row["Street / Block Name"].lower()) ] if df.shape[0] == 0: unmatched.append(row["row_id"]) @@ -3569,7 +4398,9 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], } ) continue @@ -3579,43 +4410,70 @@ class AssetList: if df.shape[0] != 1: # Levenstein distance - if any(df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"])): + if any( + df[self.STANDARD_FULL_ADDRESS].str.contains( + row["Street / Block Name"] + ) + ): df = df[ - df[self.STANDARD_FULL_ADDRESS].str.contains(row["Street / Block Name"]) 
+ df[self.STANDARD_FULL_ADDRESS].str.contains( + row["Street / Block Name"] + ) ] else: # Levenstein distance df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().apply( + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .apply( lambda x: process.extractOne( " ".join( - [row[house_no_col], row["Street / Block Name"], row[town_colname]]).lower(), - x + [ + row[house_no_col], + row["Street / Block Name"], + row[town_colname], + ] + ).lower(), + x, )[1] - ) > 90 + ) + > 90 ] if df.shape[0] == 0: unmatched.append(row["row_id"]) continue - if any(df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - " ".join([row[house_no_col], row["Street / Block Name"]]).lower() - )): + if any( + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains( + " ".join( + [row[house_no_col], row["Street / Block Name"]] + ).lower() + ) + ): df = df[ - df[self.STANDARD_FULL_ADDRESS].str.lower().str.contains( - " ".join([row[house_no_col], row["Street / Block Name"]]).lower() + df[self.STANDARD_FULL_ADDRESS] + .str.lower() + .str.contains( + " ".join( + [row[house_no_col], row["Street / Block Name"]] + ).lower() ) ] if any( - df[self.STANDARD_PROPERTY_TYPE].str.contains(row[property_type_col].split(" ")[-1].lower()) + df[self.STANDARD_PROPERTY_TYPE].str.contains( + row[property_type_col].split(" ")[-1].lower() + ) ): # We ignore "block of flats" entries df = df[ df[self.STANDARD_PROPERTY_TYPE].str.contains( row[property_type_col].split(" ")[-1].lower() - ) & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") + ) + & (df[self.STANDARD_PROPERTY_TYPE] != "block of flats") ] if df.shape[0] != 1: @@ -3628,7 +4486,9 @@ class AssetList: "original_house_no": original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: x[self.STANDARD_LANDLORD_PROPERTY_ID], + self.STANDARD_LANDLORD_PROPERTY_ID: x[ + self.STANDARD_LANDLORD_PROPERTY_ID + ], } ) continue @@ -3639,11 +4499,15 @@ class AssetList: "original_house_no": 
original_house_no, "original_street": original_street, "original_postcode": original_postcode, - self.STANDARD_LANDLORD_PROPERTY_ID: df[self.STANDARD_LANDLORD_PROPERTY_ID].values[0], + self.STANDARD_LANDLORD_PROPERTY_ID: df[ + self.STANDARD_LANDLORD_PROPERTY_ID + ].values[0], } ) - self.standardised_asset_list = self.standardised_asset_list.drop(columns="house_no") + self.standardised_asset_list = self.standardised_asset_list.drop( + columns="house_no" + ) # We match the "UPRN" which is the landlords ID, onto the master sheet @@ -3654,19 +4518,29 @@ class AssetList: if matched.empty: continue - master_to_append = master_data[ - [scheme_col, "row_id", install_col, submission_col, measure_mix_col, installer_notes_col, installer_col] - ].merge( - matched, how="left", on="row_id" - ).rename( - columns={ - scheme_col: "funding_scheme", - measure_mix_col: "measure_mix", - install_col: "survey_status", - submission_col: "submission_date", - installer_notes_col: "submission_installer_notes", - installer_col: "submission_installer" - } + master_to_append = ( + master_data[ + [ + scheme_col, + "row_id", + install_col, + submission_col, + measure_mix_col, + installer_notes_col, + installer_col, + ] + ] + .merge(matched, how="left", on="row_id") + .rename( + columns={ + scheme_col: "funding_scheme", + measure_mix_col: "measure_mix", + install_col: "survey_status", + submission_col: "submission_date", + installer_notes_col: "submission_installer_notes", + installer_col: "submission_installer", + } + ) ) master_to_append["submission_cancelled"] = ( master_to_append["survey_status"].str.lower().str.contains("cancel") @@ -3675,14 +4549,17 @@ class AssetList: master_to_append["survey_status"].str.lower().str.contains("installed") ) master_surveyed.append(master_to_append) - unmatched_df = master_data[ - master_data["row_id"].isin(unmatched) - ] + unmatched_df = master_data[master_data["row_id"].isin(unmatched)] # The columns are massively different - we take just a few unmatched_df 
= unmatched_df[ [ - scheme_col, house_no_col, "Street / Block Name", postcode_col, install_col, submission_col + scheme_col, + house_no_col, + "Street / Block Name", + postcode_col, + install_col, + submission_col, ] ].rename( columns={ @@ -3690,14 +4567,16 @@ class AssetList: house_no_col: "House Number", postcode_col: "Postcode", install_col: "survey_status", - submission_col: "submission_date" + submission_col: "submission_date", } ) unmatched_submissions.append(unmatched_df) master_surveyed = pd.concat(master_surveyed) - master_surveyed = master_surveyed[~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID])] + master_surveyed = master_surveyed[ + ~pd.isnull(master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID]) + ] master_surveyed = master_surveyed[ ~master_surveyed[self.STANDARD_LANDLORD_PROPERTY_ID].isin( ["NOT ON ASSET LIST", "Missing From Asset List"] @@ -3709,20 +4588,24 @@ class AssetList: ].astype(str) # We de-dupe crudely on landlord property id - self.master_surveyed = master_surveyed.drop_duplicates(subset=[self.STANDARD_LANDLORD_PROPERTY_ID]).copy() + self.master_surveyed = master_surveyed.drop_duplicates( + subset=[self.STANDARD_LANDLORD_PROPERTY_ID] + ).copy() # We now add the submission status, based on the hubspot stages - self.master_surveyed["submission_status"] = hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + self.master_surveyed["submission_status"] = ( + hubspot_config.HubspotProcessStatus.SUBMITTED_TO_INSTALLER.label + ) self.master_surveyed["submission_status"] = np.where( self.master_surveyed["submission_cancelled"] == True, hubspot_config.HubspotProcessStatus.INSTALLER_CANCELLED_FINALIZED.label, - self.master_surveyed["submission_status"] + self.master_surveyed["submission_status"], ) self.master_surveyed["submission_status"] = np.where( self.master_surveyed["submission_installed"] == True, hubspot_config.HubspotProcessStatus.INSTALL_COMPLETE.label, - self.master_surveyed["submission_status"] + 
self.master_surveyed["submission_status"], ) self.standardised_asset_list = self.standardised_asset_list.merge( @@ -3735,6 +4618,4 @@ class AssetList: # Finally, we keep a record of the unmatched if unmatched_submissions: - self.unmatched_submissions = pd.concat( - unmatched_submissions - ) + self.unmatched_submissions = pd.concat(unmatched_submissions) diff --git a/asset_list/app.py b/asset_list/app.py index 4c15b71d..a97bb8e0 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -13,11 +13,15 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc -load_dotenv(dotenv_path="backend/.env") +load_dotenv(dotenv_path="../backend/.env") EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) +OPENAI_API_KEY = os.getenv( + "OPENAI_API_KEY", +) + def extract_address1( asset_list, full_address_col, postcode_col, method="first_two_words" @@ -69,18 +73,19 @@ def app(): Property UPRN """ - data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals" - data_filename = "For Modelling.xlsx" + data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lifespace Rentals/Missed" + # data_filename = "For Modelling - Final - reviewed.xlsx" + data_filename = "Missed Properties - with address.xlsx" sheet_name = "Sheet1" postcode_column = "Postcode" address1_column = "address1" address1_method = None - fulladdress_column = "full_address" + fulladdress_column = "address1" address_cols_to_concat = [] missing_postcodes_method = None landlord_year_built = None landlord_os_uprn = "UPRN" - landlord_property_type = None + landlord_property_type = "Type" landlord_built_form = None landlord_wall_construction = None landlord_roof_construction = None @@ -102,43 +107,6 @@ def app(): asset_list_header = 0 landlord_block_reference = None - # Peabody data for cleaning - data_folder = ( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/data_validation" - ) - 
data_filename = "to_standardise_uprns.xlsx" - sheet_name = "Sheet1" - postcode_column = "Postcode" - address1_column = None - address1_method = "house_number_extraction" - fulladdress_column = "Address" - address_cols_to_concat = None - missing_postcodes_method = None - landlord_year_built = None - landlord_os_uprn = "UPRN" - landlord_property_type = None - landlord_built_form = None - landlord_wall_construction = None - landlord_roof_construction = None - landlord_heating_system = None - landlord_existing_pv = None - landlord_property_id = "LLUPRN" - landlord_sap = None - outcomes_filename = None - outcomes_sheetname = None - outcomes_postcode = None - outcomes_houseno = None - outcomes_id = None - outcomes_address = None - master_filepaths = [] - master_id_colnames = [] - master_to_asset_list_filepath = None - phase = False - ecosurv_landlords = None - asset_list_header = 0 - landlord_block_reference = None - # Maps addresses to uprn in problematic cases manual_uprn_map = {} @@ -441,10 +409,6 @@ def app(): ) asset_list.merge_data(epc_df) - # asset_list.standardised_asset_list = asset_list.standardised_asset_list[ - # asset_list.standardised_asset_list["domna_full_address"] - # != "120 Airdrie Crescent, Burnley, Lancashire" - # ] asset_list.extract_attributes() asset_list.identify_worktypes() @@ -458,27 +422,6 @@ def app(): os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" ) - # Store the data in two tabs. 
One for the asset list with the EPC data and the second with the flat data - - # Determine inspections priority - # solar_jobs = asset_list.standardised_asset_list[~pd.isnull(asset_list.standardised_asset_list["solar_reason"])][ - # "domna_postcode"].unique() - # asset_list.standardised_asset_list["in_solar_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( - # solar_jobs - # ) - # # Same for cav - # cavity_jobs = asset_list.standardised_asset_list[ - # ~pd.isnull(asset_list.standardised_asset_list["cavity_reason"]) - # ]["domna_postcode"].unique() - # asset_list.standardised_asset_list["in_cavity_area"] = asset_list.standardised_asset_list["domna_postcode"].isin( - # cavity_jobs - # ) - # # We prioritise properties that are in solar areas and cavity areas - # import numpy as np - # asset_list.standardised_asset_list["inspection_priority"] = np.where( - # asset_list.standardised_asset_list["in_solar_area"] | asset_list.standardised_asset_list["in_cavity_area"], - # 1, 2 - # ) with pd.ExcelWriter(filename) as writer: asset_list.standardised_asset_list.to_excel( diff --git a/asset_list/mappings/built_form.py b/asset_list/mappings/built_form.py index d6466539..4842450d 100644 --- a/asset_list/mappings/built_form.py +++ b/asset_list/mappings/built_form.py @@ -528,6 +528,107 @@ BUILT_FORM_MAPPINGS = { 'House: Semi Detached: Top Floor': 'semi-detached', 'House: End Terrace: Ground Floor': 'end-terrace', 'Maisonette: Enclosed End Terrace: Mid Floor': 'enclosed end-terrace', - 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace' + 'Bungalow: EnclosedEndTerrace': 'enclosed end-terrace', + '2 BED MID TERRACED HOUSE': 'mid-terrace', + '4 BED SEMI DETACHED-PARLOURED': 'semi-detached', + '2 BED END TERRACED HOUSE': 'end-terrace', + '3 BED MID TERRACED HOUSE': 'mid-terrace', + '3 BED SEMI DETACHED HOUSE': 'semi-detached', + '3 BED MID TERRACE - PARLOURED': 'mid-terrace', + '3 BED END TERRACE - PARLOURED': 'end-terrace', + '4 BED+ END TERRACED HOUSE': 
'end-terrace', + '3 BED END TERRACED HOUSE': 'end-terrace', + '3 BED SEMI DETACHED-PARLOURED': 'semi-detached', + '4 BED+ END TERRACE - PARLOURED': 'end-terrace', + '2 BED SEMI DETACHED HOUSE': 'semi-detached', + '3 BED DETACHED HOUSE': 'detached', + '2 BED GRD FLR COTT FLT-CNT STR': 'ground floor', + '2 BED 1ST FLOOR WALKUP FLAT': 'mid-floor', + '1 BED GRD FL COTT FLAT-OWN ENT': 'ground floor', + '1 BED 1ST FL WALK UP DECK ACC': 'mid-floor', + '2 BED MAISONETTE UPPER COM ENT': 'mid-floor', + '2 BED GRD FLR COTT FLT OWN ENT': 'ground floor', + '1 BED BUNGALOW': 'unknown', + '2 BED GRD FL COTT FLT-OWN ENTR': 'ground floor', + '1 BED 1ST FL COTT FLT-CNT STR': 'mid-floor', + '1 BED GRD FL WALK UP OWN ENT': 'ground floor', + '1 BED GRD FLOOR WALKUP FLAT': 'ground floor', + '2 BED GRD FLOOR WALKUP FLAT': 'ground floor', + '2 BED 1ST FLR FLT-SHELTERED': 'mid-floor', + '2 BED BUNGALOW': 'unknown', + '2 BED GRD FLR COTT FLT(P)-1950': 'ground floor', + + 'Ground Floor Front Left': 'ground floor', + 'End-Terrace House': 'end-terrace', + 'Ground floor': 'ground floor', + 'Ground Floor Front Right': 'ground floor', + 'End Terrace (GII List)': 'end-terrace', + 'Semi Detached House': 'semi-detached', + 'Ground Floor Right': 'ground floor', + 'PB Ground Floor Flat': 'ground floor', + 'Basement and Ground Floor': 'ground floor', + 'Semi-detached bungalow': 'detached', + 'Detached Cottage': 'detached', + 'Lower & Ground Floor': 'ground floor', + 'Ground FLoor Flat': 'ground floor', + 'ground floor': 'ground floor', + 'Ground Floor Left': 'ground floor', + 'Semi-detached House': 'detached', + 'Basement & Lower Ground': 'basement', + 'Semi-Detached House': 'detached', + 'Ground floor flat -': 'ground floor', + 'Basement Flat': 'basement', + 'semi-detached bungalow': 'semi-detached', + 'Lower Ground Floor Flat': 'ground floor', + 'Ground floor Flat': 'ground floor', + 'Ground Floor flat': 'ground floor', + 'Ground': 'ground floor', + 'Semi detached Bungalow': 'semi-detached', + 
'ground floor flat': 'ground floor', + 'Mid terrace House': 'mid-terrace', + 'Raised Ground Floor': 'ground floor', + 'Basement Floor': 'basement', + 'Second floor flat': 'mid-floor', + 'Fourth Floor Flat': 'mid-floor', + 'First/Second Maisonette': 'mid-floor', + 'Ground/First': 'ground floor', + 'First and Second Floor': 'mid-floor', + 'Terrace House': 'mid-terrace', + '1st/2nd Floor Maisonette': 'mid-floor', + 'Semi-det House': 'semi-detached', + 'First': 'mid-floor', + 'Ground & First Floor': 'ground floor', + 'End of Terrace House': 'end-terrace', + '2nd Floor Purpose Built': 'mid-floor', + 'First/Second Floor Maison': 'mid-floor', + 'GFF purpose built': 'ground floor', + 'Second': 'mid-floor', + 'Semi-det House (GII List)': 'semi-detached', + '3rd and 4th Floor': 'mid-floor', + 'First Floor flat': 'mid-floor', + 'Mid-Terrace House': 'mid-terrace', + '1st & 2nd Floors': 'mid-floor', + 'Ground/first floor': 'ground floor', + 'FFF purpose built': 'mid-floor', + 'Second floor': 'mid-floor', + 'Second/Third floor': 'mid-floor', + 'First floor Flat': 'mid-floor', + 'First floor': 'mid-floor', + 'Lower Ground Flat': 'basement', + 'First Floor Rear Flat': 'mid-floor', + 'First & Second Floor': 'mid-floor', + 'Ground & Lower Ground': 'basement', + 'First Floor Rear': 'mid-floor', + 'First & Second': 'mid-floor', + 'First Floor Front': 'mid-floor', + 'First & Second Floors': 'mid-floor', + 'First/Second Floor': 'mid-floor', + 'Sem-detach house': 'semi-detached', + 'Second Floor Flat (Top)': 'top-floor', + '3 FloorTerrace House': 'mid-terrace', + 'First floor flat': 'mid-floor', + 'First & Second Floor Flat': 'mid-floor', + 'First Floor Purpose Built': 'mid-floor', + 'Purpose built First Floor': 'mid-floor', } diff --git a/asset_list/mappings/heating_systems.py b/asset_list/mappings/heating_systems.py index 272d6279..5f962108 100644 --- a/asset_list/mappings/heating_systems.py +++ b/asset_list/mappings/heating_systems.py @@ -498,6 +498,23 @@ HEATING_MAPPINGS = { 'Boiler: 
A rated Combi, System 2: Boiler: A rated Combi': 'gas combi boiler', 'System 2: Boiler: A rated Regular Boiler, Boiler: A rated Regular Boiler': 'gas boiler, radiators', - 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler' + 'Boiler: A rated Combi, System 2: Boiler: C rated Combi': 'gas combi boiler', + + 'IDEAL ISAR HE30': 'gas combi boiler', + 'WORCESTER GREENSTAR 25 SI': 'gas combi boiler', + 'POTTERTON PROMAX COMBI 28 HE PLUS': 'gas combi boiler', + 'WORCESTER GREENSTAR 28I JUNIOR': 'gas combi boiler', + 'BAXI ASSURE 25 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX COMBI 28 HE PLUS A': 'gas combi boiler', + 'WORCESTER GREENSTAR 30 SI': 'gas combi boiler', + 'POTTERTON SUPRIMA 40L': 'gas boiler, radiators', + 'POTTERTON ASSURE 30 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX 28 COMBI ERP': 'gas combi boiler', + 'BAXI ASSURE 30 COMBI': 'gas combi boiler', + 'POTTERTON PROMAX 18 SYSTEM ERP': 'gas boiler, radiators', + 'POTTERTON PROMAX COMBI 33 HE PLUS A': 'gas combi boiler', + 'POTTERTON SUPRIMA 40 HE': 'gas boiler, radiators', + 'FERROLI MODENA 102': 'gas boiler, radiators', + 'POTTERTON PROMAX COMBI 24 HE PLUS A': 'gas combi boiler' } diff --git a/asset_list/mappings/property_type.py b/asset_list/mappings/property_type.py index 177a7549..71788c25 100644 --- a/asset_list/mappings/property_type.py +++ b/asset_list/mappings/property_type.py @@ -444,6 +444,9 @@ PROPERTY_MAPPING = { 'Warden Bungalow': 'bungalow', 'Warden Flat': 'flat', 'Upper Floor Flat': 'flat', - 'Extracare Scheme': 'other' + 'Extracare Scheme': 'other', + + 'SHELTERED': 'unknown', + 'PARLOUR': 'unknown', } diff --git a/asset_list/mappings/roof.py b/asset_list/mappings/roof.py index 70cc8742..192238e0 100644 --- a/asset_list/mappings/roof.py +++ b/asset_list/mappings/roof.py @@ -320,6 +320,8 @@ ROOF_CONSTRUCTION_MAPPINGS = { 'Pitched (slates or tiles) access to loft, 100mm': 'pitched insulated', 'Pitched (slates or tiles) no loft access, 200mm': 'pitched insulated', 
'Pitched (slates or tiles) access to loft, 200mm': 'pitched insulated', - 'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation' + 'Pitched (slates or tiles) access to loft, 50mm': 'pitched less than 100mm insulation', + + 'Pitched roofs': 'pitched unknown insulation', } diff --git a/asset_list/mappings/walls.py b/asset_list/mappings/walls.py index 1a252b33..c369204d 100644 --- a/asset_list/mappings/walls.py +++ b/asset_list/mappings/walls.py @@ -369,6 +369,9 @@ WALL_CONSTRUCTION_MAPPINGS = { 'Solid Brick, As built': 'solid brick unknown insulation', 'System built, As built': 'system built unknown insulation', 'Timber frame, As built': 'timber frame unknown insulation', - 'Cavity, As built': 'cavity unknown insulation' + 'Cavity, As built': 'cavity unknown insulation', + 'FILLED CAVITY': 'filled cavity', + 'EXTERNAL': 'insulated solid brick', + 'AS BUILT': 'other' } diff --git a/asset_list/requirements.txt b/asset_list/requirements.txt index dc7e572e..56469fc0 100644 --- a/asset_list/requirements.txt +++ b/asset_list/requirements.txt @@ -5,7 +5,7 @@ epc-api-python==1.0.2 thefuzz boto3 openpyxl -openai>=1.3.5 +openai==1.93.0 tiktoken msgpack beautifulsoup4 diff --git a/backend/.env.test b/backend/.env.test index 5b77f243..34a1803d 100644 --- a/backend/.env.test +++ b/backend/.env.test @@ -19,4 +19,5 @@ PLAN_TRIGGER_BUCKET=test DATA_BUCKET=test EPC_AUTH_TOKEN=test ENGINE_SQS_URL=test -ENERGY_ASSESSMENTS_BUCKET=test \ No newline at end of file +CATEGORISATION_SQS_URL=test +ENERGY_ASSESSMENTS_BUCKET=test diff --git a/backend/Outputs.py b/backend/Outputs.py index f9538709..7111e4d3 100644 --- a/backend/Outputs.py +++ b/backend/Outputs.py @@ -8,7 +8,11 @@ from utils.s3 import read_from_s3, save_excel_to_s3 from backend.app.utils import sap_to_epc from backend.app.db.connection import db_engine from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import 
Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) class Outputs: @@ -42,7 +46,7 @@ class Outputs: "flat_roof_insulation": "Flat roof (Out of scope - prov sum only)", "room_in_roof_insulation": "RIR (POA - Prov sum only)", "ev_charging": "EV Charging", - "battery": "Battery" + "battery": "Battery", } def __init__(self, format, portfolio_id): @@ -67,28 +71,38 @@ class Outputs: # Download cleaned data self.cleaned_epc_lookup = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) self.cleaned_epc_lookup = msgpack.unpackb(self.cleaned_epc_lookup, raw=False) def get_properties_from_db(self): # Get properties and their details for a specific portfolio - properties_query = self.session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == self.portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + self.session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter( + PropertyModel.portfolio_id + == self.portfolio_id # Filter by portfolio ID + ) + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] @@ -96,10 +110,14 @@ class 
Outputs: def get_plans_from_db(self): - plans_query = self.session.query(Plan).filter(Plan.portfolio_id == self.portfolio_id).all() + plans_query = ( + self.session.query(PlanModel) + .filter(PlanModel.portfolio_id == self.portfolio_id) + .all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -107,28 +125,38 @@ class Outputs: def get_recommendations_from_db(self, plan_ids): # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = self.session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + self.session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ { **{ - col.name: getattr(rec.Recommendation, col.name) if - hasattr(rec, 'Recommendation') else getattr(rec, col.name) + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) for col in Recommendation.__table__.columns }, - "Scenario ID": rec.scenario_id - } for rec in 
recommendations_query + "Scenario ID": rec.scenario_id, + } + for rec in recommendations_query ] return recommendations_data @@ -148,7 +176,9 @@ class Outputs: measure_label = self.MDS_MEASURE_MAPPING.get(measure_type, None) # If the property_id already exists in the collected rows, update it - existing_row = next((item for item in rows if item["property_id"] == property_id), None) + existing_row = next( + (item for item in rows if item["property_id"] == property_id), None + ) if existing_row is None: # Create a new row if the property_id doesn't exist new_row = {measure: None for measure in all_measures} @@ -196,7 +226,7 @@ class Outputs: properties_data = self.get_properties_from_db() plans_data = self.get_plans_from_db() - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] recommendations_data = self.get_recommendations_from_db(plan_ids) self.session.close() @@ -209,50 +239,54 @@ class Outputs: scenario_ids = plans_df["scenario_id"].unique() # We start to create the MDS sheet - mds = properties_df[ - [ - "property_id", - "address", - "postcode", - "uprn", - "current_epc_rating", - "current_sap_points", - "primary_energy_consumption", - "property_type", - "built_form", - "total_floor_area", - "walls", - "tenure", - "mainfuel", - # The bills columns are split out - we include them and aggregate, without appliances - "heating_cost_current", - "hot_water_cost_current", - "lighting_cost_current", - "gas_standing_charge", - "electricity_standing_charge" + mds = ( + properties_df[ + [ + "property_id", + "address", + "postcode", + "uprn", + "current_epc_rating", + "current_sap_points", + "primary_energy_consumption", + "property_type", + "built_form", + "total_floor_area", + "walls", + "tenure", + "mainfuel", + # The bills columns are split out - we include them and aggregate, without appliances + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "gas_standing_charge", + 
"electricity_standing_charge", + ] ] - ].copy().rename( - columns={ - "address": "Address", - "postcode": "Postcode", - "uprn": "UPRN", - "current_epc_rating": "Pre EPC", - "current_sap_points": "EPC Source", - "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", - "property_type": "Property Type", - "built_form": "Built Form", - "total_floor_area": "Floor area m2 (If known)", - "walls": "Wall Type (Mandatory field)", - "tenure": "Tenure", - } + .copy() + .rename( + columns={ + "address": "Address", + "postcode": "Postcode", + "uprn": "UPRN", + "current_epc_rating": "Pre EPC", + "current_sap_points": "EPC Source", + "primary_energy_consumption": "Existing Heating Demand Kwh/m2/y", + "property_type": "Property Type", + "built_form": "Built Form", + "total_floor_area": "Floor area m2 (If known)", + "walls": "Wall Type (Mandatory field)", + "tenure": "Tenure", + } + ) ) mds["Estimated bill (£ per year)"] = ( - mds["heating_cost_current"] + - mds["hot_water_cost_current"] + - mds["lighting_cost_current"] + - mds["gas_standing_charge"] + - mds["electricity_standing_charge"] + mds["heating_cost_current"] + + mds["hot_water_cost_current"] + + mds["lighting_cost_current"] + + mds["gas_standing_charge"] + + mds["electricity_standing_charge"] ) mds = mds.drop( @@ -261,65 +295,84 @@ class Outputs: "hot_water_cost_current", "lighting_cost_current", "gas_standing_charge", - "electricity_standing_charge" + "electricity_standing_charge", ] ) # Formatting - Pre EPC is an enum mds["Pre EPC"] = [x.value for x in mds["Pre EPC"].values] - mds["Wall Type (Mandatory field)"] = mds["Wall Type (Mandatory field)"].str.split(",").str[0] + mds["Wall Type (Mandatory field)"] = ( + mds["Wall Type (Mandatory field)"].str.split(",").str[0] + ) # Remove average thermal transmittance field mds["Wall Type (Mandatory field)"] = np.where( - mds["Wall Type (Mandatory field)"].str.contains("Average thermal transmittance"), + mds["Wall Type (Mandatory field)"].str.contains( + "Average 
thermal transmittance" + ), "", - mds["Wall Type (Mandatory field)"] + mds["Wall Type (Mandatory field)"], ) mds = mds.merge( - pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[["clean_description", "fuel_type"]], + pd.DataFrame(self.cleaned_epc_lookup["main-fuel"])[ + ["clean_description", "fuel_type"] + ], left_on="mainfuel", right_on="clean_description", - how="left" + how="left", + ) + mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop( + columns=["clean_description", "mainfuel"] ) - mds = mds.rename(columns={"fuel_type": "Existing Fuel Type"}).drop(columns=["clean_description", "mainfuel"]) mds["Existing Fuel Type"].value_counts() mds_output_by_scenario = {} for scenario_id in scenario_ids: - scenario_recommendations = recommendations_df[recommendations_df["Scenario ID"] == scenario_id] + scenario_recommendations = recommendations_df[ + recommendations_df["Scenario ID"] == scenario_id + ] # For each measure, we create the measure matrix - scenario_measure_matrix = self.make_mds_measure_matrix(scenario_recommendations) + scenario_measure_matrix = self.make_mds_measure_matrix( + scenario_recommendations + ) # Calculate the predicted impact on: SAP, heat demand, bills, kwh - recommendation_impacts = scenario_recommendations.groupby("property_id")[ - ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] - ].sum().reset_index() + recommendation_impacts = ( + scenario_recommendations.groupby("property_id")[ + ["sap_points", "heat_demand", "kwh_savings", "energy_cost_savings"] + ] + .sum() + .reset_index() + ) scenario_mds = mds.merge( scenario_measure_matrix, how="left", on="property_id" - ).merge( - recommendation_impacts, how="left", on="property_id" - ) + ).merge(recommendation_impacts, how="left", on="property_id") # If we have no recommendations, sap_points, kwh_savings, head_demand will be NaN to_clean = [c for c in recommendation_impacts.columns if c != "property_id"] for col in to_clean: scenario_mds[col].fillna(0, 
inplace=True) scenario_mds.fillna(0, inplace=True) - scenario_mds["Post SAP"] = scenario_mds["EPC Source"] + scenario_mds["sap_points"] + scenario_mds["Post SAP"] = ( + scenario_mds["EPC Source"] + scenario_mds["sap_points"] + ) # Round Post SAP down to the nearest integer scenario_mds["Post SAP"] = scenario_mds["Post SAP"].apply(lambda x: int(x)) - scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply(lambda x: sap_to_epc(x)) + scenario_mds["Post EPC"] = scenario_mds["Post SAP"].apply( + lambda x: sap_to_epc(x) + ) scenario_mds["Heating Demand Kwh/m2/y"] = ( - scenario_mds["Existing Heating Demand Kwh/m2/y"] - scenario_mds["heat_demand"] + scenario_mds["Existing Heating Demand Kwh/m2/y"] + - scenario_mds["heat_demand"] ) scenario_mds = scenario_mds.rename( columns={ "sap_points": "Predicted SAP Points", "kwh_savings": "Energy Saving (Kwh)", - "energy_cost_savings": "Bill Reduction (£ per yr)" + "energy_cost_savings": "Bill Reduction (£ per yr)", } ) @@ -330,7 +383,7 @@ class Outputs: save_excel_to_s3( df=scenario_mds, file_key=f"engine_outputs/{self.format}/{self.today}_scenario_id={scenario_id}.xlsx", - bucket_name="retrofit-data-dev" + bucket_name="retrofit-data-dev", ) def export(self): diff --git a/backend/Property.py b/backend/Property.py index 5e9e5e84..491b74b3 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -490,7 +490,7 @@ class Property: for rec_id in rec_ids: sim_epc = self.simulation_epcs[rec_id].copy() rec_impact = [x for x in impact_summary if x["recommendation_id"] == rec_id][0] - # We update all of the features that should have an impact on the kwh model + # We update all features that should have an impact on the kwh model sim_epc.update( { @@ -660,8 +660,6 @@ class Property: self.set_floor_type() self.set_floor_level() self.set_windows_count() - self.set_energy_source() - self.find_energy_sources() self.set_current_energy(kwh_client, kwh_predictions) def set_solar_panel_configuration(self, solar_panel_configuration): @@ 
-1168,202 +1166,6 @@ class Property: if condition_data.get("windows_area") is not None \ else None - def set_energy_source(self): - """ - This method sets the energy source of the property, based on the mains gas flag and energy tariff. - """ - # Default to "electricity_and_gas" to cover most scenarios including when mains_gas_flag is True - energy_source = "electricity_and_gas" - - # If the tariff explicitly indicates electricity use without a dual indication and mains_gas_flag is not True - # We check for the common electricity tariffs - if not self.data["mains-gas-flag"] and self.data["energy-tariff"] in [ - "Single", - "off-peak 7 hour", - "off-peak 10 hour", - "off-peak 18 hour", - "standard tariff", - "24 hour", - ]: - energy_source = "electricity" - - # Set the energy source based on the conditions above - self.energy_source = energy_source - - def find_energy_sources(self): - # Based on the heating and the hot water - heating_fuel_mapping = { - 'has_mains_gas': 'Natural Gas', - 'has_electric': 'Electricity', - 'has_oil': 'Oil', - 'has_wood_logs': 'Wood Logs', - 'has_coal': 'Coal', - 'has_anthracite': 'Anthracite', - 'has_smokeless_fuel': 'Smokeless Fuel', - 'has_lpg': 'LPG', - 'has_b30k': 'B30K Biofuel', - 'has_air_source_heat_pump': 'Electricity', - 'has_ground_source_heat_pump': 'Electricity', - 'has_water_source_heat_pump': 'Electricity', - 'has_electric_heat_pump': 'Electricity', - 'has_solar_assisted_heat_pump': 'Electricity', - 'has_exhaust_source_heat_pump': 'Electricity', - 'has_community_heat_pump': 'Electricity', - 'has_wood_pellets': 'Wood Pellets', - 'has_community_scheme': 'Varied (Community Scheme)', - "has_dual_fuel_mineral_and_wood": 'Wood Logs', - "has_electricaire": 'Electricity', - "has_wood_chips": 'Wood Logs' - } - - # Hot water - heater_type_to_fuel = { - 'gas instantaneous': 'Natural Gas', - 'electric heat pump': 'Electricity', - 'electric immersion': 'Electricity', - 'gas boiler': 'Natural Gas', - 'oil boiler': 'Oil', - 'electric 
instantaneous': 'Electricity', - 'gas multipoint': 'Natural Gas', - 'heat pump': 'Electricity', - 'solid fuel boiler': 'Solid Fuel', - 'solid fuel range cooker': 'Solid Fuel', - 'room heaters': 'Varied', # Could be any fuel, further specifics needed based on context - "single-point gas": "Natural Gas" - } - - # Define a mapping from system types to general categories or modifications of fuel types - system_type_modification = { - 'from main system': 'Main System', - 'from secondary system': 'Secondary System', - 'from second main heating system': 'Secondary System', - 'community scheme': 'Community Scheme' - } - - hotwater_appliance_to_fuel = { - 'gas range cooker': 'Natural Gas', - 'oil range cooker': 'Oil' - } - - fuel_map = { - None: "Natural Gas (Community Scheme)", - "mains gas": "Natural Gas (Community Scheme)", - "biomass": "Smokeless Fuel", - "electricity": "Electricity", - "biogas": "Smokeless Fuel", - "heat network": "Natural Gas (Community Scheme)", - "lpg": 'LPG', - "biodiesel": "Smokeless Fuel", - "b30d": "B30K Biofuel", - "coal": "Coal", - "oil": "Oil", - "unknown": None # Handle - anything post 2020 is electricity else gas - } - - self.heating_energy_source = list({ - fuel for key, fuel in heating_fuel_mapping.items() if self.main_heating.get(key, False) - }) - - if set(self.heating_energy_source) == {'Electricity', 'Natural Gas'}: - # It means they have mixed heating so we take the primary one, based on main fuel - # This will probably happen in the case of an extension - if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]: - self.heating_energy_source = ['Natural Gas'] - else: - self.heating_energy_source = ['Electricity'] - - if set(self.heating_energy_source) == {'Electricity', 'LPG'}: - if self.main_fuel["clean_description"] in ["Lpg not community", "Lpg community"]: - self.heating_energy_source = ['LPG'] - else: - self.heating_energy_source = ['Electricity'] - - if set(self.heating_energy_source) == 
{'Natural Gas', 'Wood Logs'}: - # It means they have mixed heating so we take the primary one, based on main fuel - # This will probably happen in the case of an extension - if self.main_fuel["clean_description"] in ["Mains gas not community", "Mains gas community"]: - self.heating_energy_source = ['Natural Gas'] - else: - self.heating_energy_source = ['Wood Logs'] - - if len(self.heating_energy_source) > 1 and "Varied (Community Scheme)" not in self.heating_energy_source: - # We might have something like heating energy source equal to ['Natural Gas', 'Varied (Community Scheme)'] - # so we treat this as community heating - raise Exception("Investigate me") - - if len(self.heating_energy_source) == 0: - heating_flags = { - v for k, v in self.main_heating.items() if k not in ["original_description", "clean_description"] - } - hotwater_flags = { - v for k, v in self.hotwater.items() if k not in ["original_description", "clean_description"] - } - - # If all flags are zero, we have a no data example - if (heating_flags == {False} or hotwater_flags == {None}) and ( - hotwater_flags == {False} or hotwater_flags == {None}): - # We have nodata so we try and rely on main fuel - if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown - mapped_fuel = fuel_map[self.main_fuel["fuel_type"]] - self.heating_energy_source = mapped_fuel - self.hot_water_energy_source = mapped_fuel - return - else: - raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") - - # We handle edge case where no heating system is indicated - if self.main_fuel["fuel_type"] in fuel_map: - mapped_fuel = fuel_map[self.main_fuel["fuel_type"]] - self.heating_energy_source = mapped_fuel - self.hot_water_energy_source = mapped_fuel - return - - if len(self.heating_energy_source) > 1: - # We treat this as a community scheme - self.heating_energy_source = ["Varied (Community Scheme)"] - - self.heating_energy_source = self.heating_energy_source[0] - - if 
self.heating_energy_source == "Varied (Community Scheme)": - - if self.main_fuel["fuel_type"] in fuel_map: # We assume when None as it's unknown - mapped_to = fuel_map[self.main_fuel["fuel_type"]] - if mapped_to is None and self.main_fuel["fuel_type"] == "unknown": - # Handle logic based on age band - if self.year_built >= 2020: - self.heating_energy_source = "Electricity" - else: - self.heating_energy_source = "Natural Gas (Community Scheme)" - - else: - self.heating_energy_source = mapped_to - else: - raise NotImplementedError(f"Unhandled fuel {self.main_fuel['fuel_type']}") - - if self.hotwater["heater_type"] is not None: - self.hot_water_energy_source = heater_type_to_fuel[self.hotwater["heater_type"]] - - if self.hotwater["extra_features"] == "plus solar": - self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal" - return - elif self.hotwater["system_type"] is not None: - fuel = system_type_modification[self.hotwater["system_type"]] - - if self.hotwater["extra_features"] == "plus solar": - self.hot_water_energy_source = self.heating_energy_source + " + Solar Thermal" - return - - if fuel in ['Main System', "Community Scheme"]: - self.hot_water_energy_source = self.heating_energy_source - elif fuel in ['Secondary System']: - # Check the secondary heating system - secondary_heating = self.data["secondheat-description"] - self.hot_water_energy_source = assumptions.DESCRIPTIONS_TO_FUEL_TYPES[secondary_heating]["fuel"] - else: - raise NotImplementedError(f"Investiage me - unhandled hot water fuel {fuel}") - else: - self.hot_water_energy_source = hotwater_appliance_to_fuel[self.hotwater["appliance"]] - def is_ashp_valid(self, measures): if "air_source_heat_pump" in self.non_invasive_recommendations: diff --git a/backend/README.md b/backend/README.md index 005d6fc4..b8e859c2 100644 --- a/backend/README.md +++ b/backend/README.md @@ -45,12 +45,14 @@ cp .env.example .env ## Running the Application -from within the application you can run with the 
following command: +from `model/backend/` you can run with the following command: ```commandline uvicorn app.main:app --reload ``` +Or run `sh run_local.sh`, which runs that same uvicorn command. + You application will be available at the designated url ## API Documentation @@ -172,7 +174,7 @@ For instance, if your server is running locally on port 8000, you can use curl to get a dummy token: ```commandline -curl http://localhost:8000/dummy-token +curl http://localhost:8000/local/dummy-token ``` You will receive a response containing the dummy JWT diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile index 5a09bd44..07159357 100644 --- a/backend/address2UPRN/handler/Dockerfile +++ b/backend/address2UPRN/handler/Dockerfile @@ -1,4 +1,17 @@ FROM public.ecr.aws/lambda/python:3.10 +# FROM python:3.11.10-bullseye + + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME +ARG EPC_AUTH_TOKEN + +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} +ENV EPC_AUTH_TOKEN=${EPC_AUTH_TOKEN} + # Set working directory (Lambda task root) WORKDIR /var/task @@ -8,13 +21,17 @@ WORKDIR /var/task # ----------------------------- COPY backend/address2UPRN/handler/requirements.txt . + # Install dependencies into Lambda runtime RUN pip install --no-cache-dir -r requirements.txt -# ----------------------------- -# Copy application code -# ----------------------------- + +# Copy necessary files for database and utility imports COPY utils/ utils/ +COPY backend/ backend/ +COPY datatypes/ datatypes/ + +# Copy the handler COPY backend/address2UPRN/main.py . 
# ----------------------------- diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt index bc753841..6ef41b2d 100644 --- a/backend/address2UPRN/handler/requirements.txt +++ b/backend/address2UPRN/handler/requirements.txt @@ -1,3 +1,11 @@ -epc-api-python==1.0.2 +pandas==2.2.2 +numpy<2.0 +requests tqdm -pandas \ No newline at end of file +openpyxl +epc-api-python==1.0.2 +boto3==1.35.44 +sqlmodel +sqlalchemy==2.0.36 +psycopg2-binary==2.9.10 +pydantic-settings==2.6.0 \ No newline at end of file diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index ba386e0a..af29a095 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -3,12 +3,23 @@ import os from urllib.parse import urlencode import pandas as pd from difflib import SequenceMatcher -from tqdm import tqdm from utils.logger import setup_logger +import re +from typing import Set +import json +import requests +from uuid import UUID +import uuid +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from utils.s3 import ( + save_csv_to_s3, + read_csv_from_s3 as read_csv_from_s3_dict, + parse_s3_uri, +) +from datetime import datetime logger = setup_logger() -import re EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", @@ -17,9 +28,28 @@ EPC_AUTH_TOKEN = os.getenv( if EPC_AUTH_TOKEN is None: raise RuntimeError("EPC_AUTH_TOKEN not defined in env") -import re -from difflib import SequenceMatcher -from typing import Set + +def is_valid_postcode(postcode_clean: str) -> bool: + """ + Validate postcode using postcodes.io. + + Expects a sanitised postcode (e.g. E84SQ). + Returns True if valid, False otherwise. 
+ """ + POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate" + if not postcode_clean: + return False + + try: + resp = requests.get( + POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean), + timeout=5, + ) + resp.raise_for_status() + return resp.json().get("result", False) + except requests.RequestException: + # Network issues, rate limits, etc. + return False def levenshtein(a: str, b: str) -> float: @@ -300,27 +330,29 @@ def get_uprn_candidates( ) -def get_uprn(user_inputed_address: str, postcode: str, return_address=False): +def get_uprn_with_epc_df( + user_inputed_address: str, + epc_df: pd.DataFrame, + verbose: bool = False, +): """ - Return uprn (str) - Return False if failed to find a sensible matching epc - Return Nons when epc found but no UPRN + Return uprn (str) using a pre-fetched EPC dataframe. + This avoids calling the API multiple times for the same postcode. """ - df = get_epc_data_with_postcode(postcode=postcode) - - if df.empty: + if epc_df.empty: return None scored_df = get_uprn_candidates( - df, + epc_df, user_address=user_inputed_address, ) # Best score best_score = scored_df.iloc[0]["lexiscore"] - if best_score <= 0: - return None + # # Return None if score is below threshold + # if best_score < 0.7: + # return None # All rank-1 rows (possible draw) top_rank_df = scored_df[scored_df["lexirank"] == 1] @@ -330,18 +362,41 @@ def get_uprn(user_inputed_address: str, postcode: str, return_address=False): return None address = top_rank_df["address"].values[0] - lexiscore = float(top_rank_df["lexiscore"].values[0]) + score = float(top_rank_df["lexiscore"].values[0]) - logger.info(f"Address found to be: {address}, with lexiscore {lexiscore}") + logger.info(f"Address found to be: {address}, with lexiscore {score}") # Safe to return the agreed UPRN found_uprn = top_rank_df.iloc[0]["uprn"] if found_uprn == "": return None - if return_address: - return found_uprn, address - return found_uprn + if verbose: + return 
(found_uprn, address, score) + else: + return found_uprn + + +def get_uprn( + user_inputed_address: str, + postcode: str, + verbose: bool = False, +): + """ + Return uprn (str) + Return False if failed to find a sensible matching epc + Return None when epc found but no UPRN + + This function fetches EPC data via API for a single postcode. + For processing multiple addresses in the same postcode, use get_uprn_with_epc_df instead. + """ + df = get_epc_data_with_postcode(postcode=postcode) + + return get_uprn_with_epc_df( + user_inputed_address=user_inputed_address, + epc_df=df, + verbose=verbose, + ) def resolve_uprns_for_postcode_group( @@ -424,148 +479,302 @@ def resolve_uprns_for_postcode_group( ) -def test(a, b): - assert a == b, f"erorr: {a}{type(a)} != {b}: {type(b)}" +def save_results_to_s3( + results_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None +) -> bool: + """ + Save results DataFrame to S3 as CSV. + + :param results_df: The DataFrame containing results + :param task_id: The task ID (used for file naming) + :param bucket_name: The S3 bucket name (defaults to env variable) + :return: True if successful, False otherwise + """ + if bucket_name is None: + bucket_name = os.getenv("S3_BUCKET_NAME") + + if not bucket_name: + logger.error( + "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set" + ) + return False + + try: + # Create a filename with the task ID + file_name = f"{datetime.now().isoformat()}_{str(uuid.uuid4())[:8]}" + file_key = f"ara_raw_outputs/{task_id}/{sub_task_id}/{file_name}.csv" + + # Save to S3 + success = save_csv_to_s3(results_df, bucket_name, file_key) + + if success: + logger.info(f"Successfully saved results to s3://{bucket_name}/{file_key}") + return True + else: + logger.error(f"Failed to save results to S3") + return False + + except Exception as e: + logger.error(f"Error saving results to S3: {str(e)}") + return False -def run_all_test(): - # Basic usage with different post codes 
styles - test(get_epc_data_with_postcode("b93 8sy").shape[0], 63) - test(get_epc_data_with_postcode("B938sy").shape[0], 63) - test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63) - test(get_epc_data_with_postcode("b93 8Sy").shape[0], 63) +def handler(event, context, local=False): + print("=== Address2UPRN Lambda Handler ===") + print(f"Function: {context.function_name}") + print(f"Request ID: {context.aws_request_id}") - test(get_uprn("68", "b93 8sy"), "100070989938") - test(get_uprn("68 Glendon Way", "b93 8sy"), "100070989938") - test(get_uprn("Flat A, 28, Nelgarde Road", "se6 4tf"), "100023278633") - test(get_uprn("28 A", "se6 4tf"), "100023278633") - test(get_uprn("28A", "se6 4tf"), "100023278633") - test(get_uprn("6 Aitken Close", "E8 4SQ"), False) + # Handle local testing + if local is True: + event = { + "Records": [ + { + "body": json.dumps( + { + "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + "sub_task_id": "6a427b6e-1ece-4983-b1e5-9bffccc53d1d", + "s3_uri": "s3://retrofit-data-dev/ara_postcode_splitter_batches/e31f2f21-175b-4a91-a3ec-a6baa325e917/8673913b-1a88-42d7-8578-0449123d94b0/2026-02-16T12:00:20.257856_7b520c0e.csv", + } + ) + } + ] + } - # unique case - test(get_uprn("Flat 5, 1, Semley Gate", "e9 5nh"), "10008238198") - test(get_uprn("5 , 1 Semley Gate", "e9 5nh"), "10008238198") - test(get_uprn("5 Semley Gate", "e9 5nh"), "10008238198") - test(get_uprn("1, 5 Semley Gate", "e9 5nh"), False) - test( - get_uprn("1 Semley Gate", "e9 5nh"), "10008238188" - ) # this one return "flat 1, in 1 semley gate" - test( - get_uprn("48 Oswald Street", "E5 0BT"), False - ) # this one return "flat 1, in 1 semley gate" - test( - get_uprn("42 Oswald Street", "E5 0BT"), False - ) # this one return "flat 1, in 1 semley gate" - test( - get_uprn("46 Oswald Street", "E5 0BT"), False - ) # this one return "flat 1, in 1 semley gate" - get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street") - get_uprn_candidates( - 
get_epc_data_with_postcode("Cr2 7dl"), - "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY", - ) + print(f"Event: {json.dumps(event, indent=2, default=str)}") + print("===================================") + # Handle both single event and batch events (SQS, etc.) + records = event.get("Records", [event]) + results = [] + errors = [] + subtask_interface = SubTaskInterface() -if __name__ == "__main__": - INPUT_FILE = "hackney.xlsx" - - ADDRESS_COL = "Address 1" - POSTCODE_COL = "Postcode" - UPRN_COL = "UPRN" - - df = pd.read_excel(INPUT_FILE) - - failures = [] - - for _, row in tqdm( - df.iterrows(), - total=len(df), - desc="Auditing UPRNs", - ): - input_address = str(row[ADDRESS_COL]).strip() - postcode = str(row[POSTCODE_COL]).strip() - - expected_uprn = None if pd.isna(row[UPRN_COL]) else str(int(row[UPRN_COL])) - + for record in records: + task_id = None + subtask_id = None try: - epc_df = get_epc_data_with_postcode(postcode) + # Parse body (inputs) + if isinstance(record.get("body"), str): + body = json.loads(record["body"]) + else: + body = record.get("body", {}) - if epc_df.empty: - failures.append( - { - **row.to_dict(), - "found_uprn": None, - "best_match_uprn": None, - "best_match_address": None, - "best_match_lexiscore": None, - "status": "no_epc_results", - } + # Validate required fields + task_id = body.get("task_id") + subtask_id = body.get("sub_task_id") + s3_uri = body.get("s3_uri") + + if not task_id: + errors.append({"error": "Missing required field: task_id"}) + continue + + if not subtask_id: + errors.append({"error": "Missing required field: sub_task_id"}) + continue + + if not s3_uri: + errors.append({"error": "Missing required field: s3_uri"}) + continue + + # Convert task_id to UUID + try: + task_id = UUID(task_id) if isinstance(task_id, str) else task_id + except ValueError as e: + errors.append({"error": f"Invalid UUID format for task_id: {str(e)}"}) + continue + + # Convert sub_task_id to UUID + try: + subtask_id = ( + UUID(subtask_id) if 
isinstance(subtask_id, str) else subtask_id + ) + except ValueError as e: + errors.append( + {"error": f"Invalid UUID format for sub_task_id: {str(e)}"} ) continue - scored_df = get_uprn_candidates( - epc_df, - user_address=input_address, - ) + # Update existing subtask to 'in progress' + subtask_interface.update_subtask_status(subtask_id, "in progress") + logger.info(f"Processing subtask {subtask_id} for task {task_id}") - best_row = scored_df.iloc[0] + # Parse S3 URI and read CSV from S3 + logger.info(f"Reading data from S3: {s3_uri}") + try: + bucket, key = parse_s3_uri(s3_uri) + csv_data = read_csv_from_s3_dict(bucket, key) + df = pd.DataFrame(csv_data) + logger.info(f"Loaded {len(df)} rows from S3") + except Exception as s3_error: + logger.error(f"Failed to read data from S3: {s3_error}") + errors.append( + {"error": "Failed to read data from S3", "details": str(s3_error)} + ) + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(s3_error)} + ) + except Exception as db_error: + logger.error(f"Failed to update subtask status: {db_error}") + continue - best_match_uprn = str(best_row["uprn"]) - best_match_address = best_row["address"] - best_match_lexiscore = round(float(best_row["lexiscore"]), 4) + # Process the rows + logger.info(f"Processing {len(df)} rows for task {task_id}") - found_uprn = get_uprn(input_address, postcode) + # Create user_input column by concatenating Address columns if not already present + if "user_input" not in df.columns: + df["user_input"] = ( + df["Address 1"].fillna("") + + " " + + df["Address 2"].fillna("") + + " " + + df["Address 3"].fillna("") + ).str.strip() + logger.info(f"Created user_input column from Address 1 and Address 2") + else: + logger.info(f"user_input column already present in data") + + clean_df = df.dropna(subset=["postcode_clean"]) + + postcode_to_addresses = { + postcode: group.to_dict(orient="records") + for postcode, group in clean_df.groupby("postcode_clean", 
sort=False) + } + + logger.info(f"Total postcodes: {len(postcode_to_addresses)}") + + # Process each postcode group + + results_data = [] + + for postcode, postcode_rows in postcode_to_addresses.items(): + logger.info( + f"Processing postcode: {postcode} with {len(postcode_rows)} rows" + ) + + # Validate postcode before processing + if not is_valid_postcode(postcode): + logger.warning(f"Postcode {postcode} is invalid, skipping") + continue + + # Fetch EPC data once per postcode + try: + epc_df = get_epc_data_with_postcode(postcode=postcode) + logger.info( + f"Fetched {len(epc_df)} EPC records for postcode {postcode}" + ) + except Exception as e: + logger.error( + f"Failed to fetch EPC data for postcode {postcode}: {e}" + ) + continue + + # Process each address in this postcode with the same EPC data + for row in postcode_rows: + try: + user_input = row.get("user_input", "") + if not user_input: + logger.warning( + f"Skipping row with missing user_input for postcode {postcode}" + ) + continue + + # Get UPRN using the pre-fetched EPC data with all return options + result = get_uprn_with_epc_df( + user_inputed_address=user_input, epc_df=epc_df, verbose=True + ) + + # Parse result tuple if successful + if result: + uprn, found_address, score = result + logger.info( + f"Found UPRN for {user_input} in {postcode}: {uprn} (score: {score})" + ) + + results_data.append( + { + **row, # Include all original data + "uprn": uprn, + "domna_found_address": found_address, + "domna_lexiscore": score, + } + ) + else: + logger.warning( + f"No UPRN found for {user_input} in {postcode}" + ) + results_data.append( + { + **row, # Include all original data + "uprn": None, + "domna_found_address": None, + "domna_lexiscore": None, + } + ) + + except Exception as e: + logger.error( + f"Error processing address {row.get('user_input', 'unknown')}: {e}" + ) + # Still add the row with error markers + results_data.append( + { + **row, + "uprn": None, + "domna_found_address": None, + 
"domna_lexiscore": None, + "error": str(e), + } + ) + continue + + # Create results DataFrame + result_df = pd.DataFrame(results_data) + + # Save results to S3 + try: + save_results_to_s3(result_df, str(task_id), str(subtask_id)) + except Exception as s3_error: + logger.error(f"Failed to save results to S3: {s3_error}") + + # Mark subtask as completed + try: + subtask_interface.update_subtask_status( + subtask_id, + "completed", + outputs={"rows_processed": "todo -> show sensible output"}, + ) + logger.info(f"Marked subtask {subtask_id} as completed") + except Exception as db_error: + logger.error(f"Failed to mark subtask as completed: {db_error}") except Exception as e: - failures.append( - { - **row.to_dict(), - "found_uprn": None, - "best_match_uprn": None, - "best_match_address": None, - "best_match_lexiscore": None, - "status": "exception", - "error": str(e), - } - ) - continue + logger.error(f"Unexpected error processing record: {e}", exc_info=True) + errors.append({"error": "Unexpected error", "details": str(e)}) + # Mark subtask as failed if we have one + if subtask_id: + try: + subtask_interface.update_subtask_status( + subtask_id, "failed", outputs={"error": str(e)} + ) + except Exception as db_error: + logger.error(f"Failed to update subtask status: {db_error}") - found_uprn_norm = None if not found_uprn else str(found_uprn) + # Return error if all records failed + logger.info(results_data) + logger.info(results) + if errors and not results: + return {"statusCode": 500, "body": json.dumps({"errors": errors})} - if found_uprn_norm != expected_uprn: - failures.append( - { - **row.to_dict(), - "found_uprn": found_uprn_norm, - "best_match_uprn": best_match_uprn, - "best_match_address": best_match_address, - "best_match_lexiscore": best_match_lexiscore, - "status": ("no_match" if found_uprn_norm is None else "mismatch"), - } - ) - - failures_df = pd.DataFrame(failures) - - print("===================================") - print(f"Total rows : {len(df)}") - 
print(f"Failures : {len(failures_df)}") - print("===================================") - - failures_df.to_excel( - "hackney_uprn_failures.xlsx", - index=False, - ) + return { + "statusCode": 200, + "body": json.dumps( + {"processed": results, "errors": errors if errors else None} + ), + } -def handler(event, context): - print("hello world") - return {"statusCode": 200, "body": "hello world"} - - -# TO do function dispatcher, - -# get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate) -# fix that -# Look again at flat 1 -# pandas reader the seperate postcode_splitter -# dump into s3 +# TODO: +# Don't add results to return messages as its too verbose +# capture the exepection as e, into s3, to find the logs go to s3 +# Upload results to s3 as well as csv diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index a71b5827..090ac5ae 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -1,3 +1,5 @@ +# one time script for a customer forhousing + import pandas as pd from tqdm import tqdm from backend.address2UPRN.main import get_uprn @@ -5,20 +7,35 @@ from backend.address2UPRN.main import get_uprn # Enable tqdm for pandas tqdm.pandas() -df = pd.read_excel("address2.xlsx") +file_name = "forhousing.xlsx" + +df = pd.read_excel(file_name) def extract_uprn(row): - print(row["User Input"], row["Postcode"]) - result = get_uprn(row["User Input"], row["Postcode"], return_address=True) + user_input = "Address" + postcode = "Postcode" + result = get_uprn( + row[user_input], + row[postcode], + return_address=True, + return_EPC=True, + return_score=True, + ) if result is None: - return pd.Series([None, None]) + return pd.Series([None, None, None, None]) - uprn, found_address = result - return pd.Series([uprn, found_address]) + uprn, found_address, epc, score = result + return pd.Series([uprn, found_address, epc, score]) -df[["juntes uprn", "junte found address"]] = 
df.progress_apply(extract_uprn, axis=1) +df[["juntes uprn", "junte found address", "junte found epc", "junte score"]] = ( + df.progress_apply(extract_uprn, axis=1) +) -df.to_excel("outputs2.xlsx", index=False) +df.to_excel(f"{file_name}_outputs.xlsx", index=False) + +# TODO: add lexiscore +# TODO: run it +# TODO: give it to danny diff --git a/backend/app/config.py b/backend/app/config.py index 41552ae5..26fb6b8b 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,54 +1,67 @@ import os from functools import lru_cache +from pathlib import Path from pydantic_settings import BaseSettings, SettingsConfigDict from typing import Optional +from utils.logger import setup_logger + +logger = setup_logger() + def resolve_env_file() -> Optional[str]: env = os.getenv("ENVIRONMENT", "local") + backend_dir = Path(__file__).resolve().parents[1] + if env == "local": - return "backend/.env" + env_file = backend_dir / ".env" + print("USING ENV FILE:", env_file) + logger.debug("USING ENV FILE:", env_file) + return str(env_file) if env == "test": - return "backend/.env.test" + env_file = backend_dir / ".env.test" + logger.debug("USING ENV FILE:", env_file) + return str(env_file) # prod = no env file return None class Settings(BaseSettings): - API_KEY: str + API_KEY: str = "changeme" API_KEY_NAME: str = "X-API-KEY" - SECRET_KEY: str - ENVIRONMENT: str - DATA_BUCKET: str + SECRET_KEY: str = "changeme" + ENVIRONMENT: str = "changeme" + DATA_BUCKET: str = "changeme" PLAN_TRIGGER_BUCKET: str - ENGINE_SQS_URL: str + ENGINE_SQS_URL: str = "changeme" + CATEGORISATION_SQS_URL: str = "changeme" # Third parties - EPC_AUTH_TOKEN: str - GOOGLE_SOLAR_API_KEY: str + EPC_AUTH_TOKEN: str = "changeme" + GOOGLE_SOLAR_API_KEY: str = "changeme" # Database settings - DB_HOST: str - DB_PASSWORD: str - DB_USERNAME: str - DB_PORT: str - DB_NAME: str + DB_HOST: str = "changeme" + DB_PASSWORD: str = "changeme" + DB_USERNAME: str = "changeme" + DB_PORT: str = "changeme" + DB_NAME: str = "changeme" 
# Prediction buckets - SAP_PREDICTIONS_BUCKET: str - CARBON_PREDICTIONS_BUCKET: str - HEAT_PREDICTIONS_BUCKET: str + SAP_PREDICTIONS_BUCKET: str = "changeme" + CARBON_PREDICTIONS_BUCKET: str = "changeme" + HEAT_PREDICTIONS_BUCKET: str = "changeme" # LIGHTING_COST_PREDICTIONS_BUCKET: str # HEATING_COST_PREDICTIONS_BUCKET: str # HOT_WATER_COST_PREDICTIONS_BUCKET: str - HEATING_KWH_PREDICTIONS_BUCKET: str - HOTWATER_KWH_PREDICTIONS_BUCKET: str + HEATING_KWH_PREDICTIONS_BUCKET: str = "changeme" + HOTWATER_KWH_PREDICTIONS_BUCKET: str = "changeme" # Other S3 buckts - ENERGY_ASSESSMENTS_BUCKET: str + ENERGY_ASSESSMENTS_BUCKET: str = "changeme" # Optional AWS creds (only required in local) AWS_ACCESS_KEY_ID: Optional[str] = None diff --git a/backend/app/db/base.py b/backend/app/db/base.py new file mode 100644 index 00000000..fa2b68a5 --- /dev/null +++ b/backend/app/db/base.py @@ -0,0 +1,5 @@ +from sqlalchemy.orm import DeclarativeBase + + +class Base(DeclarativeBase): + pass diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index fa97c206..ae48afed 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -1,5 +1,10 @@ from sqlalchemy import func -from backend.app.db.models.recommendations import Plan, PlanRecommendations, Recommendation, Scenario +from backend.app.db.models.recommendations import ( + PlanModel, + PlanRecommendations, + Recommendation, + ScenarioModel, +) def aggregate_portfolio_recommendations( @@ -8,7 +13,7 @@ def aggregate_portfolio_recommendations( scenario_id: int, total_valuation_increase: float, labour_days: float, - aggregated_data: dict + aggregated_data: dict, ): # Aggregate multiple fields aggregates = ( @@ -16,15 +21,20 @@ def aggregate_portfolio_recommendations( func.sum(Recommendation.estimated_cost).label("cost"), func.sum(Recommendation.total_work_hours).label("total_work_hours"), 
func.sum(Recommendation.kwh_savings).label("energy_savings"), - func.sum(Recommendation.co2_equivalent_savings).label("co2_equivalent_savings"), + func.sum(Recommendation.co2_equivalent_savings).label( + "co2_equivalent_savings" + ), func.sum(Recommendation.energy_cost_savings).label("energy_cost_savings"), ) - .join(PlanRecommendations, PlanRecommendations.recommendation_id == Recommendation.id) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join( + PlanRecommendations, + PlanRecommendations.recommendation_id == Recommendation.id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( - Plan.portfolio_id == portfolio_id, - Plan.scenario_id == scenario_id, - Recommendation.default == True + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id == scenario_id, + Recommendation.default == True, ) .one() ) @@ -36,11 +46,11 @@ def aggregate_portfolio_recommendations( "energy_savings": aggregates.energy_savings or 0, "co2_equivalent_savings": aggregates.co2_equivalent_savings or 0, "energy_cost_savings": aggregates.energy_cost_savings or 0, - **aggregated_data + **aggregated_data, } # Get the scenario and update the fields. 
This data needs to be stored against the scenario, not the portfolio - portfolio_scenario = session.query(Scenario).filter_by(id=scenario_id).one() + portfolio_scenario = session.query(ScenarioModel).filter_by(id=scenario_id).one() # Update the data for key, value in aggregates_dict.items(): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 51562f55..ed3fb435 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,17 +1,42 @@ -from sqlalchemy import text -from sqlalchemy import insert, delete -from sqlalchemy.orm import Session +from typing import Any, Dict, List, Optional +from sqlalchemy import ( + ColumnElement, + and_, + func, + inspect, + text, + insert, + delete, + select, +) +from sqlalchemy.orm import Session, Mapper from sqlalchemy.exc import SQLAlchemyError +from sqlmodel import Session + from backend.app.db.models.recommendations import ( - Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario + PlanModel, + Recommendation, + RecommendationMaterials, + PlanRecommendations, + ScenarioModel, ) from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session def prepare_plan_data( - p, body, scenario_id, eco_packages, valuations, new_sap_points, new_epc, default_recommendations, - rebaselining_carbon=0, rebaselining_heat_demand=0, rebaselining_kwh=0, rebaselining_bills=0, + p, + body, + scenario_id, + eco_packages, + valuations, + new_sap_points, + new_epc, + default_recommendations, + rebaselining_carbon=0, + rebaselining_heat_demand=0, + rebaselining_kwh=0, + rebaselining_bills=0, ): """ Utility function to prepare the data that goes into the production of a plan. 
Is a fairly rough and unstructured @@ -32,21 +57,37 @@ def prepare_plan_data( """ # Plan carbon savings co2_savings = sum( - [r["co2_equivalent_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["co2_equivalent_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] ) post_co2_emissions = p.energy["co2_emissions"] - rebaselining_carbon - co2_savings # Plan bill savings energy_bill_savings = sum( - [r["energy_cost_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["energy_cost_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) + post_energy_bill = ( + sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings ) - post_energy_bill = sum(p.current_energy_bill.values()) - rebaselining_bills - energy_bill_savings # energy consumption energy_consumption_savings = sum( - [r["kwh_savings"] for r in default_recommendations if not r.get("already_installed", False)] + [ + r["kwh_savings"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) + post_energy_consumption = ( + p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings ) - post_energy_consumption = p.current_energy_consumption - rebaselining_kwh - energy_consumption_savings valuation_post_retrofit, valuation_increase = None, None if valuations["current_value"]: @@ -54,9 +95,19 @@ def prepare_plan_data( valuation_post_retrofit = valuations["average_increased_value"] # plan costing data - cost_of_works = sum([r["total"] for r in default_recommendations if not r.get("already_installed", False)]) + cost_of_works = sum( + [ + r["total"] + for r in default_recommendations + if not r.get("already_installed", False) + ] + ) contingency_cost = sum( - [r.get("contingency", 0) for r in default_recommendations if not r.get("already_installed", False)] + [ + r.get("contingency", 0) + for r in 
default_recommendations + if not r.get("already_installed", False) + ] ) return { @@ -86,7 +137,7 @@ def prepare_plan_data( "valuation_increase": valuation_increase, "cost_of_works": float(cost_of_works), "contingency_cost": float(contingency_cost), - "plan_type": eco_packages.get(p.id, (None, None, None))[2] + "plan_type": eco_packages.get(p.id, (None, None, None))[2], } @@ -97,7 +148,7 @@ def create_plan(session: Session, plan): :param plan: dictionary of data representing a plan to be created """ try: - new_plan = Plan(**plan) + new_plan = PlanModel(**plan) session.add(new_plan) session.flush() session.commit() @@ -120,9 +171,7 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int ] stmt = ( - insert(Plan) - .values(payload) - .returning(Plan.id, Plan.property_id) + insert(PlanModel).values(payload).returning(PlanModel.id, PlanModel.property_id) ) result = session.execute(stmt).all() @@ -133,14 +182,14 @@ def bulk_create_plans(session: Session, plans_to_create: list[dict]) -> dict[int def create_scenario(session: Session, scenario: dict) -> int: existing_scenario = ( - session.query(Scenario) + session.query(ScenarioModel) .filter_by(portfolio_id=scenario["portfolio_id"]) .first() ) scenario["is_default"] = not bool(existing_scenario) - new_scenario = Scenario(**scenario) + new_scenario = ScenarioModel(**scenario) session.add(new_scenario) session.flush() # ensures ID is populated @@ -167,7 +216,9 @@ def create_recommendation(session: Session, recommendation): raise e -def create_recommendation_material(session: Session, recommendation_id, material_id, depth): +def create_recommendation_material( + session: Session, recommendation_id, material_id, depth +): """ This function will create a record for the recommendation_material in the database if it does not exist. 
:param session: The databse session @@ -177,9 +228,7 @@ def create_recommendation_material(session: Session, recommendation_id, material """ new_recommendation_material = RecommendationMaterials( - recommendation_id=recommendation_id, - material_id=material_id, - depth=depth + recommendation_id=recommendation_id, material_id=material_id, depth=depth ) session.add(new_recommendation_material) session.flush() @@ -196,13 +245,17 @@ def create_plan_recommendations(session: Session, plan_id, recommendation_ids): """ # Prepare a list of dictionaries for bulk insert - data = [{"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids] + data = [ + {"plan_id": plan_id, "recommendation_id": rid} for rid in recommendation_ids + ] # Bulk insert using SQLAlchemy's core API session.execute(insert(PlanRecommendations).values(data)) -def upload_recommendations(session: Session, recommendations_to_upload, property_id, new_plan_id): +def upload_recommendations( + session: Session, recommendations_to_upload, property_id, new_plan_id +): try: # Prepare data for bulk insert for Recommendation recommendations_data = [ @@ -213,8 +266,14 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "description": rec["description"], "estimated_cost": float(rec["total"]), "default": rec["default"], - "starting_u_value": float(rec.get("starting_u_value")) if rec.get("starting_u_value") else None, - "new_u_value": float(rec.get("new_u_value")) if rec.get("new_u_value") else None, + "starting_u_value": ( + float(rec.get("starting_u_value")) + if rec.get("starting_u_value") + else None + ), + "new_u_value": ( + float(rec.get("new_u_value")) if rec.get("new_u_value") else None + ), "sap_points": float(rec["sap_points"]), "energy_savings": float(rec["heat_demand"]), "kwh_savings": float(rec["kwh_savings"]), @@ -223,13 +282,17 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "energy_cost_savings": 
float(rec["energy_cost_savings"]), "labour_days": float(rec["labour_days"]), "already_installed": rec["already_installed"], - "heat_demand": float(rec["heat_demand"]) + "heat_demand": float(rec["heat_demand"]), } for rec in recommendations_to_upload ] # Insert the recommendations, get back the IDs - stmt = insert(Recommendation).returning(Recommendation.id).values(recommendations_data) + stmt = ( + insert(Recommendation) + .returning(Recommendation.id) + .values(recommendations_data) + ) result = session.execute(stmt) uploaded_recommendation_ids = [row[0] for row in result] @@ -243,11 +306,15 @@ def upload_recommendations(session: Session, recommendations_to_upload, property "quantity_unit": part.get("quantity_unit", None), "estimated_cost": float(part.get("total", part.get("total_cost"))), } - for rec, recommendation_id in zip(recommendations_to_upload, uploaded_recommendation_ids) + for rec, recommendation_id in zip( + recommendations_to_upload, uploaded_recommendation_ids + ) for part in rec["parts"] ] - session.bulk_insert_mappings(RecommendationMaterials, recommendation_materials_data) + session.bulk_insert_mappings( + RecommendationMaterials, recommendation_materials_data + ) # flush the changes to get the newly created IDs session.flush() @@ -283,25 +350,27 @@ def bulk_upload_recommendations_and_materials( plan_ids_by_index = [] for rec in recommendation_payload: - recommendation_rows.append({ - "property_id": rec["property_id"], - "type": rec["type"], - "measure_type": rec["measure_type"], - "description": rec["description"], - "estimated_cost": rec["estimated_cost"], - "default": rec["default"], - "starting_u_value": rec["starting_u_value"], - "new_u_value": rec["new_u_value"], - "sap_points": rec["sap_points"], - "heat_demand": rec["heat_demand"], - "kwh_savings": rec["kwh_savings"], - "co2_equivalent_savings": rec["co2_equivalent_savings"], - "energy_savings": rec["energy_savings"], - "energy_cost_savings": rec["energy_cost_savings"], - 
"total_work_hours": rec["total_work_hours"], - "labour_days": rec["labour_days"], - "already_installed": rec["already_installed"], - }) + recommendation_rows.append( + { + "property_id": rec["property_id"], + "type": rec["type"], + "measure_type": rec["measure_type"], + "description": rec["description"], + "estimated_cost": rec["estimated_cost"], + "default": rec["default"], + "starting_u_value": rec["starting_u_value"], + "new_u_value": rec["new_u_value"], + "sap_points": rec["sap_points"], + "heat_demand": rec["heat_demand"], + "kwh_savings": rec["kwh_savings"], + "co2_equivalent_savings": rec["co2_equivalent_savings"], + "energy_savings": rec["energy_savings"], + "energy_cost_savings": rec["energy_cost_savings"], + "total_work_hours": rec["total_work_hours"], + "labour_days": rec["labour_days"], + "already_installed": rec["already_installed"], + } + ) parts_by_index.append(rec["parts"]) plan_ids_by_index.append(rec["plan_id"]) @@ -310,9 +379,7 @@ def bulk_upload_recommendations_and_materials( # 2. 
Insert recommendations and get IDs # --------------------------------------------------------- result = session.execute( - insert(Recommendation) - .values(recommendation_rows) - .returning(Recommendation.id) + insert(Recommendation).values(recommendation_rows).returning(Recommendation.id) ) recommendation_ids = [row[0] for row in result] @@ -324,19 +391,19 @@ def bulk_upload_recommendations_and_materials( for recommendation_id, parts in zip(recommendation_ids, parts_by_index): for part in parts: - materials_rows.append({ - "recommendation_id": recommendation_id, - "material_id": part["material_id"], - "depth": part["depth"], - "quantity": part["quantity"], - "quantity_unit": part["quantity_unit"], - "estimated_cost": part["estimated_cost"], - }) + materials_rows.append( + { + "recommendation_id": recommendation_id, + "material_id": part["material_id"], + "depth": part["depth"], + "quantity": part["quantity"], + "quantity_unit": part["quantity_unit"], + "estimated_cost": part["estimated_cost"], + } + ) if materials_rows: - session.execute( - insert(RecommendationMaterials).values(materials_rows) - ) + session.execute(insert(RecommendationMaterials).values(materials_rows)) # --------------------------------------------------------- # 4. 
Insert plan ↔ recommendation links @@ -346,26 +413,22 @@ def bulk_upload_recommendations_and_materials( "plan_id": plan_id, "recommendation_id": recommendation_id, } - for plan_id, recommendation_id in zip( - plan_ids_by_index, recommendation_ids - ) + for plan_id, recommendation_id in zip(plan_ids_by_index, recommendation_ids) ] - session.execute( - insert(PlanRecommendations).values(plan_recommendation_rows) - ) + session.execute(insert(PlanRecommendations).values(plan_recommendation_rows)) def chunked(iterable, size=100): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] def get_property_ids(portfolio_id: int) -> list[int]: with db_read_session() as session: return [ - pid for (pid,) in - session.query(PropertyModel.id) + pid + for (pid,) in session.query(PropertyModel.id) .filter(PropertyModel.portfolio_id == portfolio_id) .all() ] @@ -381,12 +444,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # recommendation_materials (via recommendation) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING recommendation r WHERE rm.recommendation_id = r.id AND r.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -394,12 +459,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # plan_recommendations (via plan) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations pr USING plan p WHERE pr.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -407,13 +474,15 @@ def delete_property_batch(session: Session, property_ids: list[int]): # funding_package_measures # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM funding_package_measures fpm USING funding_package fp, plan p WHERE fpm.funding_package_id = fp.id AND fp.plan_id 
= p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -421,10 +490,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # inspections (direct) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM inspections WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -432,12 +503,14 @@ def delete_property_batch(session: Session, property_ids: list[int]): # funding_package # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM funding_package fp USING plan p WHERE fp.plan_id = p.id AND p.property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -445,10 +518,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # recommendation (direct — CRITICAL FIX) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -456,10 +531,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # plan (direct) # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -467,18 +544,22 @@ def delete_property_batch(session: Session, property_ids: list[int]): # property-scoped tables # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM property_details_epc WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) session.execute( - text(""" + text( + """ DELETE FROM property_targets WHERE property_id = ANY(:property_ids) - """), + """ + ), params, ) @@ -486,10 +567,12 @@ def delete_property_batch(session: Session, property_ids: list[int]): # properties LAST # -------------------------------------------------- session.execute( - text(""" + text( + """ DELETE FROM 
property WHERE id = ANY(:property_ids) - """), + """ + ), params, ) @@ -510,8 +593,7 @@ def delete_portfolio_scenarios_if_empty(portfolio_id: int): with db_session() as session: session.execute( - delete(Scenario) - .where(Scenario.portfolio_id == portfolio_id) + delete(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) ) print("Deleted scenarios for empty portfolio") @@ -530,6 +612,7 @@ def clear_portfolio_in_batches( total = (len(property_ids) + property_batch_size - 1) // property_batch_size import time + for i, batch in enumerate(chunked(property_ids, property_batch_size), start=1): print(f"Deleting batch {i}/{total} ({len(batch)} properties)") start_time = time.time() @@ -542,3 +625,163 @@ def clear_portfolio_in_batches( delete_portfolio_scenarios_if_empty(portfolio_id) print("Portfolio cleared in batches.") + + +def get_plans_by_scenario_ids(ids: List[int]) -> List[PlanModel]: + stmt = select(PlanModel).where(PlanModel.scenario_id.in_(ids)) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... 
+ return session_any.exec(stmt).scalars().all() + + +def get_most_recent_plans_by_portfolio_id( + portfolio_id: int, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[PlanModel]: + filters = [PlanModel.portfolio_id == portfolio_id] + + if min_property_id is not None: + filters.append(PlanModel.property_id >= min_property_id) + if max_property_id is not None: + filters.append(PlanModel.property_id <= max_property_id) + + # NOTE: This statement works for Postgres only, because of the Distinct + stmt = ( + select(PlanModel) + .where(and_(*filters)) + .distinct( + PlanModel.property_id, PlanModel.scenario_id + ) # one plan per property per scenario + .order_by( + PlanModel.property_id, + PlanModel.scenario_id, + PlanModel.created_at.desc(), + PlanModel.id.desc(), + ) + ) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + +def get_most_recent_plans_by_scenario_ids( + scenario_ids: List[int], + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[PlanModel]: + if not scenario_ids: + return [] + + # Base filter: scenario_id in provided list + filters: List[ColumnElement[bool]] = [PlanModel.scenario_id.in_(scenario_ids)] + + # Add optional property ID range filters + if min_property_id is not None: + filters.append(PlanModel.property_id >= min_property_id) + if max_property_id is not None: + filters.append(PlanModel.property_id <= max_property_id) + + # NOTE: This statement works for Postgres only, because of the Distinct + stmt = ( + select(PlanModel) + .where(and_(*filters)) + .distinct( + PlanModel.property_id, PlanModel.scenario_id + ) # one plan per property per scenario + .order_by( + PlanModel.property_id, + PlanModel.scenario_id, + PlanModel.created_at.desc(), + PlanModel.id.desc(), + ) + ) + + with db_read_session() as session: + session_any: Any = session # Typehint as Any to 
satisfy Pylance + return session_any.exec(stmt).scalars().all() + + +def get_scenarios_by_portfolio_id(portfolio_id: int) -> List[ScenarioModel]: + stmt = select(ScenarioModel).where(ScenarioModel.portfolio_id == portfolio_id) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalars().all() + + +def get_scenarios_count_by_portfolio_id(portfolio_id: int) -> int: + stmt = ( + select(func.count()) + .select_from(ScenarioModel) + .where(ScenarioModel.portfolio_id == portfolio_id) + ) + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + return session_any.exec(stmt).scalar_one() + + +def get_default_plans( + portfolio_id: int, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[PlanModel]: + filters: List[ColumnElement[bool]] = [ + PlanModel.portfolio_id == portfolio_id, + PlanModel.is_default.is_(True), + ] + + if min_property_id is not None: + filters.append(PlanModel.property_id >= min_property_id) + if max_property_id is not None: + filters.append(PlanModel.property_id <= max_property_id) + + stmt = select(PlanModel).where(and_(*filters)) + + with db_read_session() as session: + session_any: Any = session # Typehint as Any to satisfy Pylance... + plans: List[PlanModel] = session_any.exec(stmt).scalars().all() + return plans + + +def bulk_update_plans( + plan_models: List[PlanModel], + scenario_models: List[ScenarioModel], +) -> int: + if not plan_models: + return 0 + + with db_read_session() as session: + + plan_mapper: Mapper[Any] = inspect(PlanModel) + scenario_mapper: Mapper[Any] = inspect(ScenarioModel) + + plan_mappings: List[Dict[str, Any]] = ( + [] + ) # Typehint as Any to satisfy Pylance... 
+ for plan in plan_models: + data: Dict[str, Any] = { + c.name: getattr(plan, c.name) + for c in plan.__table__.columns + if c.name != "id" + } + data["id"] = plan.id + plan_mappings.append(data) + + session.bulk_update_mappings(plan_mapper, plan_mappings) + + scenario_mappings: List[Dict[str, Any]] = ( + [] + ) # Typehint as Any to satisfy Pylance... + for scenario in scenario_models: + data: Dict[str, Any] = { + c.name: getattr(scenario, c.name) + for c in scenario.__table__.columns + if c.name not in {"id", "portfolio_id"} + } + data["id"] = scenario.id + scenario_mappings.append(data) + + session.bulk_update_mappings(scenario_mapper, scenario_mappings) + + session.commit() + return len(plan_models) diff --git a/backend/app/db/functions/tasks/Tasks.py b/backend/app/db/functions/tasks/Tasks.py index d1ab9536..7ba3dd35 100644 --- a/backend/app/db/functions/tasks/Tasks.py +++ b/backend/app/db/functions/tasks/Tasks.py @@ -11,7 +11,7 @@ from sqlmodel import Session, select from backend.app.db.connection import get_db_session # ---- Models ---- -from backend.app.db.models.tasks import Task, SubTask +from backend.app.db.models.tasks import SourceEnum, Task, SubTask # ============================================================ @@ -25,7 +25,12 @@ class SubTaskInterface: # -------------------------------------------------------- # CREATE SUBTASK # -------------------------------------------------------- - def create_subtask(self, task_id: UUID, inputs: Optional[Dict[str, Any]] = None, status=None): + def create_subtask( + self, + task_id: UUID, + inputs: Optional[Dict[str, Any]] = None, + status: Optional[str] = None, + ): now = datetime.now(timezone.utc) with get_db_session() as session: @@ -56,8 +61,12 @@ class SubTaskInterface: # UPDATE STATUS (in progress, complete, failed) # -------------------------------------------------------- def update_subtask_status( - self, subtask_id: UUID, status: str, outputs=None, cloud_logs_url=None - ): + self, + subtask_id: UUID, + 
status: str, + outputs: Optional[Dict[str, str]] = None, + cloud_logs_url: Optional[str] = None, + ) -> SubTask: """ Update the status of a subtask, and recalculate the parent task progress. :param subtask_id: UUID of the subtask to update @@ -177,9 +186,7 @@ class SubTaskInterface: if not task: return - subtasks = session.exec( - select(SubTask).where(SubTask.task_id == task_id) - ).all() + subtasks = session.exec(select(SubTask).where(SubTask.task_id == task_id)).all() statuses = [s.status.lower() for s in subtasks] now = datetime.now(timezone.utc) @@ -211,7 +218,7 @@ class SubTaskInterface: subtask_id: UUID, status: str, outputs: Optional[Dict[str, Any]], - cloud_logs_url: Optional[str] + cloud_logs_url: Optional[str], ): now = datetime.now(timezone.utc) @@ -261,6 +268,8 @@ class TasksInterface: service: Optional[str] = None, inputs: Optional[Dict[str, Any]] = None, task_only: bool = False, + source: Optional[SourceEnum] = None, + source_id: Optional[str] = None, ): """ Create a new Task record, and an initial SubTask in waiting state. 
Can also be used to create just @@ -279,6 +288,8 @@ class TasksInterface: status="waiting", job_started=now, job_completed=None, + source=source, + source_id=source_id, ) session.add(task) diff --git a/backend/app/db/functions/tasks/__init__.py b/backend/app/db/functions/tasks/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/db/models/addresses.py b/backend/app/db/models/addresses.py index 51e9540f..a813f58d 100644 --- a/backend/app/db/models/addresses.py +++ b/backend/app/db/models/addresses.py @@ -7,9 +7,7 @@ from sqlalchemy import ( func, UniqueConstraint, ) -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class PostcodeSearch(Base): diff --git a/backend/app/db/models/condition.py b/backend/app/db/models/condition.py index 77043366..96f601a7 100644 --- a/backend/app/db/models/condition.py +++ b/backend/app/db/models/condition.py @@ -7,12 +7,12 @@ from sqlalchemy import ( String, Enum as SqlEnum, ) -from sqlalchemy.orm import declarative_base, relationship +from sqlalchemy.orm import relationship from backend.condition.domain.aspect_type import AspectType from backend.condition.domain.element_type import ElementType -Base = declarative_base() +from backend.app.db.base import Base ElementTypeDb = SqlEnum( ElementType, diff --git a/backend/app/db/models/energy_assessments.py b/backend/app/db/models/energy_assessments.py index 46912c9b..41967903 100644 --- a/backend/app/db/models/energy_assessments.py +++ b/backend/app/db/models/energy_assessments.py @@ -1,10 +1,8 @@ -from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, Date, ForeignKey -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.dialects.postgresql import ENUM as PgEnum import enum from datetime import datetime - -Base = declarative_base() +from backend.app.db.base import Base +from sqlalchemy import Column, Integer, BigInteger, Text, Float, DateTime, Boolean, 
Date, ForeignKey +from sqlalchemy.dialects.postgresql import ENUM as PgEnum class EnergyAssessment(Base): @@ -190,7 +188,7 @@ class EnergyAssessmentDocuments(Base): id = Column(BigInteger, primary_key=True, autoincrement=True) uprn = Column(BigInteger, nullable=False) energy_assessment_id = Column(BigInteger, ForeignKey('energy_assessments.id'), nullable=False) - document_type = Column(PgEnum(DocumentTypeEnum, name="document_type", create_type=False), nullable=False) + document_type = Column(PgEnum(DocumentTypeEnum, name="document_type"), nullable=False) document_location = Column(Text, nullable=False) uploaded_at = Column(DateTime(timezone=True), nullable=False, default=datetime.utcnow) scenario_id = Column(BigInteger, ForeignKey('energy_assessment_scenarios.id'), nullable=True) diff --git a/backend/app/db/models/epc.py b/backend/app/db/models/epc.py index 5a216040..ff0b40a0 100644 --- a/backend/app/db/models/epc.py +++ b/backend/app/db/models/epc.py @@ -4,11 +4,8 @@ from sqlalchemy import ( String, JSON, TIMESTAMP, - UniqueConstraint, ) -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class EpcStore(Base): diff --git a/backend/app/db/models/funding.py b/backend/app/db/models/funding.py index 6ea8364e..19e8203d 100644 --- a/backend/app/db/models/funding.py +++ b/backend/app/db/models/funding.py @@ -1,13 +1,19 @@ import enum -from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, BigInteger, ForeignKey -from sqlalchemy.orm import declarative_base +from sqlalchemy import ( + Column, + Integer, + Float, + Enum, + TIMESTAMP, + BigInteger, + ForeignKey, +) from sqlalchemy.sql import func -from backend.app.db.models.recommendations import Plan +from backend.app.db.base import Base +from backend.app.db.models.recommendations import PlanModel from backend.app.db.models.materials import MaterialType, Material -Base = declarative_base() - class SchemeEnum(enum.Enum): eco4 = "eco4" @@ -17,13 
+23,17 @@ class SchemeEnum(enum.Enum): class FundingPackage(Base): - __tablename__ = 'funding_package' + __tablename__ = "funding_package" id = Column(Integer, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey(Plan.id), nullable=False) + plan_id = Column(BigInteger, ForeignKey(PlanModel.id), nullable=False) scheme = Column( - Enum(SchemeEnum, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + SchemeEnum, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, ) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) project_funding = Column(Float) @@ -34,15 +44,23 @@ class FundingPackage(Base): class FundingPackageMeasures(Base): - __tablename__ = 'funding_package_measures' + __tablename__ = "funding_package_measures" id = Column(Integer, primary_key=True, autoincrement=True) - funding_package_id = Column(BigInteger, ForeignKey(FundingPackage.id), nullable=False) - measure = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + funding_package_id = Column( + BigInteger, ForeignKey(FundingPackage.id), nullable=False ) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) # Assuming material table exists + measure = Column( + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + material_id = Column( + BigInteger, ForeignKey(Material.id), nullable=False + ) # Assuming material table exists innovation_uplift = Column(Float) partial_project_score = Column(Float) uplift_project_score = Column(Float) diff --git a/backend/app/db/models/inspections.py b/backend/app/db/models/inspections.py index 473f8a02..2a42f589 100644 --- a/backend/app/db/models/inspections.py +++ b/backend/app/db/models/inspections.py @@ -9,11 +9,9 @@ from sqlalchemy import ( Enum, ForeignKey, ) -from 
sqlalchemy.ext.declarative import declarative_base +from backend.app.db.base import Base from backend.app.db.models.portfolio import PropertyModel -Base = declarative_base() - # ------------------------------------------------------------------- # ENUM DEFINITIONS (equivalent to drizzle pgEnum calls) diff --git a/backend/app/db/models/materials.py b/backend/app/db/models/materials.py index 8a524491..101ac021 100644 --- a/backend/app/db/models/materials.py +++ b/backend/app/db/models/materials.py @@ -1,10 +1,9 @@ import enum from sqlalchemy import Column, Integer, String, Float, Enum, TIMESTAMP, Boolean -from sqlalchemy.orm import declarative_base from sqlalchemy.sql import func -Base = declarative_base() +from backend.app.db.base import Base class MaterialType(enum.Enum): diff --git a/backend/app/db/models/non_intrusive_surveys.py b/backend/app/db/models/non_intrusive_surveys.py index bc2d8adc..bbfb7a54 100644 --- a/backend/app/db/models/non_intrusive_surveys.py +++ b/backend/app/db/models/non_intrusive_surveys.py @@ -1,7 +1,5 @@ from sqlalchemy import Column, BigInteger, String, TIMESTAMP, ForeignKey, Integer -from sqlalchemy.orm import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class NonIntrusiveSurvey(Base): diff --git a/backend/app/db/models/portfolio.py b/backend/app/db/models/portfolio.py index d151bdc4..9eb26597 100644 --- a/backend/app/db/models/portfolio.py +++ b/backend/app/db/models/portfolio.py @@ -1,13 +1,22 @@ import enum import pytz import datetime -from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint -from sqlalchemy.ext.declarative import declarative_base +from sqlalchemy import ( + Column, + Integer, + BigInteger, + Text, + Boolean, + Float, + DateTime, + Enum, + ForeignKey, + CheckConstraint, +) +from backend.app.db.base import Base from backend.app.db.models.users import UserModel # noqa from backend.app.db.models.materials import MaterialType -Base = 
declarative_base() - class PortfolioStatus(enum.Enum): SCOPING = "scoping" @@ -22,7 +31,7 @@ class PortfolioStatus(enum.Enum): NEEDS_REVIEW = "needs review" -class PortfolioGoal(enum.Enum): +class PortfolioGoal(enum.Enum): # TODO: Move to domain? VALUATION_IMPROVEMENT = "Valuation Improvement" INCREASING_EPC = "Increasing EPC" REDUCING_CO2_EMISSIONS = "Reducing CO2 emissions" @@ -31,23 +40,43 @@ class PortfolioGoal(enum.Enum): class Portfolio(Base): - __tablename__ = 'portfolio' + __tablename__ = "portfolio" id = Column(Integer, primary_key=True, autoincrement=True) name = Column(Text, nullable=False) budget = Column(Float) - status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) - goal = Column(Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), nullable=False) + status = Column( + Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) + goal = Column( + Enum(PortfolioGoal, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) cost = Column(Float) number_of_properties = Column(Integer) - co2_equivalent_savings = Column(Float) # Unit is always tonnes so we don't need to store the unit - energy_savings = Column(Float) # Unit is always kWh so we don't need to store the unit - energy_cost_savings = Column(Float) # Unit is always £ so we don't need to store the unit for the moment - property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment - rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment + co2_equivalent_savings = Column( + Float + ) # Unit is always tonnes so we don't need to store the unit + energy_savings = Column( + Float + ) # Unit is always kWh so we don't need to store the unit + energy_cost_savings = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment + property_valuation_increase = Column( + 
Float + ) # Unit is always £ so we don't need to store the unit for the moment + rental_yield_increase = Column( + Float + ) # Unit is always £ so we don't need to store the unit for the moment total_work_hours = Column(Float) labour_days = Column(Float) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) # Aggregations for summary epc_breakdown_pre_retrofit = Column(Text) epc_breakdown_post_retrofit = Column(Text) @@ -71,7 +100,7 @@ class PropertyCreationStatus(enum.Enum): ERROR = "ERROR" -class Epc(enum.Enum): +class Epc(enum.Enum): # TODO: Move to domain? A = "A" B = "B" C = "C" @@ -82,20 +111,27 @@ class Epc(enum.Enum): class PropertyModel(Base): - __tablename__ = 'property' + __tablename__ = "property" id = Column(Integer, primary_key=True, autoincrement=True) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) creation_status = Column(Enum(PropertyCreationStatus), nullable=False) - uprn = Column(Integer) + uprn = Column(BigInteger) landlord_property_id = Column(Text) - building_reference_number = Column(Integer) - status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False) + building_reference_number = Column(BigInteger) + status = Column( + Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) address = Column(Text) postcode = Column(Text) has_pre_condition_report = Column(Boolean) has_recommendations = Column(Boolean) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, 
default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) property_type = Column(Text) built_form = Column(Text) local_authority = Column(Text) @@ -127,7 +163,7 @@ rating_lookup = { "Average": FeatureRating.AVERAGE, "Poor": FeatureRating.POOR, "Very Poor": FeatureRating.VERY_POOR, - "N/A": FeatureRating.NA + "N/A": FeatureRating.NA, } @@ -136,32 +172,45 @@ def get_feature_rating_from_string(rating_str: str): class PropertyDetailsEpcModel(Base): - __tablename__ = 'property_details_epc' + __tablename__ = "property_details_epc" id = Column(Integer, primary_key=True, autoincrement=True) - property_id = Column(Integer, ForeignKey('property.id'), nullable=False) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + property_id = Column(Integer, ForeignKey("property.id"), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) full_address = Column(Text) lodgement_date = Column(DateTime) is_expired = Column(Boolean) total_floor_area = Column(Float) walls = Column(Text) - walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5')) + walls_rating = Column( + Integer, CheckConstraint("walls_rating>=1 AND walls_rating<=5") + ) roof = Column(Text) - roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5')) + roof_rating = Column(Integer, CheckConstraint("roof_rating>=1 AND roof_rating<=5")) floor = Column(Text) - floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5')) + floor_rating = Column( + Integer, CheckConstraint("floor_rating>=1 AND floor_rating<=5") + ) windows = Column(Text) - windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5')) + windows_rating = Column( + Integer, CheckConstraint("windows_rating>=1 AND 
windows_rating<=5") + ) heating = Column(Text) - heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5')) + heating_rating = Column( + Integer, CheckConstraint("heating_rating>=1 AND heating_rating<=5") + ) heating_controls = Column(Text) heating_controls_rating = Column( - Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5') + Integer, + CheckConstraint("heating_controls_rating>=1 AND heating_controls_rating<=5"), ) hot_water = Column(Text) - hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5')) + hot_water_rating = Column( + Integer, CheckConstraint("hot_water_rating>=1 AND hot_water_rating<=5") + ) lighting = Column(Text) - lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5')) + lighting_rating = Column( + Integer, CheckConstraint("lighting_rating>=1 AND lighting_rating<=5") + ) mainfuel = Column(Text) ventilation = Column(Text) solar_pv = Column(Text) @@ -219,7 +268,7 @@ class PropertyDetailsSpatial(Base): class PropertyDetailsMeter(Base): - __tablename__ = 'property_details_meter' + __tablename__ = "property_details_meter" id = Column(Integer, primary_key=True, autoincrement=True) uprn = Column(Integer, nullable=False) energy_supplier = Column(Text) @@ -230,11 +279,13 @@ class PropertyDetailsMeter(Base): class PropertyTargetsModel(Base): - __tablename__ = 'property_targets' + __tablename__ = "property_targets" id = Column(Integer, primary_key=True, autoincrement=True) - property_id = Column(Integer, ForeignKey('property.id'), nullable=False) - portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + property_id = Column(Integer, ForeignKey("property.id"), nullable=False) + portfolio_id = Column(Integer, ForeignKey("portfolio.id"), nullable=False) + created_at = Column( + DateTime, nullable=False, 
default=datetime.datetime.now(pytz.utc) + ) epc = Column(Enum(Epc)) heat_demand = Column(Text) @@ -242,23 +293,36 @@ class PropertyTargetsModel(Base): class PortfolioUsers(Base): __tablename__ = "portfolioUsers" id = Column(Integer, primary_key=True, autoincrement=True) - user_id = Column(Integer, ForeignKey('user.id'), nullable=False) - portfolioId = Column(Integer, ForeignKey('portfolio.id'), nullable=False) + user_id = Column(Integer, ForeignKey("user.id"), nullable=False) + portfolioId = Column(Integer, ForeignKey("portfolio.id"), nullable=False) role = Column(Text, nullable=False) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + updated_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) class PropertyInstalledMeasures(Base): """ This model keeps a record of the installed measures for each property, at the UPRN level """ - __tablename__ = 'property_installed_measures' + + __tablename__ = "property_installed_measures" id = Column(Integer, primary_key=True, autoincrement=True) uprn = Column(Integer, nullable=False) measure_type = Column( - Enum(MaterialType, values_callable=lambda x: [e.value for e in x], create_constraint=False), - nullable=False + Enum( + MaterialType, + values_callable=lambda x: [e.value for e in x], + create_constraint=False, + ), + nullable=False, + ) + created_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) + ) + installed_at = Column( + DateTime, nullable=False, default=datetime.datetime.now(pytz.utc) ) - created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) - installed_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc)) diff --git a/backend/app/db/models/recommendations.py 
b/backend/app/db/models/recommendations.py index ed1fcefa..27d03303 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -1,17 +1,32 @@ -from sqlalchemy import Column, BigInteger, String, Float, Boolean, TIMESTAMP, ForeignKey, Enum -from sqlalchemy.orm import declarative_base +import enum +from typing import Iterable, List, NamedTuple, Optional, Type +from sqlalchemy import ( + Column, + BigInteger, + String, + Float, + Boolean, + TIMESTAMP, + ForeignKey, + Enum, +) +from sqlalchemy.orm import Mapped, mapped_column from sqlalchemy.sql import func -from backend.app.db.models.portfolio import Portfolio, PropertyModel +from datetime import datetime + +from backend.app.db.base import Base +from backend.app.db.models.portfolio import Portfolio, PortfolioGoal, PropertyModel from backend.app.db.models.materials import Material from backend.app.db.models.portfolio import Epc from datatypes.enums import QuantityUnits -import enum -Base = declarative_base() + +def portfolio_goal_values(enum_cls: Type[PortfolioGoal]) -> List[str]: + return [e.value for e in enum_cls] class Recommendation(Base): - __tablename__ = 'recommendation' + __tablename__ = "recommendation" id = Column(BigInteger, primary_key=True, autoincrement=True) property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) @@ -37,19 +52,52 @@ class Recommendation(Base): class RecommendationMaterials(Base): - __tablename__ = 'recommendation_materials' + __tablename__ = "recommendation_materials" - id = Column(BigInteger, primary_key=True, autoincrement=True) - recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) - material_id = Column(BigInteger, ForeignKey(Material.id), nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - depth = Column(Float, nullable=False) - quantity = Column(Float, nullable=False) - quantity_unit = Column(Enum(QuantityUnits, values_callable=lambda x: 
[e.value for e in x]), nullable=False) - estimated_cost = Column(Float, nullable=False) + id: Mapped[int] = mapped_column( + BigInteger, primary_key=True, autoincrement=True + ) + + recommendation_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey("recommendation.id"), + nullable=False, + ) + + material_id: Mapped[int] = mapped_column( + BigInteger, + ForeignKey(Material.id), + nullable=False, + ) + + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, + nullable=False, + server_default=func.now(), + ) + + depth: Mapped[float] = mapped_column( + Float, + nullable=False, + ) + + quantity: Mapped[float] = mapped_column( + Float, + nullable=False, + ) + + quantity_unit: Mapped[QuantityUnits] = mapped_column( + Enum(QuantityUnits, values_callable=lambda x: [e.value for e in x]), + nullable=False, + ) + + estimated_cost: Mapped[float] = mapped_column( + Float, + nullable=False, + ) -class PlanTypeEnum(enum.Enum): +class PlanTypeEnum(enum.Enum): # TODO: move this to domain? SOLAR_ECO4 = "solar_eco4" SOLAR_HHRSH_ECO4 = "solar_hhrsh_eco4" EMPTY_CAVITY_ECO = "empty_cavity_eco" @@ -57,20 +105,36 @@ class PlanTypeEnum(enum.Enum): EXTRACTION_ECO = "extraction_eco" -class Plan(Base): - __tablename__ = 'plan' +class PlanModel(Base): + __tablename__ = "plan" - id = Column(BigInteger, primary_key=True, autoincrement=True) - name = Column(String, nullable=True, default="") - portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) - property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) - scenario_id = Column(BigInteger, ForeignKey('scenario.id')) # Doesn't have to be linked to a scenario - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - is_default = Column(Boolean, nullable=False) - valuation_increase_lower_bound = Column(Float) - valuation_increase_upper_bound = Column(Float) - valuation_increase_average = Column(Float) - plan_type = Column( + id: Mapped[int] = mapped_column(BigInteger, 
primary_key=True, autoincrement=True) + + name: Mapped[Optional[str]] = mapped_column(String, nullable=True, default="") + + portfolio_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(Portfolio.id), nullable=False + ) + + property_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(PropertyModel.id), nullable=False + ) + + scenario_id: Mapped[Optional[int]] = mapped_column( + BigInteger, ForeignKey("scenario.id") + ) + + created_at: Mapped[datetime] = mapped_column( # type: ignore + TIMESTAMP, nullable=False, server_default=func.now() + ) + + is_default: Mapped[bool] = mapped_column(Boolean, nullable=False) + + valuation_increase_lower_bound: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase_upper_bound: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase_average: Mapped[Optional[float]] = mapped_column(Float) + + plan_type: Mapped[Optional[PlanTypeEnum]] = mapped_column( Enum( PlanTypeEnum, name="plan_type", @@ -79,73 +143,90 @@ class Plan(Base): ), nullable=True, ) - post_sap_points = Column(Float) - post_epc_rating = Column(Enum(Epc)) - post_co2_emissions = Column(Float) - co2_savings = Column(Float) - post_energy_bill = Column(Float) - energy_bill_savings = Column(Float) - post_energy_consumption = Column(Float) # energy demand in kWh/year - energy_consumption_savings = Column(Float) - valuation_post_retrofit = Column(Float) - valuation_increase = Column(Float) + + post_sap_points: Mapped[Optional[float]] = mapped_column(Float) + post_epc_rating: Mapped[Optional[Epc]] = mapped_column(Enum(Epc)) + post_co2_emissions: Mapped[Optional[float]] = mapped_column(Float) + co2_savings: Mapped[Optional[float]] = mapped_column(Float) + post_energy_bill: Mapped[Optional[float]] = mapped_column(Float) + energy_bill_savings: Mapped[Optional[float]] = mapped_column(Float) + post_energy_consumption: Mapped[Optional[float]] = mapped_column(Float) + energy_consumption_savings: Mapped[Optional[float]] = mapped_column(Float) + 
valuation_post_retrofit: Mapped[Optional[float]] = mapped_column(Float) + valuation_increase: Mapped[Optional[float]] = mapped_column(Float) + # Financial metrics, excluding funding - cost_of_works = Column(Float) - contingency_cost = Column(Float) + cost_of_works: Mapped[Optional[float]] = mapped_column(Float) + contingency_cost: Mapped[Optional[float]] = mapped_column(Float) class PlanRecommendations(Base): - __tablename__ = 'plan_recommendations' + __tablename__ = "plan_recommendations" id = Column(BigInteger, primary_key=True, autoincrement=True) - plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False) - recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + plan_id = Column(BigInteger, ForeignKey("plan.id"), nullable=False) + recommendation_id = Column( + BigInteger, ForeignKey("recommendation.id"), nullable=False + ) -class Scenario(Base): - __tablename__ = 'scenario' +class ScenarioModel(Base): + __tablename__ = "scenario" - id = Column(BigInteger, primary_key=True, autoincrement=True) - name = Column(String, nullable=False) - created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) - budget = Column(Float) - portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) - housing_type = Column(String, nullable=False) - goal = Column(String, nullable=False) - goal_value = Column(String, nullable=False) - trigger_file_path = Column(String, nullable=False) - already_installed_file_path = Column(String) - patches_file_path = Column(String) - non_invasive_recommendations_file_path = Column(String) - exclusions = Column(String) - multi_plan = Column(Boolean, default=False) - is_default = Column(Boolean, default=False, nullable=False) + id: Mapped[int] = mapped_column(BigInteger, primary_key=True, autoincrement=True) + name: Mapped[str] = mapped_column(String, nullable=False) + created_at: Mapped[datetime] = mapped_column( + TIMESTAMP, nullable=False, server_default=func.now() + ) + 
budget: Mapped[Optional[float]] = mapped_column(Float) + portfolio_id: Mapped[int] = mapped_column( + BigInteger, ForeignKey(Portfolio.id), nullable=False + ) + housing_type: Mapped[str] = mapped_column(String, nullable=False) + goal: Mapped[PortfolioGoal] = mapped_column( + Enum(PortfolioGoal, values_callable=portfolio_goal_values, name="goal"), + nullable=False, + ) + goal_value: Mapped[str] = mapped_column(String, nullable=False) + trigger_file_path: Mapped[str] = mapped_column(String, nullable=False) + already_installed_file_path: Mapped[Optional[str]] = mapped_column(String) + patches_file_path: Mapped[Optional[str]] = mapped_column(String) + non_invasive_recommendations_file_path: Mapped[Optional[str]] = mapped_column( + String + ) + exclusions: Mapped[Optional[str]] = mapped_column(String) + multi_plan: Mapped[bool] = mapped_column(Boolean, default=False) + is_default: Mapped[bool] = mapped_column(Boolean, default=False, nullable=False) # Add in the fields we need, which were previously sitting at the portfolio level - cost = Column(Float) - contingency = Column(Float) - funding = Column(Float) - total_work_hours = Column(Float) - energy_savings = Column(Float) - co2_equivalent_savings = Column(Float) - energy_cost_savings = Column(Float) - epc_breakdown_pre_retrofit = Column(String) - epc_breakdown_post_retrofit = Column(String) - number_of_properties = Column(BigInteger) - n_units_to_retrofit = Column(BigInteger) - co2_per_unit_pre_retrofit = Column(String) - co2_per_unit_post_retrofit = Column(String) - energy_bill_per_unit_pre_retrofit = Column(String) - energy_bill_per_unit_post_retrofit = Column(String) - energy_consumption_per_unit_pre_retrofit = Column(String) - energy_consumption_per_unit_post_retrofit = Column(String) - valuation_improvement_per_unit = Column(String) - cost_per_unit = Column(String) - cost_per_co2_saved = Column(String) - cost_per_sap_point = Column(String) - valuation_return_on_investment = Column(String) - 
property_valuation_increase = Column(Float) - labour_days = Column(Float) + cost: Mapped[Optional[float]] = mapped_column(Float) + contingency: Mapped[Optional[float]] = mapped_column(Float) + funding: Mapped[Optional[float]] = mapped_column(Float) + total_work_hours: Mapped[Optional[float]] = mapped_column(Float) + energy_savings: Mapped[Optional[float]] = mapped_column(Float) + co2_equivalent_savings: Mapped[Optional[float]] = mapped_column(Float) + energy_cost_savings: Mapped[Optional[float]] = mapped_column(Float) + epc_breakdown_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + epc_breakdown_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + number_of_properties: Mapped[Optional[int]] = mapped_column(BigInteger) + n_units_to_retrofit: Mapped[Optional[int]] = mapped_column(BigInteger) + co2_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + co2_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_bill_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_bill_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column(String) + energy_consumption_per_unit_pre_retrofit: Mapped[Optional[str]] = mapped_column( + String + ) + energy_consumption_per_unit_post_retrofit: Mapped[Optional[str]] = mapped_column( + String + ) + valuation_improvement_per_unit: Mapped[Optional[str]] = mapped_column(String) + cost_per_unit: Mapped[Optional[str]] = mapped_column(String) + cost_per_co2_saved: Mapped[Optional[str]] = mapped_column(String) + cost_per_sap_point: Mapped[Optional[str]] = mapped_column(String) + valuation_return_on_investment: Mapped[Optional[str]] = mapped_column(String) + property_valuation_increase: Mapped[Optional[float]] = mapped_column(Float) + labour_days: Mapped[Optional[float]] = mapped_column(Float) class MeasureType(enum.Enum): @@ -201,3 +282,12 @@ class InstalledMeasure(Base): heat_demand_savings = Column(Float) source = Column(String) is_active = 
Column(Boolean, nullable=False, default=True) + + +def enum_values(e: Iterable[PlanTypeEnum]) -> list[str]: + return [m.value for m in e] + + +class PlanPersistence(NamedTuple): + plan: PlanModel + scenario: ScenarioModel diff --git a/backend/app/db/models/solar.py b/backend/app/db/models/solar.py index 88372bd3..dc1846f3 100644 --- a/backend/app/db/models/solar.py +++ b/backend/app/db/models/solar.py @@ -2,9 +2,7 @@ import datetime import pytz from enum import Enum as PyEnum from sqlalchemy import Column, Integer, Float, DateTime, JSON, BigInteger, ForeignKey, Enum, Boolean -from sqlalchemy.ext.declarative import declarative_base - -Base = declarative_base() +from backend.app.db.base import Base class Solar(Base): diff --git a/backend/app/db/models/tasks.py b/backend/app/db/models/tasks.py index cfe18d83..e97a939f 100644 --- a/backend/app/db/models/tasks.py +++ b/backend/app/db/models/tasks.py @@ -1,14 +1,24 @@ +import enum from typing import Optional from datetime import datetime from uuid import UUID, uuid4 +from sqlalchemy import Column, Enum from sqlmodel import SQLModel, Field, Relationship +class SourceEnum(enum.Enum): # TODO: move to domain? 
+ PORTFOLIO = "portfolio_id" + + class Task(SQLModel, table=True): __tablename__ = "tasks" - id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + index=True, + ) task_source: str job_started: Optional[datetime] = None job_completed: Optional[datetime] = None @@ -16,13 +26,32 @@ class Task(SQLModel, table=True): service: Optional[str] = None updated_at: datetime = Field(default_factory=datetime.utcnow) + # source: Mapped[Optional[SourceEnum]] = mapped_column(Enum(SourceEnum)) <- SQLAlchemy not SQLModel + + source: Optional[SourceEnum] = Field( + default=None, + sa_column=Column( + Enum( + SourceEnum, + name="source", + values_callable=lambda e: [m.value for m in e], + ), + nullable=True, + ), + ) + source_id: Optional[str] = None + sub_tasks: list["SubTask"] = Relationship(back_populates="task") class SubTask(SQLModel, table=True): __tablename__ = "sub_task" - id: UUID = Field(default_factory=uuid4, primary_key=True, index=True, ) + id: UUID = Field( + default_factory=uuid4, + primary_key=True, + index=True, + ) task_id: UUID = Field(foreign_key="tasks.id") job_started: Optional[datetime] = None diff --git a/backend/app/db/models/users.py b/backend/app/db/models/users.py index 6e243815..7952b9b7 100644 --- a/backend/app/db/models/users.py +++ b/backend/app/db/models/users.py @@ -1,8 +1,6 @@ from sqlalchemy import Column, Integer, String, DateTime -from sqlalchemy.ext.declarative import declarative_base from sqlalchemy.sql import func - -Base = declarative_base() +from backend.app.db.base import Base class UserModel(Base): diff --git a/backend/app/db/models/whlg.py b/backend/app/db/models/whlg.py index 29d907e4..5c5b7172 100644 --- a/backend/app/db/models/whlg.py +++ b/backend/app/db/models/whlg.py @@ -1,4 +1,3 @@ -import uuid from typing import Optional from sqlmodel import SQLModel, Field @@ -12,4 +11,4 @@ class Whlg(SQLModel, table=True): index=True, ) - postcode: str = 
Field(nullable=False) \ No newline at end of file + postcode: str = Field(nullable=False) diff --git a/backend/app/domain/classes/plan.py b/backend/app/domain/classes/plan.py new file mode 100644 index 00000000..e7455427 --- /dev/null +++ b/backend/app/domain/classes/plan.py @@ -0,0 +1,160 @@ +from __future__ import annotations +from dataclasses import replace +from typing import Optional + +from backend.app.db.models.portfolio import PortfolioGoal +from backend.app.db.models.recommendations import ( + PlanModel, + PlanPersistence, + ScenarioModel, +) +from backend.app.domain.classes.scenario import Scenario +from backend.app.domain.records.plan_record import PlanRecord +from backend.app.utils import sap_to_epc + + +class Plan: + def __init__( + self, record: PlanRecord, scenario: Scenario, id: Optional[int] = None + ): + self.id: Optional[int] = id + self.record: PlanRecord = record + self.scenario: Scenario = scenario + + @classmethod + def from_sqlalchemy(cls, plan_model: PlanModel, scenario: Scenario) -> Plan: + if not scenario: + raise ValueError(f"No Scenario associated with Plan of ID {plan_model.id}") + + record = PlanRecord( + property_id=plan_model.property_id, + portfolio_id=plan_model.portfolio_id, + created_at=plan_model.created_at, + is_default=plan_model.is_default, + valuation_increase_lower_bound=plan_model.valuation_increase_lower_bound, + valuation_increase_upper_bound=plan_model.valuation_increase_upper_bound, + valuation_increase_average=plan_model.valuation_increase_average, + plan_type=plan_model.plan_type, + post_sap_points=plan_model.post_sap_points, + post_epc_rating=plan_model.post_epc_rating, + post_co2_emissions=plan_model.post_co2_emissions, + co2_savings=plan_model.co2_savings, + post_energy_bill=plan_model.post_energy_bill, + energy_bill_savings=plan_model.energy_bill_savings, + post_energy_consumption=plan_model.post_energy_consumption, + energy_consumption_savings=plan_model.energy_consumption_savings, + 
valuation_post_retrofit=plan_model.valuation_post_retrofit, + valuation_increase=plan_model.valuation_increase, + cost_of_works=plan_model.cost_of_works, + contingency_cost=plan_model.contingency_cost, + name=plan_model.name, + ) + return cls(record=record, scenario=scenario, id=plan_model.id) + + @property + def is_compliant(self) -> bool: + goal: PortfolioGoal = self.scenario.record.goal + + match goal: + case PortfolioGoal.INCREASING_EPC: + return self._is_compliant_epc() + case _: + raise NotImplementedError + + @property + def cost(self) -> float: + return ( + self.record.cost_of_works + if self.record.cost_of_works is not None + else float("inf") + ) + + def to_sqlalchemy(self) -> PlanPersistence: + scenario_record = self.scenario.record + + scenario_model = ScenarioModel( + id=self.scenario.id, + name=scenario_record.name, + created_at=scenario_record.created_at, + housing_type=scenario_record.housing_type, + goal=scenario_record.goal, + goal_value=scenario_record.goal_value, + trigger_file_path=scenario_record.trigger_file_path, + multi_plan=scenario_record.multi_plan, + is_default=scenario_record.is_default, + budget=scenario_record.budget, + already_installed_file_path=scenario_record.already_installed_file_path, + patches_file_path=scenario_record.patches_file_path, + non_invasive_recommendations_file_path=scenario_record.non_invasive_recommendations_file_path, + exclusions=scenario_record.exclusions, + cost=scenario_record.cost, + contingency=scenario_record.contingency, + funding=scenario_record.funding, + total_work_hours=scenario_record.total_work_hours, + energy_savings=scenario_record.energy_savings, + co2_equivalent_savings=scenario_record.co2_equivalent_savings, + energy_cost_savings=scenario_record.energy_cost_savings, + epc_breakdown_pre_retrofit=scenario_record.epc_breakdown_pre_retrofit, + epc_breakdown_post_retrofit=scenario_record.epc_breakdown_post_retrofit, + number_of_properties=scenario_record.number_of_properties, + 
n_units_to_retrofit=scenario_record.n_units_to_retrofit, + co2_per_unit_pre_retrofit=scenario_record.co2_per_unit_pre_retrofit, + co2_per_unit_post_retrofit=scenario_record.co2_per_unit_post_retrofit, + energy_bill_per_unit_pre_retrofit=scenario_record.energy_bill_per_unit_pre_retrofit, + energy_bill_per_unit_post_retrofit=scenario_record.energy_bill_per_unit_post_retrofit, + energy_consumption_per_unit_pre_retrofit=scenario_record.energy_consumption_per_unit_pre_retrofit, + energy_consumption_per_unit_post_retrofit=scenario_record.energy_consumption_per_unit_post_retrofit, + valuation_improvement_per_unit=scenario_record.valuation_improvement_per_unit, + cost_per_unit=scenario_record.cost_per_unit, + cost_per_co2_saved=scenario_record.cost_per_co2_saved, + cost_per_sap_point=scenario_record.cost_per_sap_point, + valuation_return_on_investment=scenario_record.valuation_return_on_investment, + property_valuation_increase=scenario_record.property_valuation_increase, + labour_days=scenario_record.labour_days, + ) + + record = self.record + + plan_model = PlanModel( + id=self.id, + property_id=record.property_id, + portfolio_id=record.portfolio_id, + scenario_id=self.scenario.id, + created_at=record.created_at, + is_default=record.is_default, + valuation_increase_lower_bound=record.valuation_increase_lower_bound, + valuation_increase_upper_bound=record.valuation_increase_upper_bound, + valuation_increase_average=record.valuation_increase_average, + plan_type=record.plan_type, + post_sap_points=record.post_sap_points, + post_epc_rating=record.post_epc_rating, + post_co2_emissions=record.post_co2_emissions, + co2_savings=record.co2_savings, + post_energy_bill=record.post_energy_bill, + energy_bill_savings=record.energy_bill_savings, + post_energy_consumption=record.post_energy_consumption, + energy_consumption_savings=record.energy_consumption_savings, + valuation_post_retrofit=record.valuation_post_retrofit, + valuation_increase=record.valuation_increase, + 
cost_of_works=record.cost_of_works, + contingency_cost=record.contingency_cost, + name=record.name, + ) + + return PlanPersistence(plan=plan_model, scenario=scenario_model) + + def set_default(self, value: bool) -> None: + self.record = replace(self.record, is_default=value) + self.scenario.record = replace(self.scenario.record, is_default=value) + + def _is_compliant_epc(self) -> bool: + goal_value: str = self.scenario.record.goal_value + + if self.record.post_epc_rating: + post_epc = self.record.post_epc_rating.value + elif self.record.post_sap_points: + post_epc = sap_to_epc(self.record.post_sap_points) + else: + return False + + return post_epc <= goal_value diff --git a/backend/app/domain/classes/scenario.py b/backend/app/domain/classes/scenario.py new file mode 100644 index 00000000..3c22657e --- /dev/null +++ b/backend/app/domain/classes/scenario.py @@ -0,0 +1,58 @@ +from __future__ import annotations +from dataclasses import replace +from typing import Optional + +from backend.app.db.models.recommendations import ScenarioModel +from backend.app.domain.records.scenario_record import ScenarioRecord + + +class Scenario: + def __init__(self, record: ScenarioRecord, id: Optional[int] = None): + self.id = id + self.record = record + + @classmethod + def from_sqlalchemy(cls, scenario_model: ScenarioModel) -> Scenario: + record = ScenarioRecord( + name=scenario_model.name, + created_at=scenario_model.created_at, + housing_type=scenario_model.housing_type, + goal=scenario_model.goal, + goal_value=scenario_model.goal_value, + trigger_file_path=scenario_model.trigger_file_path, + multi_plan=scenario_model.multi_plan, + is_default=scenario_model.is_default, + budget=scenario_model.budget, + already_installed_file_path=scenario_model.already_installed_file_path, + patches_file_path=scenario_model.patches_file_path, + non_invasive_recommendations_file_path=scenario_model.non_invasive_recommendations_file_path, + exclusions=scenario_model.exclusions, + 
cost=scenario_model.cost, + contingency=scenario_model.contingency, + funding=scenario_model.funding, + total_work_hours=scenario_model.total_work_hours, + energy_savings=scenario_model.energy_savings, + co2_equivalent_savings=scenario_model.co2_equivalent_savings, + energy_cost_savings=scenario_model.energy_cost_savings, + epc_breakdown_pre_retrofit=scenario_model.epc_breakdown_pre_retrofit, + epc_breakdown_post_retrofit=scenario_model.epc_breakdown_post_retrofit, + number_of_properties=scenario_model.number_of_properties, + n_units_to_retrofit=scenario_model.n_units_to_retrofit, + co2_per_unit_pre_retrofit=scenario_model.co2_per_unit_pre_retrofit, + co2_per_unit_post_retrofit=scenario_model.co2_per_unit_post_retrofit, + energy_bill_per_unit_pre_retrofit=scenario_model.energy_bill_per_unit_pre_retrofit, + energy_bill_per_unit_post_retrofit=scenario_model.energy_bill_per_unit_post_retrofit, + energy_consumption_per_unit_pre_retrofit=scenario_model.energy_consumption_per_unit_pre_retrofit, + energy_consumption_per_unit_post_retrofit=scenario_model.energy_consumption_per_unit_post_retrofit, + valuation_improvement_per_unit=scenario_model.valuation_improvement_per_unit, + cost_per_unit=scenario_model.cost_per_unit, + cost_per_co2_saved=scenario_model.cost_per_co2_saved, + cost_per_sap_point=scenario_model.cost_per_sap_point, + valuation_return_on_investment=scenario_model.valuation_return_on_investment, + property_valuation_increase=scenario_model.property_valuation_increase, + labour_days=scenario_model.labour_days, + ) + return cls(record, scenario_model.id) + + def set_default(self, value: bool) -> None: + self.record = replace(self.record, is_default=value) diff --git a/backend/app/domain/records/plan_record.py b/backend/app/domain/records/plan_record.py new file mode 100644 index 00000000..63a82993 --- /dev/null +++ b/backend/app/domain/records/plan_record.py @@ -0,0 +1,32 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import 
Optional + +from backend.app.db.models.portfolio import Epc +from backend.app.db.models.recommendations import PlanTypeEnum + + +@dataclass(frozen=True) +class PlanRecord: + property_id: int + portfolio_id: int + created_at: datetime + is_default: bool + + valuation_increase_lower_bound: Optional[float] = None + valuation_increase_upper_bound: Optional[float] = None + valuation_increase_average: Optional[float] = None + plan_type: Optional[PlanTypeEnum] = None + post_sap_points: Optional[float] = None + post_epc_rating: Optional[Epc] = None + post_co2_emissions: Optional[float] = None + co2_savings: Optional[float] = None + post_energy_bill: Optional[float] = None + energy_bill_savings: Optional[float] = None + post_energy_consumption: Optional[float] = None + energy_consumption_savings: Optional[float] = None + valuation_post_retrofit: Optional[float] = None + valuation_increase: Optional[float] = None + cost_of_works: Optional[float] = None + contingency_cost: Optional[float] = None + name: Optional[str] = None diff --git a/backend/app/domain/records/scenario_record.py b/backend/app/domain/records/scenario_record.py new file mode 100644 index 00000000..0865cc88 --- /dev/null +++ b/backend/app/domain/records/scenario_record.py @@ -0,0 +1,47 @@ +from dataclasses import dataclass +from datetime import datetime +from typing import Optional + +from backend.app.db.models.portfolio import PortfolioGoal + + +@dataclass(frozen=True) +class ScenarioRecord: + name: str + created_at: datetime + housing_type: str + goal: PortfolioGoal + goal_value: str + trigger_file_path: str + multi_plan: bool + is_default: bool + budget: Optional[float] = None + already_installed_file_path: Optional[str] = None + patches_file_path: Optional[str] = None + non_invasive_recommendations_file_path: Optional[str] = None + exclusions: Optional[str] = None + + cost: Optional[float] = None + contingency: Optional[float] = None + funding: Optional[float] = None + total_work_hours: Optional[float] = 
None + energy_savings: Optional[float] = None + co2_equivalent_savings: Optional[float] = None + energy_cost_savings: Optional[float] = None + epc_breakdown_pre_retrofit: Optional[str] = None + epc_breakdown_post_retrofit: Optional[str] = None + number_of_properties: Optional[int] = None + n_units_to_retrofit: Optional[int] = None + co2_per_unit_pre_retrofit: Optional[str] = None + co2_per_unit_post_retrofit: Optional[str] = None + energy_bill_per_unit_pre_retrofit: Optional[str] = None + energy_bill_per_unit_post_retrofit: Optional[str] = None + energy_consumption_per_unit_pre_retrofit: Optional[str] = None + energy_consumption_per_unit_post_retrofit: Optional[str] = None + valuation_improvement_per_unit: Optional[str] = None + cost_per_unit: Optional[str] = None + cost_per_co2_saved: Optional[str] = None + cost_per_sap_point: Optional[str] = None + valuation_return_on_investment: Optional[str] = None + property_valuation_increase: Optional[float] = None + labour_days: Optional[float] = None diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index ea41162f..27151437 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1,21 +1,29 @@ +from typing import List +from uuid import UUID + import boto3 import json import math import asyncio -from contextlib import contextmanager -from sqlmodel import Session from datetime import datetime from fastapi import APIRouter, Depends +from backend.app.db.connection import db_session +from backend.app.db.models.tasks import SourceEnum from backend.app.dependencies import validate_token from backend.app.plan.schemas import PlanTriggerRequest from backend.app.config import get_settings -from sqlalchemy.orm import sessionmaker +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) from utils.logger import setup_logger -from backend.app.db.connection import db_engine -from backend.app.db.functions.recommendations_functions import create_scenario 
+from backend.app.db.functions.recommendations_functions import ( + create_scenario, + get_property_ids, + get_scenarios_count_by_portfolio_id, +) from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface logger = setup_logger() @@ -24,23 +32,88 @@ router = APIRouter( prefix="/plan", tags=["plan"], dependencies=[Depends(validate_token)], - responses={404: {"description": "Not found"}} + responses={404: {"description": "Not found"}}, ) -sqs_client = boto3.client("sqs") +settings = get_settings() +sqs_client = boto3.client("sqs", settings.AWS_DEFAULT_REGION) -@contextmanager -def db_session(): - session = Session(db_engine) - try: - yield session - session.commit() - except Exception: - session.rollback() - raise - finally: - session.close() +@router.post("/categorisation", status_code=202) +async def trigger_categorisation( + body: CategorisationTriggerRequest, +) -> dict[str, str]: + payload: CategorisationTriggerRequest = CategorisationTriggerRequest.model_validate( + body + ) + + logger.info("API triggered with body: %s", payload) + + property_ids: list[int] = get_property_ids(payload.portfolio_id) + property_ids.sort() + + num_scenarios: int = get_scenarios_count_by_portfolio_id(payload.portfolio_id) + total_plans_to_update: int = len(property_ids) * num_scenarios + + max_writes_per_batch: int = 1000 + properties_per_batch: int = max(1, max_writes_per_batch // num_scenarios) + + num_property_batches: int = math.ceil(len(property_ids) / properties_per_batch) + + logger.info("total_plans_to_update: %s", total_plans_to_update) + logger.info("properties_per_batch: %s", properties_per_batch) + logger.info("num_property_batchess: %s", num_property_batches) + + # Create task + task_id, _ = TasksInterface.create_task( + task_source="backend/plan/router.py:trigger_categorisation", + service="plan_categorisation", + inputs=payload.model_dump(), + task_only=True, + source=SourceEnum.PORTFOLIO, + source_id=str(payload.portfolio_id), + ) + + # 
Dispatch requests to lambdas + subtask_interface = SubTaskInterface() + + for batch_index in range(num_property_batches): + + start: int = batch_index * properties_per_batch + end: int = start + properties_per_batch + + batch_property_ids: List[int] = property_ids[start:end] + + if not batch_property_ids: + continue + + batch_request: CategorisationTriggerRequest = CategorisationTriggerRequest( + portfolio_id=payload.portfolio_id, + scenarios_to_consider=payload.scenarios_to_consider, + scenario_priority_order=payload.scenario_priority_order, + min_property_id=min(batch_property_ids), + max_property_id=max(batch_property_ids), + ) + # Create sub-task for each + subtask_id: UUID = subtask_interface.create_subtask( + task_id=task_id, inputs=batch_request.model_dump() + ) + batch_request.subtask_id = str(subtask_id) + + response = sqs_client.send_message( + QueueUrl=settings.CATEGORISATION_SQS_URL, + MessageBody=batch_request.model_dump_json(), + ) + + logger.info( + f"Chunk {batch_index} sent to SQS. {len(batch_property_ids)} Property IDs in batch (total " + f"{len(property_ids)}). Property IDs {min(batch_property_ids)}–{max(batch_property_ids)}. 
Message ID: " + f"{response.get('MessageId')}" + ) + + await asyncio.sleep(0.05) # Small delay to avoid SQS throttling + + return {"message": "Categorisation jobs distributed"} @router.post("/trigger", status_code=202) @@ -50,8 +123,6 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): """ logger.info("API triggered with body: %s", body) - settings = get_settings() - try: data = body.model_dump() except Exception as e: @@ -59,7 +130,10 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): return {"message": "Invalid request"}, 400 # If file_format is domna_asset_list and type is xlsx, read and chunk it - if data.get("file_format") == "domna_asset_list" and data.get("file_type") == "xlsx": + if ( + data.get("file_format") == "domna_asset_list" + and data.get("file_type") == "xlsx" + ): try: total_rows = data.get("sheet_count", 0) @@ -88,8 +162,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): "patches_file_path": body.patches_file_path, "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, "exclusions": body.exclusions, - "multi_plan": body.multi_plan - } + "multi_plan": body.multi_plan, + }, ) # Insert the scenario ID into the data payload data["scenario_id"] = scenario_id @@ -99,7 +173,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): task_source="backend/plan/router.py:trigger_plan_entrypoint", service="plan_engine", inputs=data, - task_only=True + task_only=True, ) subtask_interface = SubTaskInterface() @@ -109,13 +183,14 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): index_end = min((i + 1) * chunk_size, total_rows) message_payload = { - **data, "index_start": index_start, "index_end": index_end, + **data, + "index_start": index_start, + "index_end": index_end, } # Create a subtask for this chunk subtask_id = subtask_interface.create_subtask( - task_id=task_id, - inputs=message_payload + task_id=task_id, inputs=message_payload ) # Add task and subtask to 
message @@ -125,8 +200,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): message_body = json.dumps(message_payload) response = sqs_client.send_message( - QueueUrl=settings.ENGINE_SQS_URL, - MessageBody=message_body + QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body ) logger.info( f"Chunk {i} sent to SQS. Rows {index_start}–{index_end}. Message ID: {response.get('MessageId')}" @@ -153,8 +227,7 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): data["subtask_id"] = str(subtask_id) message_body = json.dumps(data) response = sqs_client.send_message( - QueueUrl=settings.ENGINE_SQS_URL, - MessageBody=message_body + QueueUrl=settings.ENGINE_SQS_URL, MessageBody=message_body ) logger.info(f"SQS message sent. Message ID: {response.get('MessageId')}") except Exception as e: diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index afea49e7..3941e2e5 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -12,6 +12,10 @@ WALL_INSULATION_MEASURES = ["internal_wall_insulation", "external_wall_insulatio ROOF_INSULATION_MEASURES = [ "loft_insulation", "flat_roof_insulation", "room_roof_insulation", "sloping_ceiling_insulation" ] +WALL_INSULATION_WITH_VENTILATION_MEASURES = [ + "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation", + "cavity_wall_insulation+mechanical_ventilation" +] # Both all and roof insulaiton measures are eligible for ECO4. 
These are the remaining fabric and heating measures # This is based on th measures we have recommendations for diff --git a/backend/app/plan/utils.py b/backend/app/plan/utils.py index 10d7fb06..7dfe5538 100644 --- a/backend/app/plan/utils.py +++ b/backend/app/plan/utils.py @@ -1,5 +1,6 @@ import ast import os +from typing import Optional import msgpack from uuid import UUID from utils.s3 import read_from_s3 @@ -24,7 +25,7 @@ def get_cleaned(): cleaned = read_from_s3( s3_file_name="cleaned_epc_data/cleaned.bson", - bucket_name=get_settings().DATA_BUCKET + bucket_name=get_settings().DATA_BUCKET, ) cleaned = msgpack.unpackb(cleaned, raw=False) @@ -56,32 +57,45 @@ def extract_property_request_data( ): patch_has_uprn = "uprn" in patches[0] if patches else True if patch_has_uprn: - patch = next(( - x for x in patches if str(x["uprn"]) == str(address.uprn) - ), {}) + patch = next((x for x in patches if str(x["uprn"]) == str(address.uprn)), {}) else: - patch = next(( - x for x in patches if (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), {}) + patch = next( + ( + x + for x in patches + if (x["address"] == address.address) + and (x["postcode"] == address.postcode) + ), + {}, + ) # Because we have some non-invasive recommendations that match on address and postcode, but not UPRN # we need to check existence of uprn - has_uprn = "uprn" in non_invasive_recommendations[0] if non_invasive_recommendations else False + has_uprn = ( + "uprn" in non_invasive_recommendations[0] + if non_invasive_recommendations + else False + ) if has_uprn: has_uprn = non_invasive_recommendations[0]["uprn"] not in ["", None] if has_uprn: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (str(x["uprn"]) == str(uprn)) - ), {}) + property_non_invasive_recommendations = next( + (x for x in non_invasive_recommendations if (str(x["uprn"]) == str(uprn))), + {}, + ) # We patch the non-invasive recs that are 
['cavity_extract_and_refill'] else: - property_non_invasive_recommendations = next(( - x for x in non_invasive_recommendations if - (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), {}) + property_non_invasive_recommendations = next( + ( + x + for x in non_invasive_recommendations + if (x["address"] == address.address) + and (x["postcode"] == address.postcode) + ), + {}, + ) if isinstance(property_non_invasive_recommendations.get("recommendations"), str): property_non_invasive_recommendations["recommendations"] = ast.literal_eval( @@ -90,7 +104,11 @@ def extract_property_request_data( transformed = [] for rec in property_non_invasive_recommendations["recommendations"]: if isinstance(rec, str): - transformed.append({"type": rec, }) + transformed.append( + { + "type": rec, + } + ) else: transformed.append(rec) @@ -102,26 +120,36 @@ def extract_property_request_data( valuation_has_uprn = valuation_data[0]["uprn"] not in ["", None] if valuation_has_uprn: - property_valuation = next(( - float(x["valuation"]) for x in valuation_data if - (str(x["uprn"]) == str(uprn)) - ), None) + property_valuation = next( + ( + float(x["valuation"]) + for x in valuation_data + if (str(x["uprn"]) == str(uprn)) + ), + None, + ) else: - property_valuation = next(( - float(x["valuation"]) for x in valuation_data if - (x["address"] == address.address) and (x["postcode"] == address.postcode) - ), None) + property_valuation = next( + ( + float(x["valuation"]) + for x in valuation_data + if (x["address"] == address.address) + and (x["postcode"] == address.postcode) + ), + None, + ) # Return data class to give a structured format return PropertyRequestData( patch=patch, non_invasive_recommendations=property_non_invasive_recommendations, - valuation=property_valuation + valuation=property_valuation, ) -def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str, list[str]] | tuple[ - None, None, None, list]: +def parse_eco_packages( + addr: 
Address, prepared_epc +) -> tuple[list[str], int, str, list[str]] | tuple[None, None, None, list]: solar_identification = addr.solar_reason cavity_identification = addr.cavity_reason if not solar_identification and not cavity_identification: @@ -140,47 +168,51 @@ def parse_eco_packages(addr: Address, prepared_epc) -> tuple[list[str], int, str "Solar Eligible": { "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], "target_sap": 86, # High B - "plan_type": "solar_eco4" + "plan_type": "solar_eco4", }, "Solar Eligible, Solid Wall Uninsulated, EPC E or Below": { "measures": ["solar_pv", "loft_insulation", "mechanical_ventilation"], "target_sap": 86, # High B - "plan_type": "solar_eco4" + "plan_type": "solar_eco4", }, "Solar Eligible, Needs Heating Upgrade": { - "measures": ["solar_pv", "loft_insulation", "high_heat_retention_storage_heaters", - "mechanical_ventilation"], + "measures": [ + "solar_pv", + "loft_insulation", + "high_heat_retention_storage_heaters", + "mechanical_ventilation", + ], "target_sap": 86, # High B - "plan_type": "solar_hhrsh_eco4" + "plan_type": "solar_hhrsh_eco4", }, "Non-Intrusive Data Shows Empty Cavity": { "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "empty_cavity_eco" + "plan_type": "empty_cavity_eco", }, - 'Non-Intrusive Data Shows Empty Cavity, built after 2002': { + "Non-Intrusive Data Shows Empty Cavity, built after 2002": { "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "empty_cavity_eco" + "plan_type": "empty_cavity_eco", }, "EPC Shows Empty Cavity, inspections show retro drilled": { # EPC Indicates it's empty, so we simulate a fill "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "extraction_eco" + "plan_type": "extraction_eco", }, "EPC Shows Empty Cavity, inspections show filled at build": { # EPC Indicates it's empty, so we simulate a 
fill "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "extraction_eco" + "plan_type": "extraction_eco", }, "EPC Shows Empty Cavity": { # EPC Indicates it's empty, so we simulate a fill "measures": ["cavity_wall_insulation", "mechanical_ventilation"], "target_sap": 69, # Low C - "plan_type": "empty_cavity_eco" - } + "plan_type": "empty_cavity_eco", + }, } # Always prioritise solar @@ -214,9 +246,13 @@ def build_cloudwatch_log_url(start_ms: int) -> str: Build a CloudWatch Logs URL for the current Lambda invocation, including timestamp window from start_ms to end_ms (epoch ms). """ + logger.info("Building cloudwatch logs URL") region = os.environ["AWS_REGION"] + logger.info("Building cloudwatch logs URL: Got AWS region") log_group = os.environ["AWS_LAMBDA_LOG_GROUP_NAME"] + logger.info("Building cloudwatch logs URL: Got lambda log group name") log_stream = os.environ["AWS_LAMBDA_LOG_STREAM_NAME"] + logger.info("Building cloudwatch logs URL: Got lambda log stream name") # CloudWatch console requires / encoded as $252F encoded_group = log_group.replace("/", "$252F") @@ -232,15 +268,21 @@ def build_cloudwatch_log_url(start_ms: int) -> str: ) -def handle_error(msg, e, subtask_id, status=500, start_ms=None): +def handle_error( + msg: str, + exception: Exception, + subtask_id: str, + status_code: int = 500, + start_ms: Optional[int] = None, +): # When the pipeline fails, handles error process cloud_logs_url = build_cloudwatch_log_url(start_ms) SubTaskInterface().update_subtask_status( subtask_id=UUID(subtask_id), status="failed", - outputs=str(e), - cloud_logs_url=cloud_logs_url + outputs=str(exception), + cloud_logs_url=cloud_logs_url, ) logger.error(msg, exc_info=True) - return Response(status_code=status, content=msg) + return Response(status_code=status_code, content=msg) diff --git a/backend/app/requirements/requirements.txt b/backend/app/requirements/requirements.txt index 3124034e..9fdbfe4c 100644 --- 
a/backend/app/requirements/requirements.txt +++ b/backend/app/requirements/requirements.txt @@ -10,7 +10,7 @@ mangum==0.19.0 # AWS boto3==1.35.44 # Data -openpyxl==3.1.2 +openpyxl==3.1.5 # Basic pytz sqlmodel \ No newline at end of file diff --git a/backend/app/tasks/router.py b/backend/app/tasks/router.py index 90b62dd1..1c266f2c 100644 --- a/backend/app/tasks/router.py +++ b/backend/app/tasks/router.py @@ -9,7 +9,7 @@ from backend.app.tasks.schema import ( CreateSubTaskRequest, UpdateSubTaskStatusRequest, FinalizeSubTaskRequest, - TaskSqsTriggerRequest + TaskSqsTriggerRequest, ) # Correct location of interfaces @@ -51,18 +51,18 @@ async def get_task(task_id: UUID): if not task: raise HTTPException(status_code=404, detail="Task not found") - subtasks = session.exec( - select(SubTask).where(SubTask.taskId == task_id) - ).all() + subtasks = session.exec(select(SubTask).where(SubTask.taskId == task_id)).all() formatted = [] for st in subtasks: - formatted.append({ - **st.dict(), - "inputs": json.loads(st.inputs) if st.inputs else None, - "outputs": json.loads(st.outputs) if st.outputs else None, - "cloud_logs_url": st.cloudLogsURL, - }) + formatted.append( + { + **st.dict(), + "inputs": json.loads(st.inputs) if st.inputs else None, + "outputs": json.loads(st.outputs) if st.outputs else None, + "cloud_logs_url": st.cloudLogsURL, + } + ) return { "task": task, @@ -111,7 +111,10 @@ async def update_subtask_status(subtask_id: UUID, req: UpdateSubTaskStatusReques # === # Sub task is complete -@router.post("/subtask/{subtask_id}/finalize", summary="Finalize a subtask with status, outputs, logs") +@router.post( + "/subtask/{subtask_id}/finalize", + summary="Finalize a subtask with status, outputs, logs", +) async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): subtasks = SubTaskInterface() @@ -120,7 +123,7 @@ async def finalize_subtask(subtask_id: UUID, req: FinalizeSubTaskRequest): subtask_id=subtask_id, status=req.status, outputs=req.outputs, - 
cloud_logs_url=req.cloud_logs_url + cloud_logs_url=req.cloud_logs_url, ) return { @@ -142,9 +145,10 @@ from backend.app.tasks.schema import TaskSqsTriggerRequest from backend.app.db.functions.tasks.Tasks import TasksInterface, SubTaskInterface from backend.app.config import get_settings -sqs = boto3.client("sqs") -@router.post("/trigger", summary="Create task + subtask and publish to SQS", status_code=202) +@router.post( + "/trigger", summary="Create task + subtask and publish to SQS", status_code=202 +) async def trigger_task(req: TaskSqsTriggerRequest): """ Creates a Task + SubTask, then pushes the SubTask into SQS so a Lambda can process it. @@ -152,11 +156,12 @@ async def trigger_task(req: TaskSqsTriggerRequest): """ settings = get_settings() + sqs = boto3.client("sqs", settings.AWS_DEFAULT_REGION) tasks = TasksInterface() # ---- Normalize empty inputs ---- - inputs = req.inputs or {} # ensures {} even if null + inputs = req.inputs or {} # ensures {} even if null # ---- 1. Create Task + SubTask ---- task_id, subtask_id = tasks.create_task( @@ -174,8 +179,8 @@ async def trigger_task(req: TaskSqsTriggerRequest): try: response = sqs.send_message( QueueUrl=f"https://sqs.{settings.AWS_REGION}.amazonaws.com/" - f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", - MessageBody=json.dumps(sqs_payload) + f"{settings.AWS_ACCOUNT_ID}/lambda-example-queue", + MessageBody=json.dumps(sqs_payload), ) except Exception as e: raise HTTPException(status_code=500, detail=f"SQS error: {e}") @@ -186,4 +191,4 @@ async def trigger_task(req: TaskSqsTriggerRequest): "subtask_id": subtask_id, "sqs_message_id": response.get("MessageId"), "inputs_sent": inputs, - } \ No newline at end of file + } diff --git a/backend/categorisation/__init__.py b/backend/categorisation/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/categorisation/categorisation_trigger_request.py b/backend/categorisation/categorisation_trigger_request.py new file mode 100644 index 
00000000..62879b5d --- /dev/null +++ b/backend/categorisation/categorisation_trigger_request.py @@ -0,0 +1,17 @@ +from typing import List, Optional +from pydantic import BaseModel + + +class CategorisationTriggerRequest(BaseModel): + portfolio_id: int + + scenarios_to_consider: Optional[List[int]] = None + scenario_priority_order: Optional[List[int]] = None + + min_property_id: Optional[int] = None + max_property_id: Optional[int] = None + + subtask_id: Optional[str] = None + + +# {"portfolio_id": 556, "scenarios_to_consider": [1039,1041], "scenario_priority_order": [1041,1039]} diff --git a/backend/categorisation/handler/Dockerfile b/backend/categorisation/handler/Dockerfile new file mode 100644 index 00000000..0a92eaba --- /dev/null +++ b/backend/categorisation/handler/Dockerfile @@ -0,0 +1,42 @@ +FROM public.ecr.aws/lambda/python:3.11 +# For local running: +# FROM python:3.11.10-bullseye + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# Environment +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} + +COPY backend/.env.test backend/.env + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/categorisation/handler/requirements.txt . 
+ +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +# NOTE: if build is ever slow we can be more specific with which files are copied +COPY backend/ backend/ +COPY datatypes/ datatypes/ + + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["backend/categorisation/handler/handler.handler"] +# For local running +# CMD ["python", "-m", "backend.categorisation.handler.handler"] diff --git a/backend/categorisation/handler/handler.py b/backend/categorisation/handler/handler.py new file mode 100644 index 00000000..a1f69ea6 --- /dev/null +++ b/backend/categorisation/handler/handler.py @@ -0,0 +1,34 @@ +import json +import time +from typing import Any, Mapping + +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from backend.app.plan.utils import build_cloudwatch_log_url +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) +from backend.categorisation.processor import process_portfolio +from utils.logger import setup_logger + + +logger = setup_logger() + + +def handler(event: Mapping[str, Any], context: Any) -> None: + + logger.info("Received message") + + logger.info(f"Number of events: {len(event.get('Records', []))}") + + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + logger.debug("Validating request body") + payload = CategorisationTriggerRequest.model_validate(body_dict) + + logger.debug("Successfully validated request body") + + process_portfolio(payload) + except Exception as e: + logger.info("Handler exception") + logger.error(f"Failed to process record: {e}") diff --git a/backend/categorisation/handler/requirements.txt b/backend/categorisation/handler/requirements.txt new file mode 100644 index 00000000..6e737772 --- /dev/null +++ 
b/backend/categorisation/handler/requirements.txt @@ -0,0 +1,10 @@ +sqlmodel +pydantic-settings +psycopg2-binary==2.9.10 +starlette + +# Not used but needed to satisfy imports +pytz==2024.2 +msgpack==1.1.0 +numpy<2 +pandas==2.2.3 \ No newline at end of file diff --git a/backend/categorisation/local_handler/docker-compose.yml b/backend/categorisation/local_handler/docker-compose.yml new file mode 100644 index 00000000..9529fdb2 --- /dev/null +++ b/backend/categorisation/local_handler/docker-compose.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + categorisation-lambda: + build: + context: ../../../ + dockerfile: backend/categorisation/handler/Dockerfile + ports: + - "9000:8080" + env_file: + - ../../../.env \ No newline at end of file diff --git a/backend/categorisation/local_handler/invoke_local_lambda.py b/backend/categorisation/local_handler/invoke_local_lambda.py new file mode 100644 index 00000000..5aa82846 --- /dev/null +++ b/backend/categorisation/local_handler/invoke_local_lambda.py @@ -0,0 +1,31 @@ +#!/usr/bin/env python3 +import json +import requests + +HOST = "localhost" +PORT = "9000" + +LAMBDA_URL = f"http://{HOST}:{PORT}/2015-03-31/functions/function/invocations" + +payload = { + "Records": [ + { + "body": json.dumps( + { + "portfolio_id": 569, + "scenarios_to_consider": [], + "scenario_priority_order": [], + "min_property_id": 660418, + "max_property_id": 660917, + "subtask_id": "6a0bcbac-ddab-435f-8708-8acd4662b067", + } + ) + } + ] +} + +response = requests.post(LAMBDA_URL, json=payload) + +print("Status code:", response.status_code) +print("Response:") +print(response.text) diff --git a/backend/categorisation/local_runner.py b/backend/categorisation/local_runner.py new file mode 100644 index 00000000..384ce5ef --- /dev/null +++ b/backend/categorisation/local_runner.py @@ -0,0 +1,24 @@ +from typing import List + +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) +from 
backend.categorisation.processor import process_portfolio + + +def main() -> None: + portfolio_id = 556 + scenarios_to_consider: List[int] = [] + scenario_priority_order: List[int] = [] + + process_portfolio( + CategorisationTriggerRequest( + portfolio_id=portfolio_id, + scenarios_to_consider=scenarios_to_consider, + scenario_priority_order=scenario_priority_order, + ) + ) + + +if __name__ == "__main__": + main() diff --git a/backend/categorisation/processor.py b/backend/categorisation/processor.py new file mode 100644 index 00000000..88bc121e --- /dev/null +++ b/backend/categorisation/processor.py @@ -0,0 +1,261 @@ +import time +from collections import defaultdict +from typing import Dict, List, Optional +from uuid import UUID +from starlette.responses import Response + +from backend.app.db.functions.recommendations_functions import ( + bulk_update_plans, + get_default_plans, + get_most_recent_plans_by_portfolio_id, + get_most_recent_plans_by_scenario_ids, + get_scenarios_by_portfolio_id, +) +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from backend.app.db.models.recommendations import PlanModel, ScenarioModel +from backend.app.domain.classes.plan import Plan +from backend.app.domain.classes.scenario import Scenario +from backend.app.plan.utils import build_cloudwatch_log_url, handle_error +from backend.categorisation.categorisation_trigger_request import ( + CategorisationTriggerRequest, +) +from utils.logger import setup_logger + +logger = setup_logger() + + +def process_portfolio( + body: CategorisationTriggerRequest, +) -> Response: # TODO: make this a class + portfolio_id: int = body.portfolio_id + scenarios_to_consider: Optional[List[int]] = body.scenarios_to_consider + scenario_priority_order: Optional[List[int]] = body.scenario_priority_order + min_property_id: Optional[int] = body.min_property_id + max_property_id: Optional[int] = body.max_property_id + subtask_id: Optional[str] = body.subtask_id + + logger.info(f"Processing 
portfolio {portfolio_id}") + start_ms = int(time.time() * 1000) + cloud_logs_url = build_cloudwatch_log_url(start_ms) + + if body.subtask_id: + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="in progress", + cloud_logs_url=cloud_logs_url, + ) + + try: + + all_scenarios: List[Scenario] = _load_scenarios_for_portfolio(portfolio_id) + plans_by_id: Dict[int, Plan] = ( + {} + ) # TODO: make this an in-memory repository class + + if scenarios_to_consider: + if len(scenarios_to_consider) < 2: + raise ValueError( + "Cannot run auto categorisation for fewer than 2 scenarios" + ) + + # first get all plans that we're interested in + plans_for_consideration: List[Plan] = _load_plans_for_portfolio( + portfolio_id, + all_scenarios, + scenarios_to_consider, + min_property_id, + max_property_id, + ) + for plan in plans_for_consideration: + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan + + # then unset existing defaults on domain objects regardless of whether they're under consideration or not + default_plans: List[Plan] = _get_default_plans( + portfolio_id, all_scenarios, min_property_id, max_property_id + ) + for plan in default_plans: + plan.set_default(False) + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan + + logger.info(f"Successfully unset {len(default_plans)} default plan(s)") + + # then set new defaults on domain objects under consideration + plans_for_consideration_by_property: Dict[int, List[Plan]] = ( + _group_plans_by_property(plans_for_consideration) + ) + + for property_id, property_plans in plans_for_consideration_by_property.items(): + if not property_plans: + raise ValueError(f"No plans for property {property_id}") + + try: + cheapest_plan = choose_cheapest_relevant_plan( + property_plans, scenario_priority_order + ) + except Exception: + logger.error(f"Failed to find cheapest plan for property {property_id}") + raise + + property_plans = _update_plan_objects(property_plans, 
cheapest_plan) + for plan in property_plans: + if plan.id is not None: # just in case + plans_by_id[plan.id] = plan + + logger.info("Successfully set defaults on Plan objects in memory") + + # then pass all domain objects to database to update (regardless of whether they've changed) + _update_plans_in_db(list(plans_by_id.values())) + + # Mark the subtask as successful + logger.info(f"Successfully updated {len(plans_by_id)} Plans in database") + if body.subtask_id: + SubTaskInterface().update_subtask_status( + subtask_id=UUID(subtask_id), + status="complete", + cloud_logs_url=cloud_logs_url, + ) + + return Response(status_code=200) + except Exception as e: + if subtask_id: + return handle_error( + "Exception during Categorisation processing.", + e, + subtask_id, + 500, + start_ms, + ) + + raise + + +def choose_cheapest_relevant_plan( + plans: List[Plan], scenario_priority_order: Optional[List[int]] = None +) -> Plan: + scenario_priority_order = scenario_priority_order or [] + + eligible_plans: List[Plan] = [plan for plan in plans if plan.is_compliant] or plans + if not eligible_plans: + raise ValueError("No plans available to choose from.") + + for plan in eligible_plans: + if plan.id is None: + # This should never actually happen, but plan.id is optional to cater + # for new plans. 
We are only working with already persisted plans here + raise ValueError( + f"All plans must have an ID, but found a plan with no ID: {plan}" + ) + + min_cost: float = min(plan.cost for plan in eligible_plans) + + cheapest_plans: List[Plan] = [ + plan for plan in eligible_plans if plan.cost == min_cost + ] + + for priority_scenario_id in scenario_priority_order: + for plan in cheapest_plans: + if plan.scenario.id == priority_scenario_id: + return plan + + return cheapest_plans[0] + + +def _get_default_plans( + portfolio_id: int, + scenarios: List[Scenario], + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[Plan]: + default_plan_models = get_default_plans( + portfolio_id, min_property_id, max_property_id + ) + + scenario_map = {s.id: s for s in scenarios} + + return [ + Plan.from_sqlalchemy(p, scenario_map[p.scenario_id]) + for p in default_plan_models + if p.scenario_id in scenario_map + ] + + +def _load_scenarios_for_portfolio(portfolio_id: int) -> List[Scenario]: + scenario_models: List[ScenarioModel] = get_scenarios_by_portfolio_id(portfolio_id) + + return [Scenario.from_sqlalchemy(s) for s in scenario_models] + + +def _load_plans_for_portfolio( + portfolio_id: int, + all_scenarios: List[Scenario], + scenarios_to_consider: Optional[List[int]] = None, + min_property_id: Optional[int] = None, + max_property_id: Optional[int] = None, +) -> List[Plan]: + + if scenarios_to_consider: + logger.info(f"Getting plans for {len(scenarios_to_consider)} scenarios") + plan_models: List[PlanModel] = get_most_recent_plans_by_scenario_ids( + scenarios_to_consider, min_property_id, max_property_id + ) + logger.info(f"Got {len(plan_models)} plan models from database") + else: + logger.info( + f"No list of Plans to consider provided. 
Getting all Plans for portfolio {portfolio_id}" + ) + plan_models: List[PlanModel] = get_most_recent_plans_by_portfolio_id( + portfolio_id, min_property_id, max_property_id + ) + + plans: List[Plan] = [] + + if not all_scenarios: + raise Exception(f"No scenarios found for Portfolio {portfolio_id}") + + for model in plan_models: + + scenario = next((s for s in all_scenarios if s.id == model.scenario_id)) + if not scenario: + logger.info(f"No Scenario associated with Plan of ID {model.id}") + continue + + plans.append(Plan.from_sqlalchemy(model, scenario)) + + logger.info(f"Got {len(plans)} Plans") + return plans + + +def _group_plans_by_property(plans: List[Plan]) -> Dict[int, List[Plan]]: + grouped: dict[int, List[Plan]] = defaultdict(list) + + for plan in plans: + grouped[plan.record.property_id].append(plan) + + return grouped + + +def _update_plan_objects(plans: List[Plan], cheapest_plan: Plan) -> List[Plan]: + for plan in plans: + should_be_default: bool = plan.id == cheapest_plan.id + plan.set_default(should_be_default) + + if should_be_default: + logger.debug( + f"Setting Plan {plan.id} (Scenario Name: {plan.scenario.record.name}) to default" + ) + + return plans + + +def _update_plans_in_db(plans: List[Plan]) -> None: + plan_models: List[PlanModel] = [] + scenario_models: List[ScenarioModel] = [] + + for plan in plans: + plan_model, scenario_model = plan.to_sqlalchemy() + plan_models.append(plan_model) + scenario_models.append(scenario_model) + + bulk_update_plans(plan_models, scenario_models) diff --git a/backend/categorisation/tests/test_plan_is_compliant.py b/backend/categorisation/tests/test_plan_is_compliant.py new file mode 100644 index 00000000..62756652 --- /dev/null +++ b/backend/categorisation/tests/test_plan_is_compliant.py @@ -0,0 +1,73 @@ +from typing import Callable +import pytest +from datetime import datetime + +from backend.app.domain.classes.plan import Plan +from backend.app.domain.classes.scenario import Scenario +from 
backend.app.domain.records.plan_record import PlanRecord +from backend.app.domain.records.scenario_record import ScenarioRecord +from backend.app.db.models.portfolio import Epc, PortfolioGoal + + +@pytest.fixture +def created_at_datetime() -> datetime: + return datetime.now() + + +@pytest.fixture +def epc_c_scenario(created_at_datetime: datetime) -> "Scenario": + # arrange + scenario_record = ScenarioRecord( + name="EPC C", + created_at=created_at_datetime, + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=False, + ) + return Scenario(record=scenario_record, id=1) + + +@pytest.fixture +def plan_factory( + epc_c_scenario: "Scenario", created_at_datetime: datetime +) -> Callable[[int, "Epc"], "Plan"]: + # returns a function to create plans with different attributes + def _create_plan(post_sap_points: int, post_epc_rating: "Epc") -> "Plan": + plan_record = PlanRecord( + property_id=1, + portfolio_id=1, + created_at=created_at_datetime, + is_default=False, + post_sap_points=post_sap_points, + post_epc_rating=post_epc_rating, + ) + return Plan(record=plan_record, scenario=epc_c_scenario, id=1) + + return _create_plan + + +@pytest.mark.parametrize( + "post_sap_points, post_epc_rating, expected_compliance", + [ + (75, Epc.C, True), + (100, Epc.A, True), + (60, Epc.D, False), + ], +) +def test_scenario_goal_is_epc_c( + plan_factory: Callable[[int, "Epc"], "Plan"], + post_sap_points: int, + post_epc_rating: "Epc", + expected_compliance: bool, +) -> None: + # arrange + plan = plan_factory(post_sap_points, post_epc_rating) + + # act + actual_compliance: bool = plan.is_compliant + + # assert + assert actual_compliance == expected_compliance diff --git a/backend/categorisation/tests/test_prioritised_plan_selected.py b/backend/categorisation/tests/test_prioritised_plan_selected.py new file mode 100644 index 00000000..a9529a53 --- /dev/null +++ 
b/backend/categorisation/tests/test_prioritised_plan_selected.py @@ -0,0 +1,160 @@ +from datetime import datetime +from typing import List, Optional +import pytest + +from backend.app.domain.classes.plan import Plan +from backend.app.domain.classes.scenario import Scenario +from backend.app.domain.records.plan_record import PlanRecord +from backend.app.domain.records.scenario_record import ScenarioRecord +from backend.app.db.models.portfolio import Epc, PortfolioGoal +from backend.categorisation.processor import choose_cheapest_relevant_plan + + +@pytest.fixture +def created_at_datetime() -> datetime: + return datetime.now() + + +def make_plan_record( + created_at: datetime, default: bool, cost_of_works: Optional[float] = 500.0 +) -> PlanRecord: + return PlanRecord( + property_id=1, + portfolio_id=1, + created_at=created_at, + is_default=default, + post_epc_rating=Epc.C, + cost_of_works=cost_of_works, + ) + + +def make_scenario(name: str, created_at: datetime, is_default: bool) -> Scenario: + record = ScenarioRecord( + name=name, + created_at=created_at, + housing_type="", + goal=PortfolioGoal.INCREASING_EPC, + goal_value="C", + trigger_file_path="", + multi_plan=False, + is_default=is_default, + ) + return Scenario(record=record, id=3 if is_default else 4) + + +def make_plan( + created_at: datetime, + default: bool, + cost_of_works: Optional[float] = 500.0, + name: str = "", +) -> Plan: + scenario = make_scenario(name, created_at, default) + plan_id = 1 if default else 2 + return Plan( + record=make_plan_record(created_at, default, cost_of_works), + scenario=scenario, + id=plan_id, + ) + + +def test_prioritised_scenario_selected(created_at_datetime: datetime) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, name="EPC C") + minor_works_plan = make_plan(created_at_datetime, False, name="EPC C - Minor Works") + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 2 + + # act + actual_default_plan = 
choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id + + +def test_cheapest_plan_returned_if_not_in_priority_list( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan( + created_at_datetime, True, cost_of_works=1000.0, name="EPC C" + ) + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=100.0, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [3, 5] + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id + + +def test_all_plans_zero_cost__highest_priority_returned( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C") + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=0.0, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id + + +def test_some_plans_zero_cost__cheapest_returned( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=0.0, name="EPC C") + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=50.0, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 1 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + 
scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id + + +def test_all_plans_null_cost__highest_priority_returned( + created_at_datetime: datetime, +) -> None: + # arrange + epc_c_plan = make_plan(created_at_datetime, True, cost_of_works=None, name="EPC C") + minor_works_plan = make_plan( + created_at_datetime, False, cost_of_works=None, name="EPC C - Minor Works" + ) + scenario_priority_order: List[int] = [4, 3] + expected_default_plan_id = 2 + + # act + actual_default_plan = choose_cheapest_relevant_plan( + plans=[epc_c_plan, minor_works_plan], + scenario_priority_order=scenario_priority_order, + ) + + # assert + assert actual_default_plan.id == expected_default_plan_id diff --git a/backend/condition/condition_trigger_request.py b/backend/condition/condition_trigger_request.py index 03bd6ad1..daa82949 100644 --- a/backend/condition/condition_trigger_request.py +++ b/backend/condition/condition_trigger_request.py @@ -29,5 +29,5 @@ class ConditionTriggerRequest(BaseModel): # { # "file_type": "LBWF", # "trigger_file_bucket": "condition-data-dev", -# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx", +# "trigger_file_key": "input/lbwf/LBWF - Example Asset Data September 2025.xlsx" # } diff --git a/backend/docker-compose-local-lambdas.yml b/backend/docker-compose-local-lambdas.yml new file mode 100644 index 00000000..50e9193b --- /dev/null +++ b/backend/docker-compose-local-lambdas.yml @@ -0,0 +1,11 @@ +version: "3.9" + +services: + categorisation-lambda: + build: + context: ../ + dockerfile: backend/categorisation/handler/Dockerfile + ports: + - "9000:8080" + env_file: + - ../.env \ No newline at end of file diff --git a/backend/engine/engine.py b/backend/engine/engine.py index d1b6faba..bb465d4c 100644 --- a/backend/engine/engine.py +++ b/backend/engine/engine.py @@ -1191,14 +1191,18 @@ async def model_engine(body: PlanTriggerRequest): property_required_measures = 
[m for m in recommendations[p.id] if m[0]["type"] in body.required_measures] measures_to_optimise = [m for m in recommendations[p.id] if m[0]["type"] not in body.required_measures] - ventilation_included = "ventilation" in property_measure_types + # TODO - formalise property measure types into an enum + ventilation_included = ( + "ventilation" in property_measure_types or "mechanical_ventilation" in property_measure_types + ) # If a measure requiring ventilation is selected, and the property does not have ventilation, we enfore # its inclusion - needs_ventilation = any( - x in property_measure_types for x in assumptions.measures_needing_ventilation - ) and not p.has_ventilation and ventilation_included + needs_ventilation = optimiser_functions.check_needs_ventilation( + property_measure_types, assumptions.measures_needing_ventilation, p.has_ventilation, + ventilation_included + ) if not measures_to_optimise: # Nothing to do, we just reshape the recommendations @@ -1315,7 +1319,7 @@ async def model_engine(body: PlanTriggerRequest): recommendations=recommendations, selected=selected, ) - # Add best practice measures (ventilation/trickle vents) + # Add best practice measures (ventilation/trickle vents) - pass needs_ventilation flag selected = optimiser_functions.add_best_practice_measures(p.id, solution, recommendations, selected) # Final flattening - we pass what the battery SAP score would be, regardless if the battery was selected recommendations[p.id] = optimiser_functions.flatten_recommendations_with_defaults( diff --git a/backend/export/README.md b/backend/export/README.md new file mode 100644 index 00000000..b5715ced --- /dev/null +++ b/backend/export/README.md @@ -0,0 +1,169 @@ +# 🧪 Running Tests in PyCharm (macOS + pytest-postgresql) + +Our test suite uses `pytest` and `pytest-postgresql`, which +automatically spins up a temporary PostgreSQL instance. 
+ +On Linux (including GitHub Actions), PostgreSQL binaries are installed +in standard system locations.\ +On macOS (Homebrew), they are not --- so PyCharm needs a small +configuration tweak to locate `pg_ctl`. + +This guide explains how to run and debug tests locally in PyCharm +without modifying test code. + +------------------------------------------------------------------------ + +## ✅ Prerequisites + +### Devcontainer + +PostgreSQL is already installed in the devcontainer, so no additional setup is needed. + +Running + +```bash +make test +``` + +runs the test suite, which automatically starts a temporary PostgreSQL instance. + +### Local macOS + +1. Install PostgreSQL via Homebrew: + +``` bash +brew install postgresql +``` + +2. Confirm `pg_ctl` exists: + +``` bash +which pg_ctl +``` + +Typical output: + + /opt/homebrew/bin/pg_ctl + +------------------------------------------------------------------------ + +# 🚀 Running Tests in PyCharm + +## Step 1 --- Create a PyCharm pytest Run Configuration + +1. Open the test file. +2. Click the green ▶ next to the test. +3. Choose **"Edit Run Configuration..."** + +You should see something like: + +- **Target:** `backend/export/tests/test_export.py` +- **Working directory:** Project root (e.g. `Model/`) + +------------------------------------------------------------------------ + +## Step 2 --- Add Required Override (macOS Only) + +In the Run Configuration: + +### ➜ "Additional Arguments" + +Add: + + --override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl + +This tells `pytest-postgresql` where `pg_ctl` lives on macOS. + +Without this, PyCharm may fail with: + + ExecutableMissingException: Could not found pg_config executable + +------------------------------------------------------------------------ + +## Step 3 --- Run or Debug + +You can now: + +- Click ▶ Run\ +- Click 🐞 Debug\ +- Set breakpoints normally + +The temporary PostgreSQL instance will start automatically. 
+ +------------------------------------------------------------------------ + +# 🔍 Why This Is Needed + +`pytest-postgresql` defaults to a Linux-style path: + + /usr/lib/postgresql/{version}/bin/pg_ctl + +That path exists on Ubuntu (CI), but not on macOS. + +On macOS, Homebrew installs PostgreSQL in: + + /opt/homebrew/bin/ + +The `--override-ini` flag safely overrides the executable path +**locally**, without modifying: + +- test files\ +- `conftest.py`\ +- `pytest.ini`\ +- CI configuration + +This ensures: + +- ✅ Tests still work in GitHub Actions\ +- ✅ Tests still work for Linux users\ +- ✅ macOS developers can debug in PyCharm\ +- ✅ No repository-specific hacks are required + +------------------------------------------------------------------------ + +# 🛠 Optional: Using a Local `.env` File + +If you prefer not to hardcode the override in the run configuration: + +1. Create a local file: + +```{=html} + +``` + + .env.local + +2. Add: + +```{=html} + +``` + + PYTEST_ADDOPTS=--override-ini=postgresql_exec=/opt/homebrew/bin/pg_ctl + +3. In PyCharm: + - Open the Run Configuration + - Add `.env.local` under **"Paths to .env files"** + +------------------------------------------------------------------------ + +# 🧪 Running Tests via Terminal (Recommended for CI Parity) + +For normal execution outside PyCharm: + +``` bash +make test +``` + +This already works without additional configuration. + +------------------------------------------------------------------------ + +# 🧠 Summary + +Environment Works Without Override? Needs `--override-ini`? 
+ ------------------------ ------------------------- ------------------------- +GitHub Actions (Linux) ✅ Yes ❌ No +Linux local ✅ Yes ❌ No +macOS terminal (tox) ✅ Yes ❌ No +macOS PyCharm debugger ❌ No ✅ Yes diff --git a/backend/export/property_scenarios/db_functions.py b/backend/export/property_scenarios/db_functions.py new file mode 100644 index 00000000..e9b3d7e3 --- /dev/null +++ b/backend/export/property_scenarios/db_functions.py @@ -0,0 +1,227 @@ +from typing import List, Any, Dict, Optional, Tuple, Sequence +import pandas as pd +from sqlalchemy import select +from sqlalchemy.orm import Session +from sqlalchemy.engine import Row +from collections import defaultdict + +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) +from backend.app.db.models.portfolio import ( + PropertyModel, + PropertyDetailsEpcModel, +) +from backend.app.db.models.materials import Material +from utils.logger import setup_logger + +logger = setup_logger() + + +class DbMethods: + + def __init__(self, session: Session) -> None: + self.session = session + + def get_properties(self, portfolio_id: int) -> pd.DataFrame: + """ + Function to fetch the property data, for property scenario exports + :param portfolio_id: + :return: + """ + stmt = ( + select(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .where(PropertyModel.portfolio_id == portfolio_id) + ) + + rows: Sequence[Row[Tuple[PropertyModel, PropertyDetailsEpcModel]]] = ( + self.session.execute(stmt).all() + ) + + data: List[Dict[str, Any]] = [ + { + **{ + col.name: getattr(property_model, col.name) + for col in PropertyModel.__table__.columns.values() + }, + **{ + col.name: getattr(epc_model, col.name) + for col in PropertyDetailsEpcModel.__table__.columns.values() + }, + } + for property_model, epc_model in rows + ] + + return pd.DataFrame(data) + + def 
get_latest_plans( + self, + portfolio_id: int, + scenario_ids: Optional[List[int]] = None, + default_only: bool = False, + ) -> pd.DataFrame: + """ + Fetch latest plans. + + Modes: + 1) Scenario mode: latest per (scenario_id, property_id) + 2) Default mode: latest default plan per property (ignores scenario_ids) + + """ + + # ----------------------------- + # Sanity checks + # ----------------------------- + if default_only and scenario_ids: + # Override scenario_ids to make it explicit that they will be ignored in the query + scenario_ids = None + + if not default_only and not scenario_ids: + raise ValueError( + "Either scenario_ids must be provided " + "or default_only must be True." + ) + + # ----------------------------- + # Filter on just the default plans - we ignore the scenario ids. NOTE - this is specific to postgres + # and relies on DISTINCT ON behaviour. + # ----------------------------- + if default_only: + # Latest default plan per property (ignore scenarios entirely) + # DISTINCT ON (property_id) keeps the first row per property, + # ordered by created_at DESC so we get the newest one. + + stmt = ( + select(PlanModel) + .where( + PlanModel.portfolio_id == portfolio_id, + PlanModel.is_default.is_(True), + ) + .distinct(PlanModel.property_id) + .order_by( + PlanModel.property_id, + PlanModel.created_at.desc(), + ) + ) + + else: + # Latest plan per (scenario_id, property_id) + # DISTINCT ON (scenario_id, property_id) keeps the newest + # plan per scenario/property combination. 
+ + assert scenario_ids is not None + + stmt = ( + select(PlanModel) + .where( + PlanModel.portfolio_id == portfolio_id, + PlanModel.scenario_id.in_(scenario_ids), + ) + .distinct( + PlanModel.scenario_id, + PlanModel.property_id, + ) + .order_by( + PlanModel.scenario_id, + PlanModel.property_id, + PlanModel.created_at.desc(), + ) + ) + + logger.info("Fetching plans") + + plans: Sequence[PlanModel] = self.session.scalars(stmt).all() + + return pd.DataFrame( + [ + { + col.name: getattr(plan, col.name) + for col in PlanModel.__table__.columns.values() + } + for plan in plans + ] + ) + + def get_recommendations(self, plan_ids: List[int]) -> pd.DataFrame: + + if not plan_ids: + logger.info("No plan ids provided") + return pd.DataFrame() + + stmt = ( + select(Recommendation, PlanModel.scenario_id, PlanModel.name) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .where( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + ) + + rows: Sequence[Tuple[Recommendation, Optional[int], Optional[str]]] = ( + self.session.execute(stmt).tuples().all() + ) + + data: List[Dict[str, Any]] = [ + { + **{ + col.name: getattr(rec_model, col.name) + for col in Recommendation.__table__.columns.values() + }, + "scenario_id": scenario_id, + "plan_name": plan_name, + } + for rec_model, scenario_id, plan_name in rows + ] + + return pd.DataFrame(data) + + def attach_materials(self, recommendations_df: pd.DataFrame) -> pd.DataFrame: + + if recommendations_df.empty: + recommendations_df["materials"] = [] + return recommendations_df + + rec_ids: List[int] = recommendations_df["id"].astype(int).tolist() + + stmt = ( + select(RecommendationMaterials, Material) + .join(Material, RecommendationMaterials.material_id == Material.id) + .where(RecommendationMaterials.recommendation_id.in_(rec_ids)) + ) + + rows: 
Sequence[Tuple[RecommendationMaterials, Material]] = ( + self.session.execute(stmt).tuples().all() + ) + + materials_map: Dict[int, List[Dict[str, Any]]] = defaultdict(list) + + for rec_mat, material in rows: + materials_map[rec_mat.recommendation_id].append( + { + "material_id": rec_mat.material_id, + "depth": rec_mat.depth, + "quantity": rec_mat.quantity, + "quantity_unit": rec_mat.quantity_unit, + "estimated_cost": rec_mat.estimated_cost, + "type": material.type.value if material.type else None, + "includes_battery": material.includes_battery, + } + ) + + recommendations_df["materials"] = recommendations_df["id"].astype(int).apply( + lambda x: materials_map.get(x, []) + ) + + return recommendations_df diff --git a/backend/export/property_scenarios/input_schema.py b/backend/export/property_scenarios/input_schema.py new file mode 100644 index 00000000..f6fa5965 --- /dev/null +++ b/backend/export/property_scenarios/input_schema.py @@ -0,0 +1,40 @@ +from typing import Optional, Union, List +from pydantic import BaseModel, model_validator, PrivateAttr + + +class ExportRequest(BaseModel): + # uuid which maps to a specific export request, used for tracking and logging + task_id: Union[str, None] + # uuid which maps to a specific export operation, used for tracking and logging. subtask is the child of the + # task, where the work has been distributed across workers + subtask_id: Union[str, None] + # associated portfolio id for the export request + portfolio_id: int + # list of scenario ids to export + scenario_ids: List[int] + # boolean which will overwrite the scenario ids. 
If this is true, we will only export the default plan for each + # property and will ignore the scenario ids + default_plans_only: Optional[bool] = False + + # Private attribute to indicate whether scenario_ids should be ignored due to default_plans_only being True + _scenario_ids_ignored: bool = PrivateAttr(default=False) + + @model_validator(mode="after") + def validate_default_plan_override(self): + """ + If default_plans_only is True and scenario_ids were provided, + we allow execution but make it explicit that scenario_ids + will be ignored. + """ + if self.default_plans_only and self.scenario_ids: + # We do NOT raise — we allow execution. + # We just mark the object so the handler can log/return a warning. + object.__setattr__(self, "_scenario_ids_ignored", True) + else: + object.__setattr__(self, "_scenario_ids_ignored", False) + + return self + + @property + def scenario_ids_ignored(self) -> bool: + return self._scenario_ids_ignored diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py new file mode 100644 index 00000000..d38db4c9 --- /dev/null +++ b/backend/export/property_scenarios/main.py @@ -0,0 +1,179 @@ +import json +from typing import Optional, Any, Mapping, Dict, Union, List + +import pandas as pd +from sqlalchemy.orm import Session + +from backend.export.property_scenarios.input_schema import ExportRequest +from backend.export.property_scenarios.db_functions import DbMethods +from backend.app.db.connection import db_read_session +from backend.app.utils import sap_to_epc +from utils.logger import setup_logger + +logger = setup_logger() + + +def choose_group_keys(payload: ExportRequest) -> List[Union[int, str]]: + if payload.default_plans_only: + return ["default_plans"] # Single export, no scenario grouping + return payload.scenario_ids + + +def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bool: + """ + Simple check to determine if any material in the list is a solar PV 
measure that includes a battery. + :param materials_list: + :return: + """ + for m in materials_list or []: + if ( + m.get("type") == "solar_pv" + and m.get("includes_battery") is True + ): + return True + return False + + +def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]: + export_files: Dict[Union[str, int], pd.DataFrame] = {} + + db_methods = DbMethods(session) + + properties_df = db_methods.get_properties(payload.portfolio_id) + + logger.info("Retrieved %s properties for export", len(properties_df)) + + plans_df: pd.DataFrame = db_methods.get_latest_plans( + portfolio_id=payload.portfolio_id, + scenario_ids=payload.scenario_ids, + default_only=bool(payload.default_plans_only), + ) + + logger.info("Retrieved %s plans for export", len(plans_df)) + + if plans_df.empty: + logger.info("Empty plans dataframe - no plans to export. Returning empty export.") + return export_files + plan_ids: List[int] = plans_df["id"].tolist() + recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids) + + logger.info("Retrieved %s recommendations for export", len(recommendations_df)) + + recommendations_df = db_methods.attach_materials(recommendations_df) + + recommendations_df["has_solar_with_battery"] = ( + recommendations_df["materials"].apply(has_solar_with_battery) + ) + + _filter = ( + (recommendations_df["measure_type"] == "solar_pv") + & (recommendations_df["has_solar_with_battery"]) + ) + + recommendations_df.loc[_filter, "measure_type"] = ( + recommendations_df.loc[_filter, "measure_type"] + "_with_battery" + ) + + group_keys: List[Union[str, int]] = choose_group_keys(payload) + + for group_key in group_keys: + + if payload.default_plans_only: + scenario_recs = recommendations_df + else: + scenario_recs = recommendations_df[ + recommendations_df["scenario_id"] == group_key + ] + + if scenario_recs.empty: + logger.info("No recommendations found for group_key %s - skipping export for this group", group_key) + 
continue + + measures_df: pd.DataFrame = scenario_recs[ + ["property_id", "measure_type", "plan_name", "estimated_cost"] + ].drop_duplicates() + + pivot: pd.DataFrame = measures_df.pivot( + index=["property_id", "plan_name"], + columns="measure_type", + values="estimated_cost", + ).reset_index() + + pivot["total_retrofit_cost"] = ( + pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1) + ) + + post_sap: pd.DataFrame = ( + scenario_recs.groupby("property_id")[["sap_points"]] + .sum() + .reset_index() + ) + + df: pd.DataFrame = ( + properties_df.rename(columns={"solar_pv": "existing_solar_pv"}) + .merge(pivot, how="left", on="property_id") + .merge(post_sap, how="left", on="property_id") + ) + + df["sap_points"] = df["sap_points"].fillna(0) + df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc) + + export_files[group_key] = df + + return export_files + + +# ============================================================ +# Lambda Handler +# ============================================================ + +def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]: + """ + Example event: + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 569, + "scenario_ids": [], + "default_plans_only": True, + } + :param event: Lambda event containing export request details + :param context: Lambda context (not used in this handler but included for completeness) + :return: HTTP response indicating success or failure of the export operation + """ + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + + logger.debug("Validating request body") + payload = ExportRequest.model_validate(body_dict) + + if payload.scenario_ids_ignored: + logger.warning( + "Received scenario_ids in request body but they will be ignored " + "because default_plans_only is set to True" + ) + + 
logger.debug("Successfully validated request body") + with db_read_session() as session: + exported_files = process_export(payload, session) + + # TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url + _ = exported_files + return { + "statusCode": 200, + "body": json.dumps({}), + } + + except Exception as e: + logger.error(f"Failed to process record: {e}") + return { + "statusCode": 500, + "body": json.dumps({"message": "Failed to process export request"}), + } + + return { + "statusCode": 201, + "body": json.dumps({"message": "No records to process"}), + } diff --git a/backend/export/tests/conftest.py b/backend/export/tests/conftest.py new file mode 100644 index 00000000..10bfa971 --- /dev/null +++ b/backend/export/tests/conftest.py @@ -0,0 +1,55 @@ +import pytest +from sqlalchemy import create_engine +from sqlalchemy.orm import sessionmaker +from backend.app.db.base import Base + + +@pytest.fixture(scope="function") +def engine(postgresql): + """ + Create a SQLAlchemy engine bound to the ephemeral + pytest-postgresql database. + """ + + # Build SQLAlchemy URL from psycopg connection info + connection_string = ( + f"postgresql+psycopg://" + f"{postgresql.info.user}:" + f"{postgresql.info.password}@" + f"{postgresql.info.host}:" + f"{postgresql.info.port}/" + f"{postgresql.info.dbname}" + ) + + engine = create_engine(connection_string) + + # Create tables once per test session + Base.metadata.create_all(engine) + + # Yeild will split this function into two phase. 1) setup and 2) teardown, the latter of which will run after all + # tests have completed + yield engine + + # Clean-up after entire test session + Base.metadata.drop_all(engine) + engine.dispose() + + +@pytest.fixture(scope="function") +def db_session(engine): + """ + Provides a clean transactional session per test. + + Rolls back after each test to keep isolation. 
+ """ + + connection = engine.connect() + transaction = connection.begin() + + session = sessionmaker(bind=connection)() + + yield session + + session.close() + transaction.rollback() + connection.close() diff --git a/backend/export/tests/fixtures/plan_recs_569.csv b/backend/export/tests/fixtures/plan_recs_569.csv new file mode 100644 index 00000000..01df3c96 --- /dev/null +++ b/backend/export/tests/fixtures/plan_recs_569.csv @@ -0,0 +1,14 @@ +id,plan_id,recommendation_id +24799722,1604277,24798968 +24799726,1604277,24798972 +24801150,1604367,24800396 +24802703,1604448,24801949 +24802724,1604448,24801970 +24805327,1604577,24804573 +24805397,1604579,24804643 +24805401,1604579,24804647 +24813000,1605111,24812246 +24813002,1605111,24812248 +24813004,1605111,24812250 +24813006,1605112,24812252 +24813009,1605112,24812255 diff --git a/backend/export/tests/fixtures/plans_569.csv b/backend/export/tests/fixtures/plans_569.csv new file mode 100644 index 00000000..7580163f --- /dev/null +++ b/backend/export/tests/fixtures/plans_569.csv @@ -0,0 +1,11 @@ +id,name,portfolio_id,property_id,scenario_id,created_at,is_default,valuation_increase_lower_bound,valuation_increase_upper_bound,valuation_increase_average,plan_type,post_sap_points,post_epc_rating,post_co2_emissions,co2_savings,post_energy_bill,energy_bill_savings,post_energy_consumption,energy_consumption_savings,valuation_post_retrofit,valuation_increase,cost_of_works,contingency_cost +1604277,,569,660478,1060,2026-02-19 16:14:45.560816,True,0.0302,0.07,0.048226666,,71.5,Epc.C,4.1813498,0.71865046,1447.5204,691.6662,15303.688,3276.7622,,,6984.568,1003.9568 +1604448,,569,660529,1060,2026-02-19 16:14:52.052740,True,0.0302,0.07,0.048226666,,70.0,Epc.C,7.32816,1.5818402,2978.734,2314.7651,16558.295,1837.0155,,,13528.6,2844.636 +1604367,,569,660538,1060,2026-02-19 16:14:48.517937,True,0.02,0.03,0.025,,71.0,Epc.C,5.003036,0.43696404,1933.2236,521.5316,19190.531,1883.4657,,,5520.0,828.0 +1604577,,569,660688,1060,2026-02-19 
16:15:04.461456,True,0.02,0.03,0.025,,70.0,Epc.C,3.6019807,0.20801921,1610.3181,248.27809,13746.731,896.6345,,,5100.0,765.0 +1604579,,569,660690,1060,2026-02-19 16:15:04.461456,True,0.02,0.03,0.025,,70.0,Epc.C,4.7473392,0.5326607,1867.537,699.7881,18730.615,2527.2231,,,5469.0,825.74 +1605110,,569,660598,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,70.0,Epc.C,1.89,0.0,1125.7338,0.0,7268.866,0.0,,,0.0,0.0 +1605111,,569,660599,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,68.7,Epc.D,2.02,1.1,1174.9326,319.18213,7748.233,3924.9,,,1218.584,124.0984 +1605080,,569,660448,1069,2026-02-19 16:18:57.581528,True,0.0,0.0,0.0,,71.0,Epc.C,1.79,0.0,1101.9677,0.0,6821.7285,0.0,,,0.0,0.0 +1605112,,569,660600,1069,2026-02-19 16:18:57.606337,True,0.0,0.0,0.0,,64.9,Epc.D,1.89,0.8,1131.3535,172.0886,7241.062,2466.7,,,3885.834,716.7084 +1605404,,569,660652,1069,2026-02-19 16:19:28.383096,True,0.0,0.0,0.0,,71.0,Epc.C,3.18,0.0,1757.515,0.0,11929.814,0.0,,,0.0,0.0 diff --git a/backend/export/tests/fixtures/portfolio_569.csv b/backend/export/tests/fixtures/portfolio_569.csv new file mode 100644 index 00000000..7cbcbab9 --- /dev/null +++ b/backend/export/tests/fixtures/portfolio_569.csv @@ -0,0 +1,2 @@ +id,name,budget,status,goal,cost,number_of_properties,co2_equivalent_savings,energy_savings,energy_cost_savings,property_valuation_increase,rental_yield_increase,total_work_hours,labour_days,created_at,updated_at,epc_breakdown_pre_retrofit,epc_breakdown_post_retrofit,n_units_to_retrofit,co2_per_unit_pre_retrofit,co2_per_unit_post_retrofit,energy_bill_per_unit_pre_retrofit,energy_bill_per_unit_post_retrofit,energy_consumption_per_unit_pre_retrofit,energy_consumption_per_unit_post_retrofit,valuation_improvement_per_unit,cost_per_unit,cost_per_co2_saved,cost_per_sap_point,valuation_return_on_investment +569,Lifespace Rentals - Sample Retrofit Plans,,PortfolioStatus.SCOPING,PortfolioGoal.NONE,,,,,,,,,,2026-02-12 21:23:37.862000+00:00,2026-02-12 21:23:37.862000+00:00,,,,,,,,,,,,,, diff 
--git a/backend/export/tests/fixtures/properties_569.csv b/backend/export/tests/fixtures/properties_569.csv new file mode 100644 index 00000000..ac1934bd --- /dev/null +++ b/backend/export/tests/fixtures/properties_569.csv @@ -0,0 +1,11 @@ +,id,portfolio_id,creation_status,uprn,landlord_property_id,building_reference_number,status,address,postcode,has_pre_condition_report,has_recommendations,created_at,updated_at,property_type,built_form,local_authority,constituency,number_of_rooms,year_built,tenure,current_epc_rating,current_sap_points,current_valuation,installed_measures_sap_point_adjustment,is_sap_points_adjusted_for_installed_measures,original_sap_points +0,660478,569,PropertyCreationStatus.READY,100090438731.0,BARR052,3460742868.0,PortfolioStatus.ASSESSMENT,"52, Barrack Street",CO1 2LR,True,True,2026-02-12 21:59:02.744427,2026-02-19 16:18:57.941443,House,End-Terrace,Colchester,Colchester,4.0,1900.0,rental (private),Epc.E,53.0,0.0,0.0,False,53.0 +1,660448,569,PropertyCreationStatus.READY,100090678548.0,BOUR110A,10002385993.0,PortfolioStatus.ASSESSMENT,Upper 110a Bournemouth Park Road,SS2 5LS,True,True,2026-02-12 21:59:02.388473,2026-02-19 16:18:57.578330,Flat,Detached,Southend-on-Sea,Rochford and Southend East,2.0,1950.0,Rented (private),Epc.C,71.0,0.0,0.0,False,71.0 +2,660538,569,PropertyCreationStatus.READY,10033423541.0,CHUR099,8188570968.0,PortfolioStatus.ASSESSMENT,"99, Church Road",RM3 0SH,True,True,2026-02-12 21:59:03.203854,2026-02-19 16:19:03.748571,House,Mid-Terrace,Havering,Hornchurch and Upminster,5.0,1900.0,rental (private),Epc.D,58.0,0.0,0.0,False,58.0 +3,660529,569,PropertyCreationStatus.READY,100091596678.0,CHER003,8961772668.0,PortfolioStatus.ASSESSMENT,"3, Brickfield Cottages",SS4 1PP,True,True,2026-02-12 21:59:02.935502,2026-02-19 16:18:55.971569,House,Mid-Terrace,Rochford,Rochford and Southend East,4.0,1900.0,rental (private),Epc.E,41.0,0.0,0.0,False,41.0 
+4,660598,569,PropertyCreationStatus.READY,100090663644.0,FLEM049B,10006705876.0,PortfolioStatus.ASSESSMENT,49b Flemming Crescent,SS9 4HR,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,Semi-Detached,Southend-on-Sea,,2.0,1930.0,Rented (social),Epc.C,70.0,0.0,0.0,False,70.0 +5,660599,569,PropertyCreationStatus.READY,10012149765.0,FORE003A,9740118668.0,PortfolioStatus.ASSESSMENT,"3a, Forest Avenue",SS1 2HU,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,End-Terrace,Southend-on-Sea,Rochford and Southend East,2.0,1930.0,rental (private),Epc.D,56.0,0.0,0.0,False,56.0 +6,660600,569,PropertyCreationStatus.READY,10012149797.0,FORE003GFF,1436818568.0,PortfolioStatus.ASSESSMENT,"3, Forest Avenue",SS1 2HU,True,True,2026-02-12 21:59:04.732965,2026-02-19 16:18:57.601893,Flat,End-Terrace,Southend-on-Sea,Rochford and Southend East,2.0,1900.0,rental (private),Epc.D,59.0,0.0,0.0,False,59.0 +7,660652,569,PropertyCreationStatus.READY,100022668838.0,MANT061,10000429573.0,PortfolioStatus.ASSESSMENT,61 MANTILLA ROAD,SW17 8DY,True,True,2026-02-12 21:59:04.711717,2026-02-19 16:19:28.379512,Flat,Mid-Terrace,Wandsworth,Tooting,4.0,1900.0,Owner-occupied,Epc.C,71.0,0.0,0.0,False,71.0 +8,660690,569,PropertyCreationStatus.READY,100021987220.0,MERR008,9050743578.0,PortfolioStatus.ASSESSMENT,"8, Merritt Road",SE4 1DY,True,True,2026-02-12 21:59:09.459245,2026-02-19 16:19:32.826638,House,Mid-Terrace,Lewisham,"Lewisham, Deptford",6.0,1900.0,owner-occupied,Epc.D,58.0,0.0,0.0,False,58.0 +9,660688,569,PropertyCreationStatus.READY,207158120.0,MEDC048,208210678.0,PortfolioStatus.ASSESSMENT,"48, Medcalf Road",EN3 6HL,True,True,2026-02-12 21:59:09.459245,2026-02-19 16:19:32.826638,House,Mid-Terrace,Enfield,Enfield North,4.0,1900.0,rental (private),Epc.D,61.0,0.0,0.0,False,61.0 diff --git a/backend/export/tests/fixtures/property_details_epc_569.csv b/backend/export/tests/fixtures/property_details_epc_569.csv new file mode 100644 index 00000000..16f48e54 --- 
/dev/null +++ b/backend/export/tests/fixtures/property_details_epc_569.csv @@ -0,0 +1,11 @@ +,id,property_id,portfolio_id,full_address,lodgement_date,is_expired,total_floor_area,walls,walls_rating,roof,roof_rating,floor,floor_rating,windows,windows_rating,heating,heating_rating,heating_controls,heating_controls_rating,hot_water,hot_water_rating,lighting,lighting_rating,mainfuel,ventilation,solar_pv,solar_hot_water,wind_turbine,floor_height,number_heated_rooms,heat_loss_corridor,unheated_corridor_length,number_of_open_fireplaces,number_of_extensions,number_of_storeys,mains_gas,energy_tariff,primary_energy_consumption,co2_emissions,current_energy_demand,current_energy_demand_heating_hotwater,estimated,sap_05_overwritten,sap_05_score,sap_05_epc_rating,heating_cost_current,hot_water_cost_current,lighting_cost_current,appliances_cost_current,gas_standing_charge,electricity_standing_charge,original_co2_emissions,original_primary_energy_consumption,original_current_energy_demand,original_current_energy_demand_heating_hotwater,installed_measures_co2_adjustment,installed_measures_energy_demand_adjustment,installed_measures_total_energy_bill_adjustment,installed_measures_heat_demand_adjustment,is_epc_adjusted_for_installed_measures +44,1534934,660688,569,"48, Medcalf Road",2018-09-05,False,68.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Solid, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.55,,False,,0,0,,True,Single,278.0,3.81,14643.366,12185.6,False,False,,,711.0628,139.06198,70.770935,609.7844,128.0785,199.8375,3.81,278.0,14643.366,12185.6,0.0,0.0,0.0,0.0,False +53,1534816,660600,569,"3, Forest Avenue",2020-02-27,False,35.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Suspended, no insulation",,Fully double glazed,3,"Boiler and radiators, mains 
gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 83% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.64,,False,,0,0,,True,Single,389.0,2.69,9707.762,8267.8,False,False,,,466.75378,110.046844,53.1057,345.6198,128.0785,199.8375,2.69,389.0,9707.762,8267.8,0.0,0.0,0.0,0.0,False +292,1534754,660478,569,"52, Barrack Street",2019-09-11,False,67.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Solid, no insulation",,Partial double glazing,2,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,Low energy lighting in 78% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.36,,False,,0,1,,True,Single,374.0,4.9,18580.451,16094.1,False,False,,,980.4243,142.37581,86.25319,602.2173,128.0785,199.8375,4.9,374.0,18580.451,16094.1,0.0,0.0,0.0,0.0,False +295,1534868,660652,569,"61 MANTILLA ROAD, LONDON",2020-12-10,False,79.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Solid, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.63,,False,,0,0,,True,off-peak 7 hour,184.0,3.18,11929.814,9046.1,False,False,,,487.25763,143.84087,110.2875,688.2131,128.0785,199.8375,3.18,184.0,11929.814,9046.1,0.0,0.0,0.0,0.0,False +310,1534964,660448,569,Upper 110a Bournemouth Park Road,2022-02-22,False,35.0,"Solid brick, as built, no insulation",1,"Pitched, 100 mm loft insulation",3.0,(another dwelling below),,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 80% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.41,,False,,0,0,,True,Unknown,238.0,1.79,6821.7285,5382.4,False,False,,,272.55676,102.9448,52.930252,345.6198,128.0785,199.8375,1.79,238.0,6821.7285,5382.4,0.0,0.0,0.0,0.0,False 
+344,1534936,660690,569,"8, Merritt Road",2017-08-15,False,101.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Suspended, no insulation",,Fully double glazed,3,"Boiler and radiators, mains gas",4,"Programmer, room thermostat and trvs",4,From main system,4,No low energy lighting,1,Mains gas not community,natural,0.0,False,0.0,2.6,,False,,0,1,,True,Unknown,260.0,5.28,21257.838,17606.3,False,False,,,1074.1602,154.13814,194.25749,816.8532,128.0785,199.8375,5.28,260.0,21257.838,17606.3,0.0,0.0,0.0,0.0,False +460,1535385,660529,569,"3, Brickfield Cottages, Cherry Orchard Lane",2020-04-09,False,85.0,"Solid brick, as built, no insulation",2,"Pitched, 200 mm loft insulation",4.0,"Suspended, no insulation",,Fully double glazed,3,Electric storage heaters,3,Manual charge control,2,"Electric immersion, off-peak",3,Low energy lighting in 58% of fixed outlets,4,Electricity not community,natural,0.0,False,0.0,2.45,,False,,0,1,,True,dual,577.0,8.91,18395.31,15230.1,False,False,,,3550.6333,666.58136,149.46556,726.9812,0.0,199.8375,8.91,577.0,18395.31,15230.1,0.0,0.0,0.0,0.0,False +485,1534784,660538,569,"99, Church Road, Harold Wood",2019-09-03,False,92.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,"Suspended, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 80% of fixed outlets,5,Mains gas not community,natural,0.0,False,0.0,2.52,,False,,0,1,,True,Single,297.0,5.44,21073.996,17904.0,False,False,,,1092.4246,156.6427,109.16419,768.6077,128.0785,199.8375,5.44,297.0,21073.996,17904.0,0.0,0.0,0.0,0.0,False +494,1534814,660598,569,49b Flemming Crescent,2024-10-03,False,35.0,"Solid brick, as built, no insulation",1,(another dwelling above),,"Suspended, no insulation",,Fully double glazed,4,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in all fixed outlets,5,Mains gas not 
community,natural,0.0,False,0.0,2.42,,False,,0,0,,True,Single,261.0,1.89,7268.866,5865.4,False,False,,,304.39737,104.800545,43.0,345.6198,128.0785,199.8375,1.89,261.0,7268.866,5865.4,0.0,0.0,0.0,0.0,False +741,1534815,660599,569,"3a, Forest Avenue",2020-06-05,False,40.0,"Solid brick, as built, no insulation",1,"Pitched, no insulation",1.0,(another dwelling below),,Fully double glazed,3,"Boiler and radiators, mains gas",4,Programmer and room thermostat,3,From main system,4,Low energy lighting in 38% of fixed outlets,3,Mains gas not community,natural,0.0,False,0.0,2.58,,False,,0,0,,True,Unknown,396.0,3.12,11673.133,9974.6,False,False,,,587.73975,108.13529,85.62337,384.70035,128.0785,199.8375,3.12,396.0,11673.133,9974.6,0.0,0.0,0.0,0.0,False diff --git a/backend/export/tests/fixtures/recommendations_569.csv b/backend/export/tests/fixtures/recommendations_569.csv new file mode 100644 index 00000000..769643ea --- /dev/null +++ b/backend/export/tests/fixtures/recommendations_569.csv @@ -0,0 +1,14 @@ +Unnamed: 0,id,property_id,created_at,type,measure_type,description,estimated_cost,default,starting_u_value,new_u_value,sap_points,heat_demand,kwh_savings,co2_equivalent_savings,energy_savings,energy_cost_savings,property_valuation_increase,rental_yield_increase,total_work_hours,labour_days,already_installed,plan_name +49705,24798968,660478,2026-02-19 16:14:45.560816,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",874.568,True,,,1.5,14.9,1041.2,0.2,14.9,72.639015,,,4.16,1.0,False,whatever +49709,24798972,660478,2026-02-19 16:14:45.560816,solar_pv,solar_pv,"8 panel system, 400W solar panels, 5.8kw Growatt battery - 3.2 kWp system",6110.0,True,,,17.0,79.1,2235.5623,0.5186504,79.1,619.02716,,,48.0,2.0,False,whatever +51133,24800396,660538,2026-02-19 16:14:48.517937,solar_pv,solar_pv,"10 panel system, 400W solar panels - 4.0 kWp 
system",5520.0,True,,,13.0,58.5,1883.4657,0.43696404,58.5,521.5316,,,48.0,2.0,False,whatever +52686,24801949,660529,2026-02-19 16:14:52.052740,heating,boiler_upgrade,"Upgrade to a new condensing boiler. Upgrade heating controls to Room thermostat, programmer and TRVs",8008.6,True,,,12.9,132.9,0.0,1.1556525,132.9,1806.0955,,,26.5,4.0,False,whatever +52707,24801970,660529,2026-02-19 16:14:52.052740,solar_pv,solar_pv,"10 panel system, 400W solar panels - 4.0 kWp system",5520.0,True,,,16.1,68.8,1837.0155,0.4261876,68.8,508.6696,,,48.0,2.0,False,whatever +55310,24804573,660688,2026-02-19 16:15:04.461456,solar_pv,solar_pv,"5 panel system, 400W solar panels - 2.0 kWp system",5100.0,True,,,9.0,41.4,896.6345,0.20801921,41.4,248.27809,,,48.0,2.0,False,whatever +55380,24804643,660690,2026-02-19 16:15:04.461456,low_energy_lighting,low_energy_lighting,Install low energy lighting in 14 outlets,49.0,True,,,2.0,18.2,766.5,0.124173,18.2,212.24385,,,1.0,0.125,False,whatever +55384,24804647,660690,2026-02-19 16:15:04.461456,solar_pv,solar_pv,"9 panel system, 400W solar panels - 3.6 kWp system",5420.0,True,,,10.0,43.9,1760.723,0.40848774,43.9,487.54422,,,48.0,2.0,False,whatever +62983,24812246,660599,2026-02-19 16:18:57.606337,loft_insulation,loft_insulation,Install 300mm of Knauf Loft Roll 44 glass fibre roll in your loft,600.0,True,2.3,2.3,8.4,102.8,3178.2,0.9,102.8,221.72618,,,8.0,1.0,False,whatever +62985,24812248,660599,2026-02-19 16:18:57.606337,low_energy_lighting,low_energy_lighting,Install low energy lighting in 4 outlets,14.0,True,,,1.0,14.2,219.0,0.0,14.2,60.6411,,,1.0,0.125,False,whatever +62987,24812250,660599,2026-02-19 16:18:57.606337,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",604.584,True,,,3.3,18.4,527.7,0.2,18.4,36.814835,,,3.08,1.0,False,whatever +62989,24812252,660600,2026-02-19 
16:18:57.606337,suspended_floor_insulation,suspended_floor_insulation,Install 75mm Q-bot underfloor insulation insulation in suspended floor,3281.25,True,0.87,0.22,4.0,99.2,1816.6,0.6,99.2,126.734566,,,57.05,2.3770833,False,whatever +62992,24812255,660600,2026-02-19 16:18:57.606337,heating,time_temperature_zone_control,"Upgrade heating controls to Smart Thermostats, room sensors and smart radiator valves (time & temperature zone control)",604.584,True,,,1.9,17.7,650.1,0.2,17.7,45.354034,,,3.08,1.0,False,whatever diff --git a/backend/export/tests/test_export.py b/backend/export/tests/test_export.py new file mode 100644 index 00000000..823882b5 --- /dev/null +++ b/backend/export/tests/test_export.py @@ -0,0 +1,540 @@ +import pandas as pd +import numpy as np +from pathlib import Path +import time + +from backend.export.property_scenarios.main import process_export +from backend.export.property_scenarios.input_schema import ExportRequest +from backend.app.db.models.portfolio import PropertyModel, Epc, Portfolio, PortfolioStatus, PortfolioGoal, \ + PropertyCreationStatus, PropertyDetailsEpcModel +from backend.app.db.models.recommendations import PlanModel, Recommendation, PlanRecommendations, \ + RecommendationMaterials +from backend.app.db.models.materials import Material +from utils.logger import setup_logger + +FIXTURE_PATH = Path("backend/export/tests/fixtures") +logger = setup_logger() + + +def load_csv(name: str) -> pd.DataFrame: + df = pd.read_csv(FIXTURE_PATH / name) + df = df.replace({np.nan: None}) + return df + + +def test_default_export_integration(db_session): + # ---------------------------------------- + # 1) Load csvs + # ---------------------------------------- + t0 = time.perf_counter() + portfolio_df = load_csv("portfolio_569.csv") + properties_df = load_csv("properties_569.csv") + property_details_epc_df = load_csv("property_details_epc_569.csv") + plans_df = load_csv("plans_569.csv") + plan_recs_df = load_csv("plan_recs_569.csv") + 
recommendations_df = load_csv("recommendations_569.csv") + + logger.info( + "Loaded CSVs in %.2f seconds | properties=%s plans=%s recs=%s", + time.perf_counter() - t0, + len(properties_df), + len(plans_df), + len(recommendations_df), + ) + + logger.info("Starting database load") + db_load_t0 = time.perf_counter() + + # ---------------------------------------- + # 2) Insert test portfolio + # ---------------------------------------- + + portfolios = [] + for row in portfolio_df.itertuples(index=False): + portfolios.append( + Portfolio( + id=row.id, + name=row.name, + status=PortfolioStatus[row.status.split(".")[-1]], + goal=PortfolioGoal[row.goal.split(".")[-1]] if row.goal else None, + ) + ) + + db_session.bulk_save_objects(portfolios) + db_session.flush() + # ---------------------------------------- + # 3) Insert test property + # ---------------------------------------- + + properties = [] + + for row in properties_df.itertuples(index=False): + row_dict = row._asdict() + + row_dict["uprn"] = int(row_dict["uprn"]) if row_dict.get("uprn") else None + row_dict["building_reference_number"] = ( + int(row_dict["building_reference_number"]) + if row_dict.get("building_reference_number") + else None + ) + + prop = PropertyModel(**{ + col: row_dict[col] + for col in PropertyModel.__table__.columns.keys() + if col in row_dict + }) + + prop.creation_status = PropertyCreationStatus[ + row_dict["creation_status"].split(".")[-1] + ] + prop.status = PortfolioStatus[row_dict["status"].split(".")[-1]] + + if row_dict.get("current_epc_rating"): + prop.current_epc_rating = Epc[ + row_dict["current_epc_rating"].split(".")[-1] + ] + + properties.append(prop) + + db_session.bulk_save_objects(properties) + db_session.flush() + + # ---------------------------------------- + # 4) Insert property details - EPC + # ---------------------------------------- + + epc_rows = [] + + for row in property_details_epc_df.itertuples(index=False): + row_dict = row._asdict() + + # Build only fields 
that exist on the model + epc_data = { + col.name: row_dict[col.name] + for col in PropertyDetailsEpcModel.__table__.columns.values() + if col.name in row_dict and col.name not in ["id", "property_id", "portfolio_id"] + } + + epc = PropertyDetailsEpcModel( + property_id=row.property_id, + portfolio_id=row.portfolio_id, + **epc_data, + ) + + epc_rows.append(epc) + + db_session.bulk_save_objects(epc_rows) + db_session.flush() + + # ---------------------------------------- + # 4) Insert default plan + # ---------------------------------------- + + plans = [] + + for row in plans_df.itertuples(index=False): + row_dict = row._asdict() + + if row_dict.get("post_epc_rating"): + row_dict["post_epc_rating"] = Epc[ + row_dict["post_epc_rating"].split(".")[-1] + ] + + row_dict["scenario_id"] = None + + plan = PlanModel(**{ + col: row_dict[col] + for col in PlanModel.__table__.columns.keys() + if col in row_dict + }) + + plans.append(plan) + + db_session.bulk_save_objects(plans) + db_session.flush() + + # ---------------------------------------- + # 5) Insert recommendation + # ---------------------------------------- + + recs = [ + Recommendation(**{ + col: row[col] + for col in Recommendation.__table__.columns.keys() + if col in row + }) + for _, row in recommendations_df.iterrows() + ] + + db_session.bulk_save_objects(recs) + db_session.flush() + + # ---------------------------------------- + # 6) Insert PlanRecommendations + # ---------------------------------------- + links = [ + PlanRecommendations( + plan_id=row.plan_id, + recommendation_id=row.recommendation_id, + ) + for row in plan_recs_df.itertuples(index=False) + ] + + db_session.bulk_save_objects(links) + db_session.commit() + logger.info("Inserted all data in %.2f seconds", time.perf_counter() - db_load_t0) + + # ---------------------------------------- + # 6) Build payload + # ---------------------------------------- + + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 569, + 
"scenario_ids": [], + "default_plans_only": True, + } + + payload = ExportRequest.model_validate(body_dict) + + # ---------------------------------------- + # 7) Call process_export + # ---------------------------------------- + + logger.info( + "Recommendation count in DB: %s", + db_session.query(Recommendation).count() + ) + + logger.info( + "Property count in DB: %s", + db_session.query(PropertyModel).count() + ) + + logger.info( + "Property EPC in DB: %s", + db_session.query(PropertyDetailsEpcModel).count() + ) + + logger.info( + "Plan count in DB: %s", + db_session.query(PlanModel).count() + ) + + logger.info( + "PlanRecommendatons count in DB: %s", + db_session.query(PlanModel).count() + ) + + logger.info("Starting process_export") + process_t0 = time.perf_counter() + + result = process_export(payload, session=db_session) + + logger.info("process_export finished in %.2f seconds", time.perf_counter() - process_t0) + + # ---------------------------------------- + # 8) Assertions + # ---------------------------------------- + + assert "default_plans" in result, "Expected 'default_plans' in export result, got {}".format(result.keys()) + + df = result["default_plans"] + + assert df.shape[0] == 10, "Expected 10 properties in the export, got {}".format(df.shape[0]) + + failed = df[df["predicted_post_works_sap"] < 69] + failed_property_types = failed["property_type"].value_counts().to_dict() + assert failed_property_types["Flat"] == 2 + # Check the houses + + assert failed.shape[0] + + assert df["total_retrofit_cost"].sum() == 41706.585999999996, ( + "Expected total retrofit cost to be 10000, got {}".format(df["total_retrofit_cost"].sum()) + ) + + assert df["predicted_post_works_sap"].sum() == 698.1, ( + "Expected total predicted post works SAP to be 698.1, got {}".format(df["predicted_post_works_sap"].sum()) + ) + + assert df["sap_points"].sum() == 100.10000000000001, ( + "Expected total SAP points increase to be 100.10000000000001, got 
{}".format(df["sap_points"].sum()) + ) + + assert df.shape == (10, 95), "Expected dataframe shape to be (10, 11), got {}".format(df.shape) + + +def test_solar_with_battery_example(db_session): + test_portfolio_id = 1 + test_property_id = 1 + + portfolio_df = pd.DataFrame( + [{'id': test_portfolio_id, 'name': 'Example', 'budget': None, + 'status': 'PortfolioStatus.SCOPING', 'goal': 'PortfolioGoal.NONE', 'cost': None, 'number_of_properties': None, + 'co2_equivalent_savings': None, 'energy_savings': None, 'energy_cost_savings': None, + 'property_valuation_increase': None, 'rental_yield_increase': None, 'total_work_hours': None, + 'labour_days': None, 'created_at': '2026-02-12 21:23:37.862000+00:00', + 'updated_at': '2026-02-12 21:23:37.862000+00:00', 'epc_breakdown_pre_retrofit': None, + 'epc_breakdown_post_retrofit': None, 'n_units_to_retrofit': None, 'co2_per_unit_pre_retrofit': None, + 'co2_per_unit_post_retrofit': None, 'energy_bill_per_unit_pre_retrofit': None, + 'energy_bill_per_unit_post_retrofit': None, 'energy_consumption_per_unit_pre_retrofit': None, + 'energy_consumption_per_unit_post_retrofit': None, 'valuation_improvement_per_unit': None, + 'cost_per_unit': None, 'cost_per_co2_saved': None, 'cost_per_sap_point': None, + 'valuation_return_on_investment': None}] + ) + + properties_df = pd.DataFrame( + [{'id': test_property_id, 'portfolio_id': test_portfolio_id, 'creation_status': 'PropertyCreationStatus.READY', + 'uprn': 100090438731, 'landlord_property_id': 'BARR052', 'building_reference_number': 3460742868.0, + 'status': 'PortfolioStatus.ASSESSMENT', 'address': '52, Barrack Street', 'postcode': 'CO1 2LR', + 'has_pre_condition_report': True, 'has_recommendations': True, 'created_at': '2026-02-12 21:59:02.744427', + 'updated_at': '2026-02-19 16:18:57.941443', 'property_type': 'House', 'built_form': 'End-Terrace', + 'local_authority': 'Colchester', 'constituency': 'Colchester', 'number_of_rooms': 4.0, 'year_built': 1900.0, + 'tenure': 'rental (private)', 
'current_epc_rating': 'Epc.E', 'current_sap_points': 53.0, + 'current_valuation': 0.0, 'installed_measures_sap_point_adjustment': 0.0, + 'is_sap_points_adjusted_for_installed_measures': False, 'original_sap_points': 53.0}] + ) + + property_details_epc_df = pd.DataFrame( + [ + {'id': 1534934, 'property_id': test_property_id, 'portfolio_id': test_portfolio_id, + 'full_address': '48, Medcalf Road', 'lodgement_date': '2018-09-05', 'is_expired': False, + 'total_floor_area': 68.0, 'walls': 'Solid brick, as built, no insulation', 'walls_rating': 1, + 'roof': 'Pitched, no insulation', 'roof_rating': 1.0, 'floor': 'Solid, no insulation', + 'floor_rating': None, + 'windows': 'Fully double glazed', 'windows_rating': 4, 'heating': 'Boiler and radiators, mains gas', + 'heating_rating': 4, 'heating_controls': 'Programmer, room thermostat and trvs', + 'heating_controls_rating': 4, + 'hot_water': 'From main system', 'hot_water_rating': 4, + 'lighting': 'Low energy lighting in all fixed outlets', 'lighting_rating': 5, + 'mainfuel': 'Mains gas not community', 'ventilation': 'natural', 'solar_pv': 0.0, 'solar_hot_water': False, + 'wind_turbine': 0.0, 'floor_height': 2.55, 'number_heated_rooms': None, 'heat_loss_corridor': False, + 'unheated_corridor_length': None, 'number_of_open_fireplaces': 0, 'number_of_extensions': 0, + 'number_of_storeys': None, 'mains_gas': True, 'energy_tariff': 'Single', + 'primary_energy_consumption': 278.0, + 'co2_emissions': 3.81, 'current_energy_demand': 14643.366, + 'current_energy_demand_heating_hotwater': 12185.6, + 'estimated': False, 'sap_05_overwritten': False, 'sap_05_score': None, 'sap_05_epc_rating': None, + 'heating_cost_current': 711.0628, 'hot_water_cost_current': 139.06198, 'lighting_cost_current': 70.770935, + 'appliances_cost_current': 609.7844, 'gas_standing_charge': 128.0785, + 'electricity_standing_charge': 199.8375, + 'original_co2_emissions': 3.81, 'original_primary_energy_consumption': 278.0, + 'original_current_energy_demand': 
14643.366, 'original_current_energy_demand_heating_hotwater': 12185.6, + 'installed_measures_co2_adjustment': 0.0, 'installed_measures_energy_demand_adjustment': 0.0, + 'installed_measures_total_energy_bill_adjustment': 0.0, 'installed_measures_heat_demand_adjustment': 0.0, + 'is_epc_adjusted_for_installed_measures': False} + ] + ) + + plans_df = pd.DataFrame( + [ + {'id': 0, 'name': None, 'portfolio_id': test_portfolio_id, 'property_id': test_property_id, + 'scenario_id': 1060, 'created_at': '2026-02-19 16:14:45.560816', 'is_default': True, + 'valuation_increase_lower_bound': 0.0302, + 'valuation_increase_upper_bound': 0.07, 'valuation_increase_average': 0.048226666, 'plan_type': None, + 'post_sap_points': 71.5, 'post_epc_rating': 'Epc.C', 'post_co2_emissions': 4.1813498, + 'co2_savings': 0.71865046, 'post_energy_bill': 1447.5204, 'energy_bill_savings': 691.6662, + 'post_energy_consumption': 15303.688, 'energy_consumption_savings': 3276.7622, + 'valuation_post_retrofit': None, 'valuation_increase': None, 'cost_of_works': 6984.568, + 'contingency_cost': 1003.9568} + ] + ) + + plan_recs_df = pd.DataFrame( + [{'id': 0, 'plan_id': 0, 'recommendation_id': 0}] + ) + + recommendations_df = pd.DataFrame( + [{'id': 0, 'property_id': test_property_id, 'created_at': '2026-02-19 16:14:45.560816', + 'type': 'solar_pv', 'measure_type': 'solar_pv', + 'description': 'Fit solar', + 'estimated_cost': 10000, 'default': True, 'starting_u_value': None, 'new_u_value': None, 'sap_points': 1.5, + 'heat_demand': 14.9, 'kwh_savings': 1041.2, 'co2_equivalent_savings': 0.2, 'energy_savings': 14.9, + 'energy_cost_savings': 72.639015, 'property_valuation_increase': None, 'rental_yield_increase': None, + 'total_work_hours': 4.16, 'labour_days': 1.0, 'already_installed': False, 'plan_name': 'whatever'} + ] + ) + + recommendations_materials_df = pd.DataFrame( + [ + { + "id": 0, "recommendation_id": 0, "material_id": 0, "depth": None, "quantity": 1.0, + "quantity_unit": "part", + "estimated_cost": 
10000, "created_at": '2026-02-19 16:14:45.560816', + "updated_at": '2026-02-19 16:14:45.560816', + } + ] + ) + + materials_df = pd.DataFrame( + [ + {'id': 0, 'type': 'solar_pv', 'description': 'Some solar product', + 'depth': 75.0, + 'depth_unit': 'mm', 'cost': None, 'cost_unit': 'gbp_per_m2', 'r_value_per_mm': 0.030303031, + 'r_value_unit': 'square_meter_kelvin_per_watt', 'thermal_conductivity': 0.033, + 'thermal_conductivity_unit': 'watt_per_meter_kelvin', 'link': 'Test', + 'created_at': "'2026-02-19 16:14:45.560816", 'is_active': True, + 'prime_material_cost': None, + 'material_cost': 0.0, 'labour_cost': 0.0, 'labour_hours_per_unit': 0.0, 'plant_cost': 0.0, + 'total_cost': 10000, + 'notes': None, 'is_installer_quote': True, 'innovation_rate': 0.25, 'size': None, 'size_unit': None, + 'includes_scaffolding': True, 'includes_battery': True, 'battery_size': 5.8} + ] + ) + + # Load into db + # ------------------------------------------------- + # Insert Portfolio + # ------------------------------------------------- + for row in portfolio_df.itertuples(index=False): + db_session.add( + Portfolio( + id=row.id, + name=row.name, + status=PortfolioStatus[row.status.split(".")[-1]], + goal=PortfolioGoal[row.goal.split(".")[-1]], + ) + ) + db_session.flush() + + # ------------------------------------------------- + # Insert Property + # ------------------------------------------------- + for row in properties_df.itertuples(index=False): + prop = PropertyModel( + id=row.id, + portfolio_id=row.portfolio_id, + creation_status=PropertyCreationStatus[row.creation_status.split(".")[-1]], + status=PortfolioStatus[row.status.split(".")[-1]], + uprn=row.uprn, + property_type=row.property_type, + current_sap_points=row.current_sap_points, + current_epc_rating=Epc[row.current_epc_rating.split(".")[-1]], + ) + db_session.add(prop) + db_session.flush() + + # ------------------------------------------------- + # Insert EPC Details + # ------------------------------------------------- + 
for row in property_details_epc_df.itertuples(index=False): + epc = PropertyDetailsEpcModel( + property_id=row.property_id, + portfolio_id=row.portfolio_id, + full_address=row.full_address, + total_floor_area=row.total_floor_area, + walls=row.walls, + roof=row.roof, + windows=row.windows, + heating=row.heating, + solar_pv=row.solar_pv, + ) + db_session.add(epc) + db_session.flush() + + # ------------------------------------------------- + # Insert Plan (default) + # ------------------------------------------------- + for row in plans_df.itertuples(index=False): + plan = PlanModel( + id=row.id, + portfolio_id=row.portfolio_id, + property_id=row.property_id, + scenario_id=None, # default mode + is_default=row.is_default, + ) + db_session.add(plan) + db_session.flush() + + # ------------------------------------------------- + # IMPORTANT: Force recommendation to be solar_pv + # ------------------------------------------------- + recommendations_df.loc[0, "measure_type"] = "solar_pv" + + for row in recommendations_df.itertuples(index=False): + rec = Recommendation( + id=row.id, + property_id=row.property_id, + measure_type=row.measure_type, + estimated_cost=row.estimated_cost, + default=row.default, + already_installed=row.already_installed, + sap_points=row.sap_points, + type=row.type, + description=row.description + ) + db_session.add(rec) + db_session.flush() + + # ------------------------------------------------- + # Link Plan -> Recommendation + # ------------------------------------------------- + for row in plan_recs_df.itertuples(index=False): + db_session.add( + PlanRecommendations( + plan_id=row.plan_id, + recommendation_id=row.recommendation_id, + ) + ) + db_session.flush() + + # ------------------------------------------------- + # Insert Material (includes_battery=True) + # ------------------------------------------------- + for row in materials_df.itertuples(index=False): + material = Material( + id=row.id, + type=row.type, + description=row.description, 
+ depth_unit=row.depth_unit, + cost_unit=row.cost_unit, + r_value_unit=row.r_value_unit, + thermal_conductivity_unit=row.thermal_conductivity_unit, + includes_battery=row.includes_battery, + is_active=row.is_active, + ) + db_session.add(material) + db_session.flush() + + # ------------------------------------------------- + # Link Recommendation -> Material + # ------------------------------------------------- + for row in recommendations_materials_df.itertuples(index=False): + db_session.add( + RecommendationMaterials( + recommendation_id=row.recommendation_id, + material_id=row.material_id, + depth=row.depth or 0.0, + quantity=row.quantity, + quantity_unit=row.quantity_unit, + estimated_cost=row.estimated_cost, + ) + ) + + db_session.commit() + + payload = ExportRequest.model_validate({ + "task_id": "test", + "subtask_id": "test", + "portfolio_id": test_portfolio_id, + "scenario_ids": [], + "default_plans_only": True, + }) + + result = process_export(payload, session=db_session) + + assert "default_plans" in result + + df = result["default_plans"] + + assert "solar_pv_with_battery" in df.columns + + # solar_pv should NOT exist + assert "solar_pv" not in df.columns + + assert df.shape[0] == 1, "Expected 1 property in the export, got {}".format(df.shape[0]) + + # Cost should land in correct column + assert df["solar_pv_with_battery"].iloc[0] == 10000 diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile index 7c1a7989..74c00b9f 100644 --- a/backend/postcode_splitter/handler/Dockerfile +++ b/backend/postcode_splitter/handler/Dockerfile @@ -1,9 +1,28 @@ -FROM public.ecr.aws/lambda/python:3.10 +FROM public.ecr.aws/lambda/python:3.11 + +ARG DEV_DB_HOST +ARG DEV_DB_PORT +ARG DEV_DB_NAME + +ENV DB_HOST=${DEV_DB_HOST} +ENV DB_PORT=${DEV_DB_PORT} +ENV DB_NAME=${DEV_DB_NAME} # Set working directory (Lambda task root) WORKDIR /var/task -# ----------------------------- +COPY backend/postcode_splitter/handler/requirements.txt 
. + +RUN pip install --no-cache-dir -r requirements.txt + +# Copy necessary files for database and utility imports +COPY utils/ utils/ +COPY backend/ backend/ +COPY datatypes/ datatypes/ + +# Copy the handler +COPY backend/postcode_splitter/main.py . + # Lambda handler -# ----------------------------- CMD ["main.handler"] + diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt index e69de29b..6ef41b2d 100644 --- a/backend/postcode_splitter/handler/requirements.txt +++ b/backend/postcode_splitter/handler/requirements.txt @@ -0,0 +1,11 @@ +pandas==2.2.2 +numpy<2.0 +requests +tqdm +openpyxl +epc-api-python==1.0.2 +boto3==1.35.44 +sqlmodel +sqlalchemy==2.0.36 +psycopg2-binary==2.9.10 +pydantic-settings==2.6.0 \ No newline at end of file diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d55f618a..4f63ed4b 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,127 +1,278 @@ +import os +import sys +import json import pandas as pd import requests -from backend.address2UPRN.main import ( - resolve_uprns_for_postcode_group, - get_epc_data_with_postcode, +import boto3 +from uuid import UUID, uuid4 +from utils.s3 import ( + read_csv_from_s3 as read_csv_from_s3_dict, + save_csv_to_s3, + parse_s3_uri, ) +from utils.logger import setup_logger from tqdm import tqdm +from backend.app.db.functions.tasks.Tasks import SubTaskInterface +from datetime import datetime + +logger = setup_logger() -def sanitise_postcode(postcode: str) -> str | None: +def upload_batch_to_s3( + batch_df: pd.DataFrame, task_id: str, sub_task_id: str, bucket_name: str = None +) -> str: """ - Normalise postcode for grouping. - - - Uppercase - - Remove all whitespace + Upload batch DataFrame to S3 as CSV. 
""" - if pd.isna(postcode): - return None + if bucket_name is None: + bucket_name = os.getenv("S3_BUCKET_NAME") - return postcode.upper().replace(" ", "") - - -def is_valid_postcode(postcode_clean: str) -> bool: - """ - Validate postcode using postcodes.io. - - Expects a sanitised postcode (e.g. E84SQ). - Returns True if valid, False otherwise. - """ - POSTCODES_IO_VALIDATE_URL = "https://api.postcodes.io/postcodes/{postcode}/validate" - if not postcode_clean: - return False + if not bucket_name: + logger.error( + "S3 bucket name not provided and S3_BUCKET_NAME environment variable not set" + ) + raise ValueError("S3_BUCKET_NAME not configured") try: - resp = requests.get( - POSTCODES_IO_VALIDATE_URL.format(postcode=postcode_clean), - timeout=5, + file_name = f"{datetime.now().isoformat()}_{str(uuid4())[:8]}" + file_key = ( + f"ara_postcode_splitter_batches/{task_id}/{sub_task_id}/{file_name}.csv" ) - resp.raise_for_status() - return resp.json().get("result", False) - except requests.RequestException: - # Network issues, rate limits, etc. - return False + + success = save_csv_to_s3(batch_df, bucket_name, file_key) + + if success: + s3_uri = f"s3://{bucket_name}/{file_key}" + logger.info(f"Successfully uploaded batch to {s3_uri}") + return s3_uri + else: + logger.error(f"Failed to upload batch to S3") + raise ValueError("Failed to save CSV to S3") + + except Exception as e: + logger.error(f"Error uploading batch to S3: {str(e)}") + raise -def main(): - df = pd.read_excel("hackney.xlsx", sheet_name="Sustainability") - df = df.head(500) +def send_to_address2uprn_queue(task_id: str, sub_task_id: str, s3_uri: str) -> str: + """ + Send a batch to the address2UPRN SQS queue with S3 reference. 
- # Sanitise postcodes - df["postcode_clean"] = df["Postcode"].apply(sanitise_postcode) + Args: + task_id: The parent task ID + sub_task_id: The new subtask ID for this batch + s3_uri: S3 URI pointing to the batch CSV file - # --- validate AFTER grouping (save API calls) --- + Returns: + Message ID from SQS + """ + sqs_client = boto3.client("sqs") + queue_url = os.getenv("ADDRESS2UPRN_QUEUE_URL") - # Get unique, non-null postcodes - unique_postcodes = df["postcode_clean"].dropna().unique() + if not queue_url: + raise ValueError("ADDRESS2UPRN_QUEUE_URL environment variable not set") - # Validate each postcode once, TODOadd a progress bar - postcode_validity = { - pc: is_valid_postcode(pc) - for pc in tqdm(unique_postcodes, total=len(unique_postcodes)) + message_body = { + "task_id": task_id, + "sub_task_id": sub_task_id, + "s3_uri": s3_uri, } - # Map validity back onto dataframe - df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) + response = sqs_client.send_message( + QueueUrl=queue_url, + MessageBody=json.dumps(message_body), + ) + logger.info( + f"Sent message to address2UPRN queue. " + f"Task: {task_id}, SubTask: {sub_task_id}, MessageId: {response['MessageId']}" + ) + + return response["MessageId"] + + +def create_batch_and_send_to_address2uprn( + batch_df: pd.DataFrame, + task_id: str, + sub_task_id: str, + subtask_interface: SubTaskInterface, + bucket_name: str, +) -> str: + """ + Create a batch DataFrame, upload to S3, create subtask, and send to address2UPRN queue. 
+ + """ + # Upload batch to S3 + + s3_uri = upload_batch_to_s3(batch_df, str(task_id), str(sub_task_id), bucket_name) + + # Create a new subtask for this batch with all inputs + created_batch_sub_task_id = subtask_interface.create_subtask( + task_id=task_id, + inputs={ + "task_id": str(task_id), + "s3_uri": s3_uri, + }, + ) + + logger.info(f"Created batch subtask {created_batch_sub_task_id}") + + # Send message with S3 reference + send_to_address2uprn_queue( + task_id=str(task_id), + sub_task_id=str(created_batch_sub_task_id), + s3_uri=s3_uri, + ) + + return created_batch_sub_task_id + + +def handler(event, context, local=False): + print(f"Function: {context.function_name}") + print(f"Request ID: {context.aws_request_id}") + + # Example SQS message for testing (copy and paste into SQS): + if local is True: + event = { + "Records": [ + { + "body": json.dumps( + { + "task_id": "e31f2f21-175b-4a91-a3ec-a6baa325e917", + "sub_task_id": "8673913b-1a88-42d7-8578-0449123d94b0", + "s3_uri": "s3://retrofit-data-dev/ara_raw_inputs/peabody/2025_11_11 - Peabody - Data Extracts for Domna_transformed.csv", + } + ) + } + ] + } + # Handle both single event and batch events (SQS, etc.) 
+ records = event.get("Records", [event]) results = [] + errors = [] + subtask_interface = SubTaskInterface() + bucket_name = os.getenv("S3_BUCKET_NAME") + if local: + bucket_name = "retrofit-data-dev" - for postcode, group_df in tqdm( - df[df["postcode_valid"]].groupby("postcode_clean"), - desc="Resolving UPRNs by postcode", - ): - try: - epc_df = get_epc_data_with_postcode(postcode) + for record in records: + if local: + record = records[0] + task_id = None + subtask_id = None + # Parse body (inputs) - if epc_df.empty: - tmp = group_df.copy() - tmp["found_uprn"] = None - tmp["status"] = "no_epc_results" - results.append(tmp) - continue + if isinstance(record.get("body"), str): + body = json.loads(record["body"]) + else: + body = record.get("body", {}) - resolved = resolve_uprns_for_postcode_group( - group_df=group_df, - epc_df=epc_df, + # Validate required fields + task_id = body.get("task_id") + subtask_id = body.get("sub_task_id") + s3_uri = body.get("s3_uri") + + # Convert task_id to UUID + task_id = UUID(task_id) if isinstance(task_id, str) else task_id + subtask_id = UUID(subtask_id) if isinstance(subtask_id, str) else subtask_id + + # Mark subtask as in progress + subtask_interface.update_subtask_status(subtask_id, "in progress") + logger.info(f"Marked subtask {subtask_id} as in progress") + + # Read CSV from S3 + bucket, key = parse_s3_uri(s3_uri) + logger.info(f"S3 Bucket: {bucket}, Key: {key}") + + csv_data = read_csv_from_s3_dict(bucket, key) + df = pd.DataFrame(csv_data) + + logger.info(f"CSV loaded: {len(df)} rows, {len(df.columns)} columns") + + # Sanitise postcodes + df["postcode_clean"] = df["postcode"].str.upper().str.replace(" ", "") + + df = df.dropna(subset=["postcode_clean"]) + + batch_size = 500 + if df.shape[0] < batch_size: + create_batch_and_send_to_address2uprn( + batch_df=df, + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, ) + else: + postcode_to_addresses = { + postcode: 
group + for postcode, group in df.groupby("postcode_clean", sort=False) + } - results.append(resolved) + count = 0 + buffer = [] - except Exception as e: - tmp = group_df.copy() - tmp["found_uprn"] = None - tmp["status"] = "exception" - tmp["error"] = str(e) - results.append(tmp) + for postcode, group_df in postcode_to_addresses.items(): + group_len = len(group_df) - final_df = pd.concat(results, ignore_index=True) - a = final_df[ - [ - "best_match_lexiscore", - "Address 1", - "best_match_address", - "Postcode", - "UPRN", - "best_match_uprn", - ] - ] # add levi score to viewing - b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing - b = b[ - [ - "best_match_lexiscore", - "Address 1", - "best_match_address", - "Postcode", - "UPRN", - "best_match_uprn", - ] - ] + # If single postcode is bigger than batch_size → send directly + if group_len >= batch_size: + if buffer: + create_batch_and_send_to_address2uprn( + batch_df=pd.concat(buffer, ignore_index=True), + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, + ) + buffer = [] + count = 0 + create_batch_and_send_to_address2uprn( + batch_df=group_df, + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, + ) + continue -def handler(event, context): - print("hello Postcode splitter world") - return {"statusCode": 200, "body": "hello world"} + # If adding would exceed batch → flush first + if count + group_len > batch_size: + create_batch_and_send_to_address2uprn( + batch_df=pd.concat(buffer, ignore_index=True), + task_id=task_id, + sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, + ) + buffer = [] + count = 0 + # Add group + buffer.append(group_df) + count += group_len -if __name__ == "__main__": - main() + # Final flush + if buffer: + create_batch_and_send_to_address2uprn( + batch_df=pd.concat(buffer, ignore_index=True), + task_id=task_id, + 
sub_task_id=subtask_id, + subtask_interface=subtask_interface, + bucket_name=bucket_name, + ) + + # Mark subtask as completed + subtask_interface.update_subtask_status( + subtask_id, + "completed", + outputs={"rows_processed": "completed"}, + ) + + return { + "statusCode": 200, + "body": json.dumps( + {"processed": results, "errors": errors if errors else None} + ), + } diff --git a/etl/customers/l_and_g/ic_slides.py b/etl/customers/l_and_g/ic_slides.py index a5cb3511..de6edd49 100644 --- a/etl/customers/l_and_g/ic_slides.py +++ b/etl/customers/l_and_g/ic_slides.py @@ -41,7 +41,10 @@ epc_data = pd.read_csv( # Classify floor area in <73m2, 73-98, 99-200, 200+ epc_data["floor_area_bracket"] = epc_data["total_floor_area"].apply( - lambda x: "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+") + lambda x: ( + "<73" if x < 73 else "73-98" if x < 99 else "99-200" if x < 200 else "200+" + ) +) # 73-98 185 # <73 156 @@ -65,7 +68,11 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -74,56 +81,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == 
portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + 
.join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -132,7 +162,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -properties_data, plans_data, recommendations_data = get_data(portfolio_id=124, scenario_ids=[205]) +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=124, scenario_ids=[205] +) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -147,12 +179,12 @@ recommended_measures_df = recommended_measures_df.drop(columns=["default"]) post_install_sap = recommendations_df[["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id -post_install_sap = post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +post_install_sap = ( + post_install_sap.groupby("property_id")[["sap_points"]].sum().reset_index() +) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = 
recommendations_measures_pivot.reset_index() @@ -163,7 +195,7 @@ recommendations_measures_pivot = recommendations_measures_pivot.rename( "double_glazing": "Cost: Double Glazing", "loft_insulation": "Cost: Loft Insulation", "mechanical_ventilation": "Cost: Ventilation", - "solar_pv": "Cost: Solar PV" + "solar_pv": "Cost: Solar PV", } ) recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -186,16 +218,26 @@ recommendations_measures_pivot["Recommendation: Solar PV"] = ( recommendations_measures_pivot["Cost: Solar PV"] > 0 ) -df = properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", - "current_sap_points", "total_floor_area", "number_of_rooms", +df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + ] ] -].merge( - recommendations_measures_pivot, how="left", on="property_id" -).merge( - post_install_sap, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(post_install_sap, how="left", on="property_id") ) df = df.drop(columns=["property_id"]) @@ -222,25 +264,36 @@ df["Has Recommendations"] = ~pd.isnull(df["Cost: Air Source Heat Pump"]) # We fill missings: for col in [ - "Recommendation: Air Source Heat Pump", "Recommendation: Cavity Wall Insulation", - "Recommendation: Double Glazing", "Recommendation: Loft Insulation", "Recommendation: Ventilation", - "Recommendation: Solar PV" + "Recommendation: Air Source Heat Pump", + "Recommendation: Cavity Wall Insulation", + "Recommendation: Double Glazing", + "Recommendation: Loft Insulation", + "Recommendation: Ventilation", + "Recommendation: Solar PV", ]: df[col] = df[col].fillna(False) for col in [ - "Cost: Air Source Heat Pump", "Cost: Cavity Wall Insulation", - "Cost: 
Double Glazing", "Cost: Loft Insulation", "Cost: Ventilation", - "Cost: Solar PV" + "Cost: Air Source Heat Pump", + "Cost: Cavity Wall Insulation", + "Cost: Double Glazing", + "Cost: Loft Insulation", + "Cost: Ventilation", + "Cost: Solar PV", ]: df[col] = df[col].fillna(0) # Calculate post SAP df["Predicted Post Works SAP"] = df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() -df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) +df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) +) df["Recommendation: Air Source Heat Pump"].sum() df["Cost: Air Source Heat Pump"].sum() -df.to_csv("/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", index=False) +df.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/L&G/Basildon Data Export - 2.csv", + index=False, +) diff --git a/etl/customers/mod/pilot/2. Create Excel Model.py b/etl/customers/mod/pilot/2. Create Excel Model.py index 9a9eda86..810ab661 100644 --- a/etl/customers/mod/pilot/2. Create Excel Model.py +++ b/etl/customers/mod/pilot/2. 
Create Excel Model.py @@ -4,7 +4,11 @@ import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel @@ -13,56 +17,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # 
Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') - else getattr(rec, col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -94,16 +121,34 @@ def app(): ) property_asset_data = 
properties_df.merge( - mod_property_data.drop(columns=["address", "postcode", "tenure"]), how="left", on="uprn" + mod_property_data.drop(columns=["address", "postcode", "tenure"]), + how="left", + on="uprn", ) - property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains("pitched", case=False) + property_asset_data["is_pitched"] = property_asset_data["roof"].str.contains( + "pitched", case=False + ) property_asset_data["pre_1970"] = property_asset_data["BUILD_YEAR"] < 1970 - property_asset_data["wall_type"] = property_asset_data["walls"].str.split(" ").str[0].str.strip() - property_asset_data["is_insulated"] = ( - property_asset_data["walls"].str.split(",").str[1].str.strip().isin( - ["filled cavity", "with external insulation", "filled cavity and external insulation"] - ) | property_asset_data["walls"].str.split(",").str[2].str.strip().isin(["insulated"]) + property_asset_data["wall_type"] = ( + property_asset_data["walls"].str.split(" ").str[0].str.strip() + ) + property_asset_data["is_insulated"] = property_asset_data["walls"].str.split( + "," + ).str[1].str.strip().isin( + [ + "filled cavity", + "with external insulation", + "filled cavity and external insulation", + ] + ) | property_asset_data[ + "walls" + ].str.split( + "," + ).str[ + 2 + ].str.strip().isin( + ["insulated"] ) property_asset_data["is_insulated"] = np.where( property_asset_data["is_insulated"], "Insulated", "Uninsulated" @@ -115,18 +160,26 @@ def app(): property_asset_data["pre_1970"], "Pre 1970", "Post 1970" ) - archetype_variables = ["property_type", "wall_type", "is_insulated", "is_pitched", "pre_1970"] + archetype_variables = [ + "property_type", + "wall_type", + "is_insulated", + "is_pitched", + "pre_1970", + ] assigned_archetypes = ( - property_asset_data.groupby( - archetype_variables - ).size().reset_index().rename(columns={0: "n_properties"}).sort_values("n_properties", ascending=False) + property_asset_data.groupby(archetype_variables) + .size() + .reset_index() + 
.rename(columns={0: "n_properties"}) + .sort_values("n_properties", ascending=False) ) # Make the archetype ID a concatenation of the variables - assigned_archetypes["archetype_id"] = assigned_archetypes[archetype_variables].apply( - lambda x: "_".join(x.astype(str)), axis=1 - ) + assigned_archetypes["archetype_id"] = assigned_archetypes[ + archetype_variables + ].apply(lambda x: "_".join(x.astype(str)), axis=1) # Most prominent archetypes prominent_archetypes = assigned_archetypes.head(6) @@ -136,7 +189,7 @@ def app(): property_asset_data = property_asset_data.merge( assigned_archetypes[archetype_variables + ["archetype_id"]], how="left", - on=archetype_variables + on=archetype_variables, ) # Create age bands: @@ -148,7 +201,7 @@ def app(): property_asset_data["age_band"] = pd.cut( property_asset_data["BUILD_YEAR"], bins=[1959, 1969, 1979, 1989, 1999, 2022], - labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"] + labels=["1960-1969", "1970-1979", "1980-1989", "1990-1999", "2000+"], ) # Create floor area bands @@ -159,47 +212,59 @@ def app(): property_asset_data["floor_area_band"] = pd.cut( property_asset_data["total_floor_area"], bins=[0, 73, 97, 199, 10000], - labels=["0-73", "74-97", "98-199", "200+"] + labels=["0-73", "74-97", "98-199", "200+"], ) property_asset_data["archetype_group"] = property_asset_data["archetype_id"].copy() property_asset_data["archetype_group"] = np.where( - property_asset_data["archetype_id"].isin(other_archetypes["archetype_id"].values), + property_asset_data["archetype_id"].isin( + other_archetypes["archetype_id"].values + ), "other", - property_asset_data["archetype_group"] + property_asset_data["archetype_group"], ) # For colour wall_types = ( - property_asset_data[["wall_type"]].value_counts().to_frame().reset_index().rename( - columns={"wall_type": "Wall Type"} - ) + property_asset_data[["wall_type"]] + .value_counts() + .to_frame() + .reset_index() + .rename(columns={"wall_type": "Wall Type"}) ) # Group into age 
bands ages = ( - property_asset_data[["age_band"]].value_counts() + property_asset_data[["age_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("age_band", ascending=True) + .reset_index() + .sort_values("age_band", ascending=True) .rename(columns={"age_band": "Age Band"}) ) floor_area_bands = ( - property_asset_data[["floor_area_band"]].value_counts() + property_asset_data[["floor_area_band"]] + .value_counts() .to_frame() - .reset_index().sort_values("floor_area_band", ascending=True) + .reset_index() + .sort_values("floor_area_band", ascending=True) .rename(columns={"floor_area_band": "Floor Area Band"}) ) archetype_counts = ( - property_asset_data[["archetype_group"]]. - value_counts(). - to_frame(). - reset_index() + property_asset_data[["archetype_group"]] + .value_counts() + .to_frame() + .reset_index() .rename(columns={"archetype_group": "Archetype"}) ) property_types = ( - (property_asset_data["property_type"] + ": " + property_asset_data["built_form"]). - value_counts(). - to_frame(). 
- reset_index() + ( + property_asset_data["property_type"] + + ": " + + property_asset_data["built_form"] + ) + .value_counts() + .to_frame() + .reset_index() .rename(columns={"index": "Property Type", 0: "Count"}) ) @@ -217,18 +282,24 @@ def app(): totals = property_asset_data[ [ "Total_household_members", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", ] ].copy() totals["total_cost"] = ( - totals["heating_cost_current"] + - totals["hot_water_cost_current"] + - totals["lighting_cost_current"] + - totals["appliances_cost_current"] + - totals["gas_standing_charge"] + - totals["electricity_standing_charge"] + totals["heating_cost_current"] + + totals["hot_water_cost_current"] + + totals["lighting_cost_current"] + + totals["appliances_cost_current"] + + totals["gas_standing_charge"] + + totals["electricity_standing_charge"] ) print( totals[ @@ -259,38 +330,59 @@ def app(): scenario_recommendations_df = recommendations_df[ recommendations_df["Scenario ID"] == scenario - ].copy() + ].copy() - scenario_recommendations_df["contingency"] = contingency * scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] = ( + contingency * scenario_recommendations_df["estimated_cost"] + ) scenario_recommendations_df["total_cost"] = ( - scenario_recommendations_df["estimated_cost"] + scenario_recommendations_df["contingency"] + scenario_recommendations_df["estimated_cost"] + + scenario_recommendations_df["contingency"] ) recommended_measures_df = scenario_recommendations_df[ ["property_id", 
"measure_type", "estimated_cost", "default"] ] - recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] + recommended_measures_df = recommended_measures_df[ + recommended_measures_df["default"] + ] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) # Metrics by property ID aggregated_metrics = scenario_recommendations_df[ [ - "property_id", "type", "default", "sap_points", - "energy_cost_savings", "kwh_savings", "co2_equivalent_savings", "estimated_cost", "contingency", - "total_cost" + "property_id", + "type", + "default", + "sap_points", + "energy_cost_savings", + "kwh_savings", + "co2_equivalent_savings", + "estimated_cost", + "contingency", + "total_cost", ] ] aggregated_metrics = aggregated_metrics[aggregated_metrics["default"]] - aggregated_metrics = aggregated_metrics.groupby("property_id")[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].sum().reset_index() + aggregated_metrics = ( + aggregated_metrics.groupby("property_id")[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .sum() + .reset_index() + ) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() recommendations_measures_pivot = recommendations_measures_pivot.fillna(0) @@ -299,30 +391,58 @@ def app(): for c in recommendations_measures_pivot.columns: if c == "property_id": continue - recommendations_measures_pivot["Recommendation: " + c] = recommendations_measures_pivot[c] > 0 + recommendations_measures_pivot["Recommendation: " + c] = ( + recommendations_measures_pivot[c] > 0 + ) # We now create a final output - df = 
properties_df[ - [ - "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", "heating", "windows", - "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", - "co2_emissions", "current_energy_demand", "current_energy_demand_heating_hotwater", - "heating_cost_current", "hot_water_cost_current", "lighting_cost_current", - "appliances_cost_current", "gas_standing_charge", "electricity_standing_charge" + df = ( + properties_df[ + [ + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + "co2_emissions", + "current_energy_demand", + "current_energy_demand_heating_hotwater", + "heating_cost_current", + "hot_water_cost_current", + "lighting_cost_current", + "appliances_cost_current", + "gas_standing_charge", + "electricity_standing_charge", + ] ] - ].merge( - recommendations_measures_pivot, how="left", on="property_id" - ).merge( - aggregated_metrics, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(aggregated_metrics, how="left", on="property_id") ) df["bills_total_cost"] = ( - df["heating_cost_current"] + df["hot_water_cost_current"] + df["lighting_cost_current"] + - df["appliances_cost_current"] + df["gas_standing_charge"] + df["electricity_standing_charge"] + df["heating_cost_current"] + + df["hot_water_cost_current"] + + df["lighting_cost_current"] + + df["appliances_cost_current"] + + df["gas_standing_charge"] + + df["electricity_standing_charge"] ) df = df.drop(columns=["property_id"]) - for c in ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings"]: + for c in [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + ]: df[c] = df[c].fillna(0) df = df.rename( @@ -345,16 +465,23 @@ def app(): # Calculate post SAP df["Predicted Post Works SAP"] = 
df["Current SAP Points"] + df["sap_points"] df["Predicted Post Works SAP"] = df["Predicted Post Works SAP"].round() - df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply(lambda x: sap_to_epc(x)) + df["Predicted Post Works EPC"] = df["Predicted Post Works SAP"].apply( + lambda x: sap_to_epc(x) + ) # Calculate the relative savings on carbon, kwh, and bills - df["relative_carbon_savings"] = df["co2_equivalent_savings"] / df["co2_emissions"] + df["relative_carbon_savings"] = ( + df["co2_equivalent_savings"] / df["co2_emissions"] + ) df["relative_kwh_savings"] = df["kwh_savings"] / df["current_energy_demand"] df["relative_bill_savings"] = df["energy_cost_savings"] / df["bills_total_cost"] # Add on the archetype df = df.merge( - property_asset_data[["uprn", "archetype_group"]], how="left", left_on="UPRN", right_on="uprn" + property_asset_data[["uprn", "archetype_group"]], + how="left", + left_on="UPRN", + right_on="uprn", ) # For properties that don't make it to EPC B, check why. E.g. 
for a property that has an oil boiler, it @@ -387,7 +514,9 @@ def app(): printing_scenario_id = scenario_ids[0] # EPC breakdown - print(scenario_data[printing_scenario_id]['Predicted Post Works EPC'].value_counts()) + print( + scenario_data[printing_scenario_id]["Predicted Post Works EPC"].value_counts() + ) # Cost # Total cost print(scenario_data[printing_scenario_id]["total_cost"].sum()) @@ -408,16 +537,24 @@ def app(): measure_details = {} for scenario in scenario_ids: measure_details[scenario] = {} - recommendation_cols = [c for c in scenario_data[scenario].columns if "Recommendation:" in c] - measure_details[scenario]["count"] = scenario_data[scenario][recommendation_cols].sum().to_dict() + recommendation_cols = [ + c for c in scenario_data[scenario].columns if "Recommendation:" in c + ] + measure_details[scenario]["count"] = ( + scenario_data[scenario][recommendation_cols].sum().to_dict() + ) # Get average cost per measure measure_columns = [ - c.split("Recommendation: ")[1] for c in scenario_data[scenario].columns if "Recommendation:" in c + c.split("Recommendation: ")[1] + for c in scenario_data[scenario].columns + if "Recommendation:" in c ] # Take the mean, drop zero columns measure_costs = {} for m in measure_columns: - measure_costs[m] = float(scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean()) + measure_costs[m] = float( + scenario_data[scenario][scenario_data[scenario][m] > 0][m].mean() + ) measure_details[scenario]["cost_per_measure"] = measure_costs pprint(measure_details[scenario_ids[0]]["count"]) @@ -452,12 +589,27 @@ def app(): for scenario in scenario_ids: df = scenario_data[scenario].copy() - avg_savings = df[ - ["sap_points", "co2_equivalent_savings", "energy_cost_savings", "kwh_savings", "estimated_cost", - "total_cost", "contingency"] - ].mean().to_dict() - avg_savings["cost_per_sap_point"] = avg_savings["total_cost"] / avg_savings["sap_points"] - avg_savings["cost_per_carbon"] = avg_savings["total_cost"] / 
avg_savings["co2_equivalent_savings"] + avg_savings = ( + df[ + [ + "sap_points", + "co2_equivalent_savings", + "energy_cost_savings", + "kwh_savings", + "estimated_cost", + "total_cost", + "contingency", + ] + ] + .mean() + .to_dict() + ) + avg_savings["cost_per_sap_point"] = ( + avg_savings["total_cost"] / avg_savings["sap_points"] + ) + avg_savings["cost_per_carbon"] = ( + avg_savings["total_cost"] / avg_savings["co2_equivalent_savings"] + ) scenario_metrics[scenario] = avg_savings pprint(scenario_metrics[scenario_ids[0]]) @@ -465,11 +617,11 @@ def app(): scenario_data[scenario_ids[0]]["loft_insulation"][ scenario_data[scenario_ids[0]]["loft_insulation"] > 0 - ].mean() + ].mean() scenario_data[scenario_ids[0]]["cavity_wall_insulation"][ scenario_data[scenario_ids[0]]["cavity_wall_insulation"] > 0 - ].mean() + ].mean() # Testing checking floor risk @@ -477,11 +629,7 @@ def app(): def get_flood_risk(lat, lon, radius_km=1): url = "https://environment.data.gov.uk/flood-monitoring/id/floods" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km # search radius in km - } + params = {"lat": lat, "long": lon, "dist": radius_km} # search radius in km response = requests.get(url, params=params) response.raise_for_status() @@ -495,20 +643,19 @@ def app(): print(f"{len(flood_warnings)} warning(s) found near the location:") for warning in flood_warnings: print(f"- Area: {warning.get('description')}") - print(f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})") + print( + f" Severity: {warning.get('severity')} (Level {warning.get('severityLevel')})" + ) print(f" Message changed at: {warning.get('timeMessageChanged')}") print() return flood_warnings from shapely.geometry import shape, Point + def get_flood_areas_near_point(lat, lon, radius_km=2): url = "https://environment.data.gov.uk/flood-monitoring/id/floodAreas" - params = { - 'lat': lat, - 'long': lon, - 'dist': radius_km - } + params = {"lat": lat, "long": lon, "dist": radius_km} 
response = requests.get(url, params=params) response.raise_for_status() @@ -531,7 +678,7 @@ def app(): if not features: continue - flood_polygon = shape(features[0]['geometry']) + flood_polygon = shape(features[0]["geometry"]) try: is_inside = flood_polygon.contains(point) @@ -539,12 +686,17 @@ def app(): is_inside = False if is_inside: - print(f"📍 Point is inside flood area: {area['label']} ({area['notation']})") + print( + f"📍 Point is inside flood area: {area['label']} ({area['notation']})" + ) return area from tqdm import tqdm + floor_warnings_data = [] - for _, property in tqdm(property_asset_data.iterrows(), total=len(property_asset_data)): + for _, property in tqdm( + property_asset_data.iterrows(), total=len(property_asset_data) + ): # warnings = floor_warnings_data.extend( # get_flood_risk(lat=property["LATITUDE"], lon=property["LONGITUDE"], radius_km=1) # ) @@ -556,7 +708,7 @@ def app(): "uprn": property["uprn"], "address": property["address"], "postcode": property["postcode"], - "area": resp + "area": resp, } ) continue @@ -570,7 +722,7 @@ def app(): "House_Cavity_Uninsulated_Pitched roof_Post 1970", "other", "House_System_Uninsulated_Pitched roof_Pre 1970", - "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970" + "House_Solid_Uninsulated_Not Pitched Roof_Pre 1970", ] values = [62, 36, 21, 16, 16, 4, 2] @@ -582,36 +734,39 @@ def app(): "Cavity wall insulation, ventilation", "Bespoke retrofit measures", "External wall insulation, roof insulation", - "Flat roof insulation, internal wall insulation" + "Flat roof insulation, internal wall insulation", ] - fig = go.Figure(go.Treemap( - labels=labels, - parents=[""] * len(labels), # No root - values=values, - hovertext=hovertext, - hoverinfo="text", - textinfo="none", - marker=dict( - line=dict(color="white", width=4), - colors=values, - colorscale="Blues" + fig = go.Figure( + go.Treemap( + labels=labels, + parents=[""] * len(labels), # No root + values=values, + hovertext=hovertext, + hoverinfo="text", + 
textinfo="none", + marker=dict( + line=dict(color="white", width=4), colors=values, colorscale="Blues" + ), ) - )) + ) fig.update_layout( - margin=dict(t=10, l=10, r=10, b=10), - plot_bgcolor="white", - paper_bgcolor="white" + margin=dict(t=10, l=10, r=10, b=10), plot_bgcolor="white", paper_bgcolor="white" ) fig.show() # Get the recommended measures by scenario id - recommendation_cols = [c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c] - measure_counts_by_scenario = scenario_data[scenario_ids[1]].groupby("archetype_group")[ - recommendation_cols - ].sum().reset_index() + recommendation_cols = [ + c for c in scenario_data[scenario_ids[1]].columns if "Recommendation:" in c + ] + measure_counts_by_scenario = ( + scenario_data[scenario_ids[1]] + .groupby("archetype_group")[recommendation_cols] + .sum() + .reset_index() + ) measure_counts_by_scenario.to_csv( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/MOD/Pilot Programme/measure_counts_by_scenario.csv" @@ -630,15 +785,13 @@ def app(): to_append = {"uprn": uprn} for _id in scenario_ids: - scenario = scenario_data[_id][ - scenario_data[_id]["uprn"] == uprn - ].squeeze() + scenario = scenario_data[_id][scenario_data[_id]["uprn"] == uprn].squeeze() val = PropertyValuation.estimate_valuation_improvement( current_value=x["valuation"], current_epc=scenario["Current EPC Rating"].value, target_epc=scenario["Predicted Post Works EPC"], - total_cost=None + total_cost=None, ) to_append[_id] = val["average_increase"] diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py index 45108fec..efedb844 100644 --- a/etl/customers/newhaven/slides.py +++ b/etl/customers/newhaven/slides.py @@ -3,7 +3,12 @@ import pandas as pd import numpy as np from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, Scenario +from backend.app.db.models.recommendations 
import ( + Recommendation, + PlanModel, + PlanRecommendations, + ScenarioModel, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from utils.s3 import read_csv_from_s3 @@ -13,56 +18,79 @@ def get_data(portfolio_id, scenario_ids): session.begin() # Get properties and their details for a specific portfolio - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id # Filter by portfolio ID - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) # Filter by portfolio ID + .all() + ) # Transform properties data to include all fields dynamically properties_data = [ - {**{col.name: getattr(prop.PropertyModel, col.name) for col in PropertyModel.__table__.columns}, - **{col.name: getattr(prop.PropertyDetailsEpcModel, col.name) for col in - PropertyDetailsEpcModel.__table__.columns}} + { + **{ + col.name: getattr(prop.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(prop.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, + } for prop in properties_query ] # Get property IDs from fetched properties # Get plans linked to the fetched properties - plans_query = session.query(Plan).filter(Plan.scenario_id.in_(scenario_ids)).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) # Transform plans data to include all fields dynamically plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] # Extract plan IDs for 
filtering recommendations through PlanRecommendations - plan_ids = [plan['id'] for plan in plans_data] + plan_ids = [plan["id"] for plan in plans_data] # Get recommendations through PlanRecommendations for those plans and that are default - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, Plan.id == PlanRecommendations.plan_id # Join with Plan to access scenario_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default == True # Filtering for default recommendations - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join( + PlanModel, + PlanModel.id + == PlanRecommendations.plan_id, # Join with Plan to access scenario_id + ) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default == True, # Filtering for default recommendations + ) + .all() + ) # Transform recommendations data to include all fields dynamically and include scenario_id recommendations_data = [ - {**{col.name: getattr(rec.Recommendation, col.name) if hasattr(rec, 'Recommendation') else getattr(rec, - col.name) for - col in Recommendation.__table__.columns}, - "Scenario ID": rec.scenario_id} + { + **{ + col.name: ( + getattr(rec.Recommendation, col.name) + if hasattr(rec, "Recommendation") + else getattr(rec, col.name) + ) + for col in Recommendation.__table__.columns + }, + "Scenario ID": rec.scenario_id, + } for rec in recommendations_query ] @@ -71,7 +99,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids): +def estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids +): # 
properties_starting_with_electric_heating = properties_df[ # properties_df["mainfuel"].isin( # ["Electricity not community", "Electricity electricity unspecified tariff"] @@ -85,20 +115,29 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d for scenario_id in scenario_ids: # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["ligting_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + scenario_recommendations["solar_kwh"] = scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Kwh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) # We need to determine if any of the properties start with electric heating or end with it # property_electric_heating = [] @@ -112,51 +151,76 @@ def estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_d # property_electric_heating.append(pid) # continue - 
grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Kwh Savings': 'sum', - 'ligting_kwh': 'sum', - 'solar_kwh': 'sum', - "estimated_cost": "sum" - }).reset_index() + grouped_data = ( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Kwh Savings": "sum", + "ligting_kwh": "sum", + "solar_kwh": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates().merge( grouped_data, on=["property_id"], how="left" ) comparison["Post Retrofit Heating & Hotwater kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - \ - comparison["Estimated Kwh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Kwh Savings"] ) - avgs = comparison[['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() + avgs = comparison[ + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() # We now, for properties that have a plan, do a before and after with_savings = comparison[~pd.isnull(comparison["Estimated Kwh Savings"])] avgs2 = with_savings[ - ['current_energy_demand_heating_hotwater', 'Post Retrofit Heating & Hotwater kwh']].mean() - avgs2["difference"] = avgs2["current_energy_demand_heating_hotwater"] - avgs2[ - "Post Retrofit Heating & Hotwater kwh"] - avgs2["percentage_reduction"] = 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + [ + "current_energy_demand_heating_hotwater", + "Post Retrofit Heating & Hotwater kwh", + ] + ].mean() + avgs2["difference"] = ( + avgs2["current_energy_demand_heating_hotwater"] + - avgs2["Post Retrofit Heating & Hotwater kwh"] + ) + avgs2["percentage_reduction"] = ( + 100 * avgs2["difference"] / avgs2["current_energy_demand_heating_hotwater"] + ) # We also calculate the cost per kwh saves total_kwh_saved = ( - with_savings["Estimated Kwh Savings"].sum() + - with_savings["ligting_kwh"].sum() + - 
with_savings["solar_kwh"].sum() + with_savings["Estimated Kwh Savings"].sum() + + with_savings["ligting_kwh"].sum() + + with_savings["solar_kwh"].sum() ) total_cost = with_savings["estimated_cost"].sum() cost_per_kwh_saved = total_cost / total_kwh_saved scenario_comparison_df.append({"scenario_id": scenario_id, **avgs}) scenario_comparison_df_2.append({"scenario_id": scenario_id, **avgs2}) - cost_per_kwh_saved_table.append({"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved}) + cost_per_kwh_saved_table.append( + {"scenario_id": scenario_id, "cost_per_kwh_saved": cost_per_kwh_saved} + ) scenario_comparison_population = pd.DataFrame(scenario_comparison_df) scenario_comparison_retrofitted_units = pd.DataFrame(scenario_comparison_df_2) cost_per_kwh_saved_table = pd.DataFrame(cost_per_kwh_saved_table) - return scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table + return ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) def slides(): @@ -167,7 +231,9 @@ def slides(): # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -177,16 +243,19 @@ def slides(): raise ValueError("The number of unique properties is not 2553") # Q1: What is the baseline heating and energy demand for the properties in the portfolio - baseline? 
- heating_hotwater_kwh = ( - properties_df[['current_energy_demand', 'current_energy_demand_heating_hotwater']] - .mean() - ) + heating_hotwater_kwh = properties_df[ + ["current_energy_demand", "current_energy_demand_heating_hotwater"] + ].mean() # Q2: For each scenario, what is for what is the heating and hot water kwh after retrofit, on the entire # popoulation (incl those without retrofit) and for just those being retrofit # We also calculat the cost per kwh saved - scenario_comparison_population, scenario_comparison_retrofitted_units, cost_per_kwh_saved_table = ( - estimate_post_retrofit_heating_hotwater_kwh(properties_df, recommendations_df, scenario_ids) + ( + scenario_comparison_population, + scenario_comparison_retrofitted_units, + cost_per_kwh_saved_table, + ) = estimate_post_retrofit_heating_hotwater_kwh( + properties_df, recommendations_df, scenario_ids ) # Q3: For each scenario, we want to answer what the heating and hot water kwh looks like after retrofit @@ -194,42 +263,55 @@ def slides(): # By property - recommendations_df["type_mapped"] = recommendations_df["type"].copy().replace( - { - "loft_insulation": "roof_insulation", - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + recommendations_df["type_mapped"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "loft_insulation": "roof_insulation", + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) ) recommendations_df["type_mapped"] = np.where( 
recommendations_df["description"].str.contains("air source heat pump"), "air_source_heat_pump", - recommendations_df["type_mapped"] + recommendations_df["type_mapped"], ) # Group by 'Plan Name' and 'Recommendation Type' and count unique 'Property ID' - recommendation_summary = recommendations_df[recommendations_df["default"] == True].groupby( - ['Scenario ID', 'type_mapped'] - ).agg({ - 'property_id': 'nunique' - }).reset_index() + recommendation_summary = ( + recommendations_df[recommendations_df["default"] == True] + .groupby(["Scenario ID", "type_mapped"]) + .agg({"property_id": "nunique"}) + .reset_index() + ) - recommendation_summary.columns = ['Scenario ID', 'Type Mapped', 'Number of Properties'] + recommendation_summary.columns = [ + "Scenario ID", + "Type Mapped", + "Number of Properties", + ] recommendation_summary["Percentage of Properties"] = 100 * ( recommendation_summary["Number of Properties"] / properties_df["id"].nunique() ) - recommendation_summary_final_scenario = recommendation_summary[recommendation_summary["Scenario ID"].isin([51])] + recommendation_summary_final_scenario = recommendation_summary[ + recommendation_summary["Scenario ID"].isin([51]) + ] # MVP implementation of funding estimation for the most basic scenario, using GBIS - project_scores_matrix = pd.read_csv("/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv") + project_scores_matrix = pd.read_csv( + "/Users/khalimconn-kowlessar/Downloads/ECO4 Full Project Scores Matrix.csv" + ) def find_abs(sap_movement, starting_sap, floor_area): starting_band = find_band(starting_sap) @@ -238,7 +320,7 @@ def slides(): return 0 if floor_area <= 72: - floor_area_segment = '0-72' + floor_area_segment = "0-72" elif (floor_area > 72) and (floor_area <= 97): floor_area_segment = "73-97" elif (floor_area > 97) and (floor_area <= 199): @@ -247,26 +329,26 @@ def slides(): floor_area_segment = "200+" return project_scores_matrix[ - (project_scores_matrix["Floor Area Segment"] == 
floor_area_segment) & - (project_scores_matrix["Starting Band"] == starting_band) & - (project_scores_matrix["Finishing Band"] == finishing_band) - ].squeeze()["Cost Savings"] + (project_scores_matrix["Floor Area Segment"] == floor_area_segment) + & (project_scores_matrix["Starting Band"] == starting_band) + & (project_scores_matrix["Finishing Band"] == finishing_band) + ].squeeze()["Cost Savings"] eco4_scores_sap_table = [ - {'Band': 'High_A', 'From': 96.0, 'Up to': 100.0, 'Mid-point': 98.0}, - {'Band': 'Low_A', 'From': 92.0, 'Up to': 96.0, 'Mid-point': 94.0}, - {'Band': 'High_B', 'From': 86.0, 'Up to': 91.0, 'Mid-point': 88.5}, - {'Band': 'Low_B', 'From': 81.0, 'Up to': 86.0, 'Mid-point': 83.5}, - {'Band': 'High_C', 'From': 74.5, 'Up to': 80.0, 'Mid-point': 77.25}, - {'Band': 'Low_C', 'From': 69.0, 'Up to': 74.5, 'Mid-point': 71.75}, - {'Band': 'High_D', 'From': 61.5, 'Up to': 68.0, 'Mid-point': 64.75}, - {'Band': 'Low_D', 'From': 55.0, 'Up to': 61.5, 'Mid-point': 58.25}, - {'Band': 'High_E', 'From': 46.5, 'Up to': 54.0, 'Mid-point': 50.25}, - {'Band': 'Low_E', 'From': 39.0, 'Up to': 46.5, 'Mid-point': 42.75}, - {'Band': 'High_F', 'From': 29.5, 'Up to': 38.0, 'Mid-point': 33.75}, - {'Band': 'Low_F', 'From': 21.0, 'Up to': 29.5, 'Mid-point': 25.25}, - {'Band': 'High_G', 'From': 10.5, 'Up to': 20.0, 'Mid-point': 15.25}, - {'Band': 'Low_G', 'From': 1.0, 'Up to': 10.5, 'Mid-point': 5.75} + {"Band": "High_A", "From": 96.0, "Up to": 100.0, "Mid-point": 98.0}, + {"Band": "Low_A", "From": 92.0, "Up to": 96.0, "Mid-point": 94.0}, + {"Band": "High_B", "From": 86.0, "Up to": 91.0, "Mid-point": 88.5}, + {"Band": "Low_B", "From": 81.0, "Up to": 86.0, "Mid-point": 83.5}, + {"Band": "High_C", "From": 74.5, "Up to": 80.0, "Mid-point": 77.25}, + {"Band": "Low_C", "From": 69.0, "Up to": 74.5, "Mid-point": 71.75}, + {"Band": "High_D", "From": 61.5, "Up to": 68.0, "Mid-point": 64.75}, + {"Band": "Low_D", "From": 55.0, "Up to": 61.5, "Mid-point": 58.25}, + {"Band": "High_E", "From": 
46.5, "Up to": 54.0, "Mid-point": 50.25}, + {"Band": "Low_E", "From": 39.0, "Up to": 46.5, "Mid-point": 42.75}, + {"Band": "High_F", "From": 29.5, "Up to": 38.0, "Mid-point": 33.75}, + {"Band": "Low_F", "From": 21.0, "Up to": 29.5, "Mid-point": 25.25}, + {"Band": "High_G", "From": 10.5, "Up to": 20.0, "Mid-point": 15.25}, + {"Band": "Low_G", "From": 1.0, "Up to": 10.5, "Mid-point": 5.75}, ] eco4_scores_sap_table = pd.DataFrame(eco4_scores_sap_table) @@ -274,8 +356,9 @@ def slides(): # Iterate through each row in the DataFrame to find the correct band value_floored = np.floor(value) return eco4_scores_sap_table[ - (eco4_scores_sap_table["From"] <= value_floored) & (eco4_scores_sap_table["Up to"] >= value_floored) - ].squeeze()["Band"] + (eco4_scores_sap_table["From"] <= value_floored) + & (eco4_scores_sap_table["Up to"] >= value_floored) + ].squeeze()["Band"] def identify_funding_measure(p, p_recs, is_social): measures = ["cavity_wall_insulation", "loft_insulation"] @@ -287,15 +370,17 @@ def slides(): project_abs = find_abs( sap_movement=funding_measure["sap_points"], starting_sap=p["current_sap_points"], - floor_area=p["total_floor_area"] + floor_area=p["total_floor_area"], + ) + property_abs.append( + { + "property_id": p["property_id"], + "measure": funding_measure["type"], + "cost": funding_measure["estimated_cost"], + "abs": project_abs, + "is_social": is_social, + } ) - property_abs.append({ - "property_id": p["property_id"], - "measure": funding_measure["type"], - "cost": funding_measure["estimated_cost"], - "abs": project_abs, - "is_social": is_social - }) if not property_abs: return None @@ -351,7 +436,9 @@ def slides(): band_b_proportion = 0.195 band_c_proportion = 0.219 band_d_proportion = 0.156 - a_to_d_proportion = band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + a_to_d_proportion = ( + band_a_proportion + band_b_proportion + band_c_proportion + band_d_proportion + ) benefits_proportion = 0.51 @@ -360,20 +447,26 @@ def 
slides(): # We scale the private funding based on these two factors private_funding_scaled = private_funding * benefits_proportion * a_to_d_proportion - n_private_projects = np.round((~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion) + n_private_projects = np.round( + (~funding["is_social"]).sum() * benefits_proportion * a_to_d_proportion + ) # Look at the impact of EWI for scenario ewi_jobs = recommendations_df[ - (recommendations_df["Scenario ID"] == 49) & (recommendations_df["type"] == "external_wall_insulation") - ] + (recommendations_df["Scenario ID"] == 49) + & (recommendations_df["type"] == "external_wall_insulation") + ] ewi_jobs["estimated_cost"].sum() has_cavity = recommendations_df[ - (recommendations_df["type"] == "cavity_wall_insulation") & (recommendations_df["Scenario ID"] == 47) - ] + (recommendations_df["type"] == "cavity_wall_insulation") + & (recommendations_df["Scenario ID"] == 47) + ] # Take the some properties in this - cavity_units = properties_df[properties_df["property_id"].isin(has_cavity["property_id"].values)] + cavity_units = properties_df[ + properties_df["property_id"].isin(has_cavity["property_id"].values) + ] cavity_units[cavity_units.index == 3][["uprn", "property_id"]] @@ -381,41 +474,52 @@ def slides(): # Recommenation type by kwh savings per unit recommendations_final_scenario = recommendations_df[ - recommendations_df["Scenario ID"].isin([51]) & - (recommendations_df["default"] == True) - ].copy() + recommendations_df["Scenario ID"].isin([51]) + & (recommendations_df["default"] == True) + ].copy() # Merge on floor area recommendations_final_scenario = recommendations_final_scenario.merge( properties_df[["property_id", "total_floor_area"]], on="property_id", how="left" ) recommendations_final_scenario = recommendations_final_scenario[ - ~pd.isnull(recommendations_final_scenario["total_floor_area"])] - recommendations_final_scenario["kwh_savings_per_unit"] = recommendations_final_scenario["kwh_savings"] / \ 
- recommendations_final_scenario["total_floor_area"] - - recommendations_final_scenario["type_mapped2"] = recommendations_df["type"].copy().replace( - { - "room_roof_insulation": "roof_insulation", - "flat_roof_insulation": "roof_insulation", - "hot_water_tank_insulation": "other", - "cylinder_thermostat": "other", - "sealing_open_fireplace": "other", - "suspended_floor_insulation": "floor_insulation", - "solid_floor_insulation": "floor_insulation", - } + ~pd.isnull(recommendations_final_scenario["total_floor_area"]) + ] + recommendations_final_scenario["kwh_savings_per_unit"] = ( + recommendations_final_scenario["kwh_savings"] + / recommendations_final_scenario["total_floor_area"] ) - aggs = recommendations_final_scenario.groupby("type_mapped")[ - ["kwh_savings_per_unit", "estimated_cost"]].mean().reset_index().sort_values( - "kwh_savings_per_unit", ascending=False + recommendations_final_scenario["type_mapped2"] = ( + recommendations_df["type"] + .copy() + .replace( + { + "room_roof_insulation": "roof_insulation", + "flat_roof_insulation": "roof_insulation", + "hot_water_tank_insulation": "other", + "cylinder_thermostat": "other", + "sealing_open_fireplace": "other", + "suspended_floor_insulation": "floor_insulation", + "solid_floor_insulation": "floor_insulation", + } + ) + ) + + aggs = ( + recommendations_final_scenario.groupby("type_mapped")[ + ["kwh_savings_per_unit", "estimated_cost"] + ] + .mean() + .reset_index() + .sort_values("kwh_savings_per_unit", ascending=False) ) aggs["cost_per_kwh_saved"] = aggs["estimated_cost"] / aggs["kwh_savings_per_unit"] # Show more columns with pandas - pd.set_option('display.max_columns', None) + pd.set_option("display.max_columns", None) # Show more rows with pandas - pd.set_option('display.max_rows', None) + pd.set_option("display.max_rows", None) # Show more characters in a column - pd.set_option('display.max_colwidth', None) + pd.set_option("display.max_colwidth", None) def lewes_outputs(): @@ -427,12 +531,14 @@ def 
lewes_outputs(): """ # get the asset list - asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv") + asset_list = read_csv_from_s3( + bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv" + ) asset_list = pd.DataFrame(asset_list) # Get non-invasive recommendations non_intrusive_recommendations = read_csv_from_s3( bucket_name="retrofit-plan-inputs-dev", - filepath="8/90/non_invasive_recommendations.csv" + filepath="8/90/non_invasive_recommendations.csv", ) non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations) @@ -440,20 +546,21 @@ def lewes_outputs(): portfolio_id = 90 # Look at one scenario at a time, otherwise this is agony scenario_ids = [47, 48, 49, 50, 51] - properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_data, plans_data, recommendations_data = get_data( + portfolio_id, scenario_ids + ) properties_df = pd.DataFrame(properties_data) recommendations_df = pd.DataFrame(recommendations_data) # Unnest this import ast + survey_recs = [] for _, row in non_intrusive_recommendations.iterrows(): recs = ast.literal_eval(row["recommendations"]) ashp_rec = next((r for r in recs if r["type"] == "air_source_heat_pump"), None) solar_rec = next((r for r in recs if r["type"] == "solar_pv"), None) - to_append = { - "uprn": row["uprn"] - } + to_append = {"uprn": row["uprn"]} if ashp_rec["suitable"]: to_append = { **to_append, @@ -479,44 +586,57 @@ def lewes_outputs(): domna_kwh = 10850 scaling_factor = vital_kwh / domna_kwh - next_gen_dataset = properties_df[[ - "uprn", "address", "postcode", - "property_type", "built_form", "current_energy_demand_heating_hotwater", - "mainfuel", "total_floor_area", "floor_height" - ]].rename( - columns={ - "mainfuel": "primary_fuel_type", - "total_floor_area": "gross_floor_area", - "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh" - } - ).merge( - asset_list[["uprn", "number_of_floors"]], - 
how="left", - on="uprn" - ).merge( - survey_recs, - how="left", - on="uprn" + next_gen_dataset = ( + properties_df[ + [ + "uprn", + "address", + "postcode", + "property_type", + "built_form", + "current_energy_demand_heating_hotwater", + "mainfuel", + "total_floor_area", + "floor_height", + ] + ] + .rename( + columns={ + "mainfuel": "primary_fuel_type", + "total_floor_area": "gross_floor_area", + "current_energy_demand_heating_hotwater": "estimated_heating_hotwater_kwh", + } + ) + .merge(asset_list[["uprn", "number_of_floors"]], how="left", on="uprn") + .merge(survey_recs, how="left", on="uprn") ) next_gen_dataset["estimated_heating_hotwater_kwh_scaled"] = ( next_gen_dataset["estimated_heating_hotwater_kwh"] * scaling_factor ) next_gen_dataset["ashp_suitable"] = next_gen_dataset["ashp_suitable"].fillna(False) - next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna(False) + next_gen_dataset["solar_suitable"] = next_gen_dataset["solar_suitable"].fillna( + False + ) # We prepare the scenario outputs by property type grouped_data = next_gen_dataset.copy() grouped_data["property_sub_type"] = grouped_data["built_form"].copy() # If a property is a flat, re-map sub_type just to flat - grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = "Flat" + grouped_data.loc[grouped_data["property_type"] == "Flat", "property_sub_type"] = ( + "Flat" + ) # Same for maisonettes - grouped_data.loc[grouped_data["property_type"] == "Maisonette", "property_sub_type"] = "Maisonette" + grouped_data.loc[ + grouped_data["property_type"] == "Maisonette", "property_sub_type" + ] = "Maisonette" # We now pull out the recommendations impact by property type and sub type # Exclude sealing open fireplaces - recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"] + recommendations_df = recommendations_df[ + recommendations_df["type"] != "sealing_open_fireplace" + ] # We update the type column so that if type == 
heating, and the description contains "air source heat pump", # the type is "air_source_heat_pump", else if the description contains "high heat retention storage heaters", else @@ -532,108 +652,130 @@ def lewes_outputs(): np.where( recommendations_df["description"].str.contains("condensing boiler"), "Boiler Upgrade", - recommendations_df["type"] - ) - ) + recommendations_df["type"], + ), + ), ), - recommendations_df["type"] + recommendations_df["type"], ) recommendation_types = recommendations_df["type"].unique().tolist() rename_dict = { - 'hot_water_tank_insulation': 'Hot Water Tank Insulation', - 'windows_glazing': 'Windows Glazing', - 'secondary_heating': 'Secondary Heating', - 'cavity_wall_insulation': 'Cavity Wall Insulation', - 'flat_roof_insulation': 'Flat Roof Insulation', - 'mechanical_ventilation': 'Mechanical Ventilation', - 'loft_insulation': 'Loft Insulation', - 'cylinder_thermostat': 'Cylinder Thermostat', - 'room_roof_insulation': 'Room Roof Insulation', - 'low_energy_lighting': 'Low Energy Lighting', - 'external_wall_insulation': 'External Wall Insulation', - 'solar_pv': 'Solar PV', - 'heating_control': 'Heating Control', - 'solid_floor_insulation': 'Solid Floor Insulation', - 'suspended_floor_insulation': 'Suspended Floor Insulation', - 'internal_wall_insulation': 'Internal Wall Insulation' + "hot_water_tank_insulation": "Hot Water Tank Insulation", + "windows_glazing": "Windows Glazing", + "secondary_heating": "Secondary Heating", + "cavity_wall_insulation": "Cavity Wall Insulation", + "flat_roof_insulation": "Flat Roof Insulation", + "mechanical_ventilation": "Mechanical Ventilation", + "loft_insulation": "Loft Insulation", + "cylinder_thermostat": "Cylinder Thermostat", + "room_roof_insulation": "Room Roof Insulation", + "low_energy_lighting": "Low Energy Lighting", + "external_wall_insulation": "External Wall Insulation", + "solar_pv": "Solar PV", + "heating_control": "Heating Control", + "solid_floor_insulation": "Solid Floor Insulation", + 
"suspended_floor_insulation": "Suspended Floor Insulation", + "internal_wall_insulation": "Internal Wall Insulation", } property_scenario_impact = [] for scenario_id in tqdm(scenario_ids): # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ - (recommendations_df["Scenario ID"] == scenario_id) & - (recommendations_df["default"] == True) - ].copy() + (recommendations_df["Scenario ID"] == scenario_id) + & (recommendations_df["default"] == True) + ].copy() - scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, - axis=1) - scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply( - lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) + scenario_recommendations["Estimated Lighting kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "low_energy_lighting" else 0, + axis=1, + ) + ) + scenario_recommendations["Estimated Solar kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: x["kwh_savings"] if x["type"] == "solar_pv" else 0, axis=1 + ) + ) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply( - lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ - 'kwh_savings'], axis=1) + scenario_recommendations["Estimated Heating Demand kWh Savings"] = ( + scenario_recommendations.apply( + lambda x: ( + 0 + if x["type"] in ["low_energy_lighting", "solar_pv"] + else x["kwh_savings"] + ), + axis=1, + ) + ) - scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Heating Demand kWh Savings': 'sum', - 'Estimated Lighting kWh Savings': 'sum', - 'Estimated Solar kWh Savings': 'sum', - "estimated_cost": "sum" - }).reset_index() + scenario_grouped_data = 
( + scenario_recommendations.groupby(["property_id"]) + .agg( + { + "Estimated Heating Demand kWh Savings": "sum", + "Estimated Lighting kWh Savings": "sum", + "Estimated Solar kWh Savings": "sum", + "estimated_cost": "sum", + } + ) + .reset_index() + ) comparison = properties_df.drop_duplicates()[ ["uprn", "property_id", "current_energy_demand_heating_hotwater"] - ].merge( - scenario_grouped_data, on=["property_id"], how="left" - ) - comparison["Estimated Heating Demand kWh Savings"] = ( - comparison["Estimated Heating Demand kWh Savings"].fillna(0) - ) - comparison["Estimated Lighting kWh Savings"] = ( - comparison["Estimated Lighting kWh Savings"].fillna(0) - ) - comparison["Estimated Solar kWh Savings"] = ( - comparison["Estimated Solar kWh Savings"].fillna(0) - ) + ].merge(scenario_grouped_data, on=["property_id"], how="left") + comparison["Estimated Heating Demand kWh Savings"] = comparison[ + "Estimated Heating Demand kWh Savings" + ].fillna(0) + comparison["Estimated Lighting kWh Savings"] = comparison[ + "Estimated Lighting kWh Savings" + ].fillna(0) + comparison["Estimated Solar kWh Savings"] = comparison[ + "Estimated Solar kWh Savings" + ].fillna(0) comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0) comparison["post_scenario_heating_hotwater_kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"] + comparison["current_energy_demand_heating_hotwater"] + - comparison["Estimated Heating Demand kWh Savings"] ) # For each scenario, we create a measure matrix measure_matrix = scenario_recommendations.pivot_table( - index='property_id', - columns='type', - values='id', # Using 'id' just as a placeholder for the pivot + index="property_id", + columns="type", + values="id", # Using 'id' just as a placeholder for the pivot aggfunc=lambda x: True, # If an ID exists for a given type, mark as True - fill_value=False # Fill other entries as False + fill_value=False, # Fill other entries 
as False ).reset_index() non_zero_heat_demand_impact = comparison[ - (comparison["Estimated Heating Demand kWh Savings"] > 0) | - (comparison["Estimated Lighting kWh Savings"] > 0) | - (comparison["Estimated Solar kWh Savings"] > 0) - ] + (comparison["Estimated Heating Demand kWh Savings"] > 0) + | (comparison["Estimated Lighting kWh Savings"] > 0) + | (comparison["Estimated Solar kWh Savings"] > 0) + ] measure_matrix = measure_matrix[ - measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values) + measure_matrix["property_id"].isin( + non_zero_heat_demand_impact["property_id"].values + ) ] measure_matrix = measure_matrix.rename(columns=rename_dict) - comparison = comparison.merge( - measure_matrix, on="property_id", how="left" - ) + comparison = comparison.merge(measure_matrix, on="property_id", how="left") comparison["scenario_id"] = scenario_id property_scenario_impact.append(comparison) property_scenario_impact = pd.concat(property_scenario_impact) # property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"]) - for v in list(rename_dict.values()) + ["Air Source Heat Pump", "High Heat Retention Storage", "Boiler Upgrade"]: + for v in list(rename_dict.values()) + [ + "Air Source Heat Pump", + "High Heat Retention Storage", + "Boiler Upgrade", + ]: # Fill NaNs with False property_scenario_impact[v] = property_scenario_impact[v].fillna(False) @@ -642,18 +784,22 @@ def lewes_outputs(): property_scenario_impact["post_scenario_heating_hotwater_kwh"] * scaling_factor ) - grouped_data = grouped_data.merge( - property_scenario_impact, how="left", on="uprn" - ) + grouped_data = grouped_data.merge(property_scenario_impact, how="left", on="uprn") # Agg the data - grouped_data = grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]).agg({ - "estimated_heating_hotwater_kwh": "mean", - "estimated_heating_hotwater_kwh_scaled": "mean", - "estimated_cost": "mean", - 
"post_scenario_heating_hotwater_kwh": "mean", - "post_scenario_heating_hotwater_kwh_scaled": "mean" - }).reset_index() + grouped_data = ( + grouped_data.groupby(["property_type", "property_sub_type", "scenario_id"]) + .agg( + { + "estimated_heating_hotwater_kwh": "mean", + "estimated_heating_hotwater_kwh_scaled": "mean", + "estimated_cost": "mean", + "post_scenario_heating_hotwater_kwh": "mean", + "post_scenario_heating_hotwater_kwh_scaled": "mean", + } + ) + .reset_index() + ) scenario_names = pd.DataFrame( [ @@ -665,45 +811,40 @@ def lewes_outputs(): "scenario_id": 48, "scenario": "Demand reduction – no solid wall, floors or heating/renewables", }, - { - "scenario_id": 49, - "scenario": "Demand reduction – no decant" - }, + {"scenario_id": 49, "scenario": "Demand reduction – no decant"}, { "scenario_id": 50, "scenario": "Demand reduction – no decant + heating & solar", }, - { - "scenario_id": 51, - "scenario": "Whole house retrofit" - } + {"scenario_id": 51, "scenario": "Whole house retrofit"}, ] - ) - grouped_data = grouped_data.merge( - scenario_names, how="left", on="scenario_id" - ) + grouped_data = grouped_data.merge(scenario_names, how="left", on="scenario_id") if not grouped_data[ - grouped_data["estimated_heating_hotwater_kwh"] < grouped_data["post_scenario_heating_hotwater_kwh"]].empty: + grouped_data["estimated_heating_hotwater_kwh"] + < grouped_data["post_scenario_heating_hotwater_kwh"] + ].empty: raise Exception("someting went wrong") - if not grouped_data[grouped_data["estimated_heating_hotwater_kwh_scaled"] < grouped_data[ - "post_scenario_heating_hotwater_kwh_scaled"]].empty: + if not grouped_data[ + grouped_data["estimated_heating_hotwater_kwh_scaled"] + < grouped_data["post_scenario_heating_hotwater_kwh_scaled"] + ].empty: raise Exception("someting went wrong") # Reorder the columns grouped_data = grouped_data[ [ - 'property_type', - 'property_sub_type', - 'scenario', - 'estimated_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh', - 
'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', - 'estimated_cost', + "property_type", + "property_sub_type", + "scenario", + "estimated_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", + "estimated_cost", ] ] @@ -730,9 +871,7 @@ def lewes_outputs(): scenario_names, how="left", on="scenario_id" ) - lewes_data = next_gen_dataset.merge( - property_scenario_impact, how="left", on="uprn" - ) + lewes_data = next_gen_dataset.merge(property_scenario_impact, how="left", on="uprn") lewes_data = lewes_data.sort_values( ["postcode", "uprn", "scenario_id"], ascending=True @@ -742,31 +881,52 @@ def lewes_outputs(): # TODO - remap the heating type lewes_data = lewes_data[ [ - 'uprn', 'address', 'postcode', 'property_type', 'built_form', + "uprn", + "address", + "postcode", + "property_type", + "built_form", # 'estimated_heating_hotwater_kwh', - 'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable', - 'ashp_size_kw', - 'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', - 'scenario', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', + "primary_fuel_type", + "gross_floor_area", + "floor_height", + "number_of_floors", + "ashp_suitable", + "ashp_size_kw", + "ashp_cost", + "solar_suitable", + "solar_size_kwp", + "solar_cost", + "scenario", + "estimated_heating_hotwater_kwh_scaled", + "post_scenario_heating_hotwater_kwh_scaled", # 'property_id', - dropped # 'current_energy_demand_heating_hotwater', - 'Estimated Heating Demand kWh Savings', - 'Estimated Lighting kWh Savings', - 'Estimated Solar kWh Savings', - 'estimated_cost', - 'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat', - 'Flat Roof Insulation', - 'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation', + "Estimated 
Heating Demand kWh Savings", + "Estimated Lighting kWh Savings", + "Estimated Solar kWh Savings", + "estimated_cost", + "post_scenario_heating_hotwater_kwh", + "Cavity Wall Insulation", + "Cylinder Thermostat", + "Flat Roof Insulation", + "Hot Water Tank Insulation", + "Loft Insulation", + "Mechanical Ventilation", + "Room Roof Insulation", # 'scenario_id', - dropped - 'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation', - 'Heating Control', - 'Solar PV', - 'Air Source Heat Pump', 'Boiler Upgrade', 'High Heat Retention Storage', - 'Internal Wall Insulation', - 'Solid Floor Insulation', - 'Suspended Floor Insulation', + "Low Energy Lighting", + "Secondary Heating", + "Windows Glazing", + "External Wall Insulation", + "Heating Control", + "Solar PV", + "Air Source Heat Pump", + "Boiler Upgrade", + "High Heat Retention Storage", + "Internal Wall Insulation", + "Solid Floor Insulation", + "Suspended Floor Insulation", ] ].rename( columns={ @@ -783,29 +943,34 @@ def lewes_outputs(): # "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh", "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh", "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh", - "estimated_cost": "Estimated Cost of Scenario" + "estimated_cost": "Estimated Cost of Scenario", } ) # We save this dataset, which will be shared with Lewes Council lewes_data.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Lewes property data.csv", + index=False, ) - df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario', - values=['post_scenario_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh_scaled']) + df_pivot = property_scenario_impact.pivot_table( + index="uprn", + columns="scenario", + values=[ + 
"post_scenario_heating_hotwater_kwh", + "post_scenario_heating_hotwater_kwh_scaled", + ], + ) # Flattening multi-index columns - df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns] + df_pivot.columns = [f"{col[0]}_{col[1]}" for col in df_pivot.columns] # Reset the index to have a clean dataframe df_pivot.reset_index(inplace=True) - next_gen_dataset = next_gen_dataset.merge( - df_pivot, how="left", on="uprn" - ) + next_gen_dataset = next_gen_dataset.merge(df_pivot, how="left", on="uprn") next_gen_dataset.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", + index=False, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py index 68978b08..d86be050 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/d_restart_failed_subtasks.py @@ -10,6 +10,7 @@ Additionally, we wil find the problematic records and remove them Given we ran an EPC C scenario, we should check how many properties, below EPC C we have, that have no plan or recommendations in case something went wrong """ + import pandas as pd from sqlalchemy.orm import Session from backend.app.db.models.portfolio import PropertyModel @@ -19,8 +20,7 @@ from backend.app.db.connection import db_session def get_uprns_for_portfolio(session: Session, portfolio_id: int) -> list[int]: return [ uprn - for (uprn,) in - session.query(PropertyModel.uprn) + for (uprn,) in session.query(PropertyModel.uprn) .filter(PropertyModel.portfolio_id == portfolio_id) .all() if uprn is not None @@ -34,7 +34,7 @@ with db_session() as session: sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting 
Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) missed_properties = sal[~sal["epc_os_uprn"].isin(completed_uprns)] @@ -44,7 +44,7 @@ missed_properties.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_failed_properties_to_restart_20260102.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Fixing an error - triggered jobs without removing EWI/IWI so need to delete all plans associated to these scenarios: @@ -52,14 +52,14 @@ scenario_id = None from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel def count_plans_for_scenario(session: Session, scenario_id: int) -> int: return session.execute( select(func.count()) - .select_from(Plan) - .where(Plan.scenario_id == scenario_id) + .select_from(PlanModel) + .where(PlanModel.scenario_id == scenario_id) ).scalar_one() @@ -69,8 +69,7 @@ with db_session() as session: def get_plan_ids_for_scenario(session: Session, scenario_id: int) -> list[int]: result = session.execute( - select(Plan.id) - .where(Plan.scenario_id == scenario_id) + select(PlanModel.id).where(PlanModel.scenario_id == scenario_id) ) return [row.id for row in result] @@ -84,7 +83,7 @@ from sqlalchemy.orm import Session def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -103,12 +102,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -116,10 +117,12 @@ def 
delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -127,14 +130,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -142,10 +147,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py index 4b946c60..509c8179 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/f_diagnostics.py @@ -5,6 +5,7 @@ This includes: # EPC C, there should be a plan 2) If the plan is fabric first, make sure they are actually fabric first """ + import pandas as pd scenario_names = { @@ -33,7 +34,9 @@ for scenario_id, scenario_name in scenario_names.items(): ) # find properties that are below the scenario sap target, but have no recommended measures - df["below_scenario_target"] = df["current_sap_points"] < scenario_sap_targets[scenario_id] + df["below_scenario_target"] = ( + df["current_sap_points"] < scenario_sap_targets[scenario_id] + ) df["no_recommended_measures"] = df["sap_points"] == 0 df["zero_cost"] = df["total_retrofit_cost"] == 0 df["sap_points_above_zero"] = df["sap_points"] > 0 @@ -45,7 +48,9 @@ for scenario_id, scenario_name in scenario_names.items(): ].copy() if 
scenario_sap_targets[scenario_id] == 81: - problematic_properties = problematic_properties[problematic_properties["property_type"] != "Flat"] + problematic_properties = problematic_properties[ + problematic_properties["property_type"] != "Flat" + ] zero_cost_above_zero_sap = df[ (df["sap_points_above_zero"] & df["zero_cost"]) @@ -61,8 +66,12 @@ for scenario_id, scenario_name in scenario_names.items(): # pd.set_option('display.width', 1000) # problematic_properties.head(len(problematic_properties)) - print(f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})") - print(f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})") + print( + f"We have {len(problematic_properties)} problematic properties for scenario {scenario_name} ({scenario_id})" + ) + print( + f"We have {len(zero_cost_above_zero_sap)} zero cost properties for scenario {scenario_name} ({scenario_id})" + ) problems.append(problematic_properties) problems.append(zero_cost_above_zero_sap) @@ -97,12 +106,12 @@ all_problems = all_problems.drop_duplicates(subset=["uprn"]) sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal, sal2]) @@ -114,7 +123,7 @@ retry.to_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/" "d_problematic_properties_to_review_20260106.xlsx", sheet_name="Standardised Asset List", - index=False + index=False, ) # Delete associated plans @@ -126,19 +135,20 @@ uprns = retry["epc_os_uprn"].tolist() from sqlalchemy.orm 
import Session from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from sqlalchemy import select, delete from sqlalchemy.exc import NoResultFound from sqlalchemy.orm import sessionmaker -def get_property_ids_for_uprns(session: Session, portfolio_id: int, uprns: list[int]) -> list[int]: +def get_property_ids_for_uprns( + session: Session, portfolio_id: int, uprns: list[int] +) -> list[int]: return [ property.id for property in session.query(PropertyModel) .filter( - PropertyModel.portfolio_id == portfolio_id, - PropertyModel.uprn.in_(uprns) + PropertyModel.portfolio_id == portfolio_id, PropertyModel.uprn.in_(uprns) ) .all() ] @@ -149,15 +159,21 @@ with db_session() as session: # Get all and delete plans for these property IDs -def get_all_plans_for_property_ids(session: Session, property_ids: list[int]) -> list[Plan]: - return session.query(Plan).filter(Plan.property_id.in_(property_ids)).all() +def get_all_plans_for_property_ids( + session: Session, property_ids: list[int] +) -> list[PlanModel]: + return ( + session.query(PlanModel).filter(PlanModel.property_id.in_(property_ids)).all() + ) -def get_ids_of_plans_for_deletion(session: Session, property_ids: list[int]) -> list[int]: +def get_ids_of_plans_for_deletion( + session: Session, property_ids: list[int] +) -> list[int]: return [ plan.id - for plan in session.query(Plan) - .filter(Plan.property_id.in_(property_ids)) + for plan in session.query(PlanModel) + .filter(PlanModel.property_id.in_(property_ids)) .all() ] @@ -168,7 +184,7 @@ with db_session() as session: def chunked(iterable, size): for i in range(0, len(iterable), size): - yield iterable[i:i + size] + yield iterable[i : i + size] from sqlalchemy import text @@ -187,12 +203,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendation_materials # 
---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -200,10 +218,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -211,14 +231,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -226,10 +248,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py index 4405d113..c451938d 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/g_rebaselining_installed_measrues.py @@ -2,17 +2,22 @@ import pandas as pd from tqdm import tqdm from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session, db_session -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials, \ - InstalledMeasure +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, 
+ InstalledMeasure, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from backend.app.utils import sap_to_epc from typing import Dict, List, Set from recommendations.Costs import Costs from backend.app.db.models.portfolio import Epc -pd.set_option('display.max_rows', 500) -pd.set_option('display.max_columns', 500) -pd.set_option('display.width', 1000) +pd.set_option("display.max_rows", 500) +pd.set_option("display.max_columns", 500) +pd.set_option("display.width", 1000) def get_all_data(portfolio_id, scenario_ids): @@ -22,22 +27,26 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -45,12 +54,12 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - 
{col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -59,25 +68,27 @@ def get_all_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -131,7 +142,7 @@ recommendations_df = pd.read_csv( sustainability_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " "- Data Extracts for Domna.xlsx", - sheet_name="Sustainability" + sheet_name="Sustainability", ) sustainability_data_with_sap = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/Parity Data " @@ -140,10 +151,16 @@ sustainability_data_with_sap = pd.read_excel( properties_df["uprn"] = properties_df["uprn"].astype(str) property_data_comparison = properties_df.merge( - sustainability_data, how="inner", left_on="uprn", 
right_on="UPRN", suffixes=("_prop", "_sust") + sustainability_data, + how="inner", + left_on="uprn", + right_on="UPRN", + suffixes=("_prop", "_sust"), ) -property_data_comparison["wall_type"] = property_data_comparison["walls"].str.split(",").str[0].str.strip() +property_data_comparison["wall_type"] = ( + property_data_comparison["walls"].str.split(",").str[0].str.strip() +) column_pairs = { "built_form": "Attachment", @@ -154,25 +171,28 @@ column_pairs = { combination_tables = {} for v1, v2 in column_pairs.items(): - df = property_data_comparison.groupby([v1, v2]).size().reset_index(name='count') + df = property_data_comparison.groupby([v1, v2]).size().reset_index(name="count") combination_tables[v1] = df # We just need all of the measure types, per property recommendation_measure_types = recommendations_df[ - ["property_id", "measure_type" - , "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings" - ] + [ + "property_id", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] ].drop_duplicates() recommendation_measure_types["flag"] = True # We pivot -recommendations_measures_pivot = recommendation_measure_types[ - ["property_id", "measure_type", "flag"] -].drop_duplicates().pivot( - index='property_id', - columns='measure_type', - values='flag' +recommendations_measures_pivot = ( + recommendation_measure_types[["property_id", "measure_type", "flag"]] + .drop_duplicates() + .pivot(index="property_id", columns="measure_type", values="flag") ) recommendations_measures_pivot = recommendations_measures_pivot.reset_index() @@ -180,137 +200,157 @@ properties_to_recs = properties_df.rename(columns={"solar_pv": "solar_data"}).me recommendations_measures_pivot, how="left", on="property_id" ) -sustainability_data["cavity_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"] -) 
-sustainability_data["internal_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["Internal", "FilledCavityPlusInternal"] -) -sustainability_data["external_wall_insulation"] = sustainability_data["Wall Insulation"].isin( - ["External", "FilledCavityPlusExternal"] -) +sustainability_data["cavity_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["FilledCavity", "FilledCavityPlusInternal", "FilledCavityPlusExternal"]) +sustainability_data["internal_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["Internal", "FilledCavityPlusInternal"]) +sustainability_data["external_wall_insulation"] = sustainability_data[ + "Wall Insulation" +].isin(["External", "FilledCavityPlusExternal"]) sustainability_data["loft_insulation"] = sustainability_data["Roof Insulation"].isin( ["mm300", "mm250", "mm350", "mm400", "mm270"] ) sustainability_data["double_glazing"] = sustainability_data["Glazing"].isin( - ["Double 2002 or later", "Double but age unknown", "Triple", "DoubleKnownData", "Secondary", "TripleKnownData"] + [ + "Double 2002 or later", + "Double but age unknown", + "Triple", + "DoubleKnownData", + "Secondary", + "TripleKnownData", + ] ) sustainability_data["secondary_glazing"] = sustainability_data["Glazing"].isin( ["Secondary"] ) -sustainability_data["suspended_floor_insulation"] = sustainability_data["Floor Insulation"].isin( - ["RetroFitted"] +sustainability_data["suspended_floor_insulation"] = sustainability_data[ + "Floor Insulation" +].isin(["RetroFitted"]) + +sustainability_data["boiler_upgrade"] = sustainability_data["Heating"].isin( + ["Boilers"] +) & sustainability_data["Boiler Efficiency"].isin(["A"]) +sustainability_data["air_source_heat_pump"] = sustainability_data["Heating"].isin( + ["Heat pumps (wet)"] ) -sustainability_data["boiler_upgrade"] = ( - sustainability_data["Heating"].isin(["Boilers"]) & sustainability_data["Boiler Efficiency"].isin(["A"]) -) -sustainability_data["air_source_heat_pump"] = 
(sustainability_data["Heating"].isin(["Heat pumps (wet)"])) +sustainability_data["time_temperature_zone_control"] = sustainability_data[ + "Controls Adequacy" +].isin(["Top Spec"]) -sustainability_data["time_temperature_zone_control"] = ( - sustainability_data["Controls Adequacy"].isin(["Top Spec"]) -) - -sustainability_data["roomstat_programmer_trvs"] = ( - sustainability_data["Controls Adequacy"].isin(["Optimal"]) -) +sustainability_data["roomstat_programmer_trvs"] = sustainability_data[ + "Controls Adequacy" +].isin(["Optimal"]) sustainability_data["flat_roof_insulation"] = ( - (sustainability_data["Roof Construction"] == "Flat") & - (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) -) + sustainability_data["Roof Construction"] == "Flat" +) & (sustainability_data["Roof Insulation"].isin(["mm50", "mm150", "mm100"])) properties_to_recs["uprn"] = properties_to_recs["uprn"].astype(str) comparison = sustainability_data.merge( properties_to_recs[ - ["uprn", "cavity_wall_insulation", "external_wall_insulation", "internal_wall_insulation", "loft_insulation", - "double_glazing", "secondary_glazing", "suspended_floor_insulation", "boiler_upgrade", "air_source_heat_pump", - "time_temperature_zone_control", "roomstat_programmer_trvs", "flat_roof_insulation", "room_roof_insulation" - ] + [ + "uprn", + "cavity_wall_insulation", + "external_wall_insulation", + "internal_wall_insulation", + "loft_insulation", + "double_glazing", + "secondary_glazing", + "suspended_floor_insulation", + "boiler_upgrade", + "air_source_heat_pump", + "time_temperature_zone_control", + "roomstat_programmer_trvs", + "flat_roof_insulation", + "room_roof_insulation", + ] ], left_on="UPRN", right_on="uprn", how="left", - suffixes=("", "_from_recs") + suffixes=("", "_from_recs"), ) # Flag entries where we've been told that walls are already insulated, but we have recommendations for wall insulation # ------------ Walls ------------ cwi_conflicting = comparison[ - 
(comparison["cavity_wall_insulation"]) & - (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["cavity_wall_insulation"]) + & (pd.isnull(comparison["cavity_wall_insulation_from_recs"]) == False) +].copy() cwi_conflicting["conflict_cavity_wall_insulation"] = True iwi_conflicting = comparison[ - (comparison["internal_wall_insulation"]) & - (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["internal_wall_insulation"]) + & (pd.isnull(comparison["internal_wall_insulation_from_recs"]) == False) +].copy() iwi_conflicting["conflict_iwi_wall_insulation"] = True ewi_conflicting = comparison[ - (comparison["external_wall_insulation"]) & - (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) - ].copy() + (comparison["external_wall_insulation"]) + & (pd.isnull(comparison["external_wall_insulation_from_recs"]) == False) +].copy() ewi_conflicting["conflict_ewi_wall_insulation"] = True # ------------ Roof ------------ loft_conflicting = comparison[ - (comparison["loft_insulation"]) & - (pd.isnull(comparison["loft_insulation_from_recs"]) == False) - ].copy() + (comparison["loft_insulation"]) + & (pd.isnull(comparison["loft_insulation_from_recs"]) == False) +].copy() loft_conflicting["conflict_loft_insulation"] = True # ------------ Windows ------------ double_glazing_conflicting = comparison[ - (comparison["double_glazing"] | comparison["secondary_glazing"]) & - (pd.isnull(comparison["double_glazing_from_recs"]) == False) & - (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) - ].copy() + (comparison["double_glazing"] | comparison["secondary_glazing"]) + & (pd.isnull(comparison["double_glazing_from_recs"]) == False) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == True) +].copy() double_glazing_conflicting["conflict_double_glazing"] = True secondary_glazing_conflicting = comparison[ - (comparison["secondary_glazing"]) & - 
(pd.isnull(comparison["secondary_glazing_from_recs"]) == False) - ].copy() + (comparison["secondary_glazing"]) + & (pd.isnull(comparison["secondary_glazing_from_recs"]) == False) +].copy() secondary_glazing_conflicting["conflict_secondary_glazing"] = True # ------------ Floors ------------ floors_conflicting = comparison[ - (comparison["suspended_floor_insulation"]) & - (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) - ].copy() + (comparison["suspended_floor_insulation"]) + & (pd.isnull(comparison["suspended_floor_insulation_from_recs"]) == False) +].copy() floors_conflicting["conflict_suspended_floor_insulation"] = True # ------------ Boiler Upgrade ------------ boiler_conflicting = comparison[ - (comparison["boiler_upgrade"]) & - (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) - ].copy() + (comparison["boiler_upgrade"]) + & (pd.isnull(comparison["boiler_upgrade_from_recs"]) == False) +].copy() boiler_conflicting["conflict_boiler_upgrade"] = True # ------------ ASHP ------------ ashp_conflicting = comparison[ - (comparison["air_source_heat_pump"]) & - (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) - ].copy() + (comparison["air_source_heat_pump"]) + & (pd.isnull(comparison["air_source_heat_pump_from_recs"]) == False) +].copy() ashp_conflicting["conflict_air_source_heat_pump"] = True # ------------ heat controls ------------ ttzc_conflicting = comparison[ - (comparison["time_temperature_zone_control"]) & - (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) - ].copy() + (comparison["time_temperature_zone_control"]) + & (pd.isnull(comparison["time_temperature_zone_control_from_recs"]) == False) +].copy() ttzc_conflicting["conflict_time_temperature_zone_control"] = True rst_conflicting = comparison[ - (comparison["roomstat_programmer_trvs"]) & - (pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) - ].copy() + (comparison["roomstat_programmer_trvs"]) + & 
(pd.isnull(comparison["roomstat_programmer_trvs_from_recs"]) == False) +].copy() rst_conflicting["conflict_roomstat_programmer_trvs"] = True # ------------ Flat Roof Insulation ----------- flat_roof_conflicting = comparison[ - (comparison["flat_roof_insulation"]) & - (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) - ].copy() + (comparison["flat_roof_insulation"]) + & (pd.isnull(comparison["flat_roof_insulation_from_recs"]) == False) +].copy() flat_roof_conflicting["conflict_flat_roof_insulation"] = True # All properties with conflicts @@ -327,22 +367,26 @@ all_conflicts = pd.concat( ashp_conflicting, ttzc_conflicting, rst_conflicting, - flat_roof_conflicting + flat_roof_conflicting, ] ) all_conflicts = all_conflicts[ [ "uprn", - 'conflict_cavity_wall_insulation', - 'conflict_iwi_wall_insulation', - 'conflict_ewi_wall_insulation', - 'conflict_loft_insulation', - 'conflict_double_glazing', - 'conflict_secondary_glazing', - 'conflict_suspended_floor_insulation', 'conflict_boiler_upgrade', - 'conflict_air_source_heat_pump', - 'conflict_time_temperature_zone_control', 'conflict_roomstat_programmer_trvs', 'conflict_flat_roof_insulation'] + "conflict_cavity_wall_insulation", + "conflict_iwi_wall_insulation", + "conflict_ewi_wall_insulation", + "conflict_loft_insulation", + "conflict_double_glazing", + "conflict_secondary_glazing", + "conflict_suspended_floor_insulation", + "conflict_boiler_upgrade", + "conflict_air_source_heat_pump", + "conflict_time_temperature_zone_control", + "conflict_roomstat_programmer_trvs", + "conflict_flat_roof_insulation", + ] ] all_conflicts = all_conflicts.rename( @@ -358,31 +402,29 @@ all_conflicts = all_conflicts.rename( "conflict_air_source_heat_pump": "air_source_heat_pump", "conflict_time_temperature_zone_control": "time_temperature_zone_control", "conflict_roomstat_programmer_trvs": "roomstat_programmer_trvs", - "conflict_flat_roof_insulation": "flat_roof_insulation" - + "conflict_flat_roof_insulation": 
"flat_roof_insulation", } ) # Reshape by UPRN by melting all_conflicts = all_conflicts.melt( - id_vars=["uprn"], - var_name="measure_type", - value_name="already_installed" + id_vars=["uprn"], var_name="measure_type", value_name="already_installed" ) -recommendations_df["property_id"] = recommendations_df["property_id"].astype(int).astype(str) +recommendations_df["property_id"] = ( + recommendations_df["property_id"].astype(int).astype(str) +) properties_df["property_id"] = properties_df["property_id"].astype(int).astype(str) recs_with_uprn = recommendations_df.merge( properties_df[["property_id", "uprn"]], on="property_id", how="left", - suffixes=("", "_prop") + suffixes=("", "_prop"), ) recs_with_uprn = ( - recs_with_uprn - .sort_values("sap_points", ascending=False) + recs_with_uprn.sort_values("sap_points", ascending=False) .groupby(["uprn", "measure_type"], as_index=False) .first() ) @@ -390,13 +432,24 @@ recs_with_uprn = ( recs_with_uprn["uprn"] = recs_with_uprn["uprn"].astype(str) installed_measures_df = all_conflicts.merge( - recs_with_uprn[["uprn", "measure_type", "sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", - "energy_cost_savings"]], + recs_with_uprn[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ], how="left", - on=["uprn", "measure_type"] + on=["uprn", "measure_type"], ) -installed_measures_df = installed_measures_df[installed_measures_df["already_installed"] == True] +installed_measures_df = installed_measures_df[ + installed_measures_df["already_installed"] == True +] ## --- Sense checking ---- @@ -423,27 +476,26 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) recs_with_uprn[ (recs_with_uprn["measure_type"] == "mechanical_ventilation") & (recs_with_uprn["uprn"].isin(fabric_uprns)) - ] + ] .sort_values("sap_points", ascending=False) .drop_duplicates(subset=["uprn"]) ) - mv_installed = mv_recs[[ - 
"uprn", - "measure_type", - "sap_points", - "heat_demand", - "kwh_savings", - "co2_equivalent_savings", - "energy_cost_savings", - ]].copy() + mv_installed = mv_recs[ + [ + "uprn", + "measure_type", + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", + ] + ].copy() mv_installed["already_installed"] = True - return pd.concat( - [installed_measures_df, mv_installed], - ignore_index=True - ) + return pd.concat([installed_measures_df, mv_installed], ignore_index=True) # installed_measures_df = add_mechanical_ventilation_for_fabric( @@ -453,24 +505,39 @@ def add_mechanical_ventilation_for_fabric(installed_measures_df, recs_with_uprn) assert installed_measures_df[["uprn", "measure_type"]].duplicated().sum() == 0 -for col in ["sap_points", "heat_demand", "kwh_savings", "co2_equivalent_savings", "energy_cost_savings"]: - print(f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", ) +for col in [ + "sap_points", + "heat_demand", + "kwh_savings", + "co2_equivalent_savings", + "energy_cost_savings", +]: + print( + f"n missings for {col}: {pd.isnull(installed_measures_df[col]).sum()}", + ) # Do some calcs on SAP impact sap_impact = installed_measures_df.groupby(["uprn"])["sap_points"].sum().reset_index() -properties_sap = properties_df[["uprn", "current_sap_points", "current_epc_rating"]].copy() +properties_sap = properties_df[ + ["uprn", "current_sap_points", "current_epc_rating"] +].copy() properties_sap["uprn"] = properties_sap["uprn"].astype(str) -old_sap_vs_new = properties_sap.merge( - sap_impact, how="inner", on="uprn" +old_sap_vs_new = properties_sap.merge(sap_impact, how="inner", on="uprn") +old_sap_vs_new["new_sap_points"] = ( + old_sap_vs_new["current_sap_points"] + old_sap_vs_new["sap_points"] +) +old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply( + lambda x: sap_to_epc(x) ) -old_sap_vs_new["new_sap_points"] = old_sap_vs_new["current_sap_points"] + 
old_sap_vs_new["sap_points"] -old_sap_vs_new["new_epc_rating"] = old_sap_vs_new["new_sap_points"].apply(lambda x: sap_to_epc(x)) # How many properties go from below C to above -old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69]["new_epc_rating"].value_counts() +old_sap_vs_new[old_sap_vs_new["current_sap_points"] < 69][ + "new_epc_rating" +].value_counts() changed = old_sap_vs_new[ - (old_sap_vs_new["current_sap_points"] < 69) & (old_sap_vs_new["new_sap_points"] >= 69) - ] + (old_sap_vs_new["current_sap_points"] < 69) + & (old_sap_vs_new["new_sap_points"] >= 69) +] # What do I need to do: # TODO: - need to get a view of "all" measures for the property, not just recommended. We can do this but just looking @@ -499,22 +566,38 @@ def bulk_insert_installed_measures(installed_measures_df): now = datetime.utcnow() for _, row in installed_measures_df.iterrows(): - records.append({ - "uprn": int(row["uprn"]), - "measure_type": row["measure_type"], - "installed_at": now, - "sap_points": float(row["sap_points"]) if pd.notna(row["sap_points"]) else None, - "carbon_savings": float(row["co2_equivalent_savings"]) if pd.notna(row["co2_equivalent_savings"]) else None, - "kwh_savings": float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None, - "bill_savings": float(row["energy_cost_savings"]) if pd.notna(row["energy_cost_savings"]) else None, - "heat_demand_savings": float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None, - "source": SOURCE, - "is_active": True, - }) + records.append( + { + "uprn": int(row["uprn"]), + "measure_type": row["measure_type"], + "installed_at": now, + "sap_points": ( + float(row["sap_points"]) if pd.notna(row["sap_points"]) else None + ), + "carbon_savings": ( + float(row["co2_equivalent_savings"]) + if pd.notna(row["co2_equivalent_savings"]) + else None + ), + "kwh_savings": ( + float(row["kwh_savings"]) if pd.notna(row["kwh_savings"]) else None + ), + "bill_savings": ( + float(row["energy_cost_savings"]) + if 
pd.notna(row["energy_cost_savings"]) + else None + ), + "heat_demand_savings": ( + float(row["heat_demand"]) if pd.notna(row["heat_demand"]) else None + ), + "source": SOURCE, + "is_active": True, + } + ) try: for i in range(0, len(records), BATCH_SIZE): - batch = records[i:i + BATCH_SIZE] + batch = records[i : i + BATCH_SIZE] session.bulk_insert_mappings(InstalledMeasure, batch) session.commit() print(f"✅ Inserted {i + len(batch)} / {len(records)}") @@ -580,9 +663,7 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( def exclude_ventilation(column): return case( ( - InstalledMeasure.measure_type.notin_( - REBASING_EXCLUDED_MEASURES - ), + InstalledMeasure.measure_type.notin_(REBASING_EXCLUDED_MEASURES), column, ), else_=0.0, @@ -594,33 +675,24 @@ def get_installed_measure_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.sap_points)), 0.0, ).label("sap_points"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.carbon_savings)), 0.0, ).label("co2"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.kwh_savings)), 0.0, ).label("energy_kwh"), - func.coalesce( func.sum(exclude_ventilation(InstalledMeasure.bill_savings)), 0.0, ).label("energy_bill"), - func.coalesce( - func.sum( - exclude_ventilation( - InstalledMeasure.heat_demand_savings - ) - ), + func.sum(exclude_ventilation(InstalledMeasure.heat_demand_savings)), 0.0, ).label("heat_demand"), ) @@ -657,16 +729,14 @@ def get_installed_measure_types_by_uprn( ) # Convert enums → strings - return { - r[0].value if hasattr(r[0], "value") else r[0] - for r in rows - } + return {r[0].value if hasattr(r[0], "value") else r[0] for r in rows} # ------------------------------------------------------------ # PROPERTY REBASING (READ-ONLY) # ------------------------------------------------------------ + def compute_property_sap_updates( properties: List[PropertyModel], 
sap_adjustments: Dict[int, float], # keyed by uprn @@ -692,14 +762,16 @@ def compute_property_sap_updates( sap_delta = sap_adjustments[prop.uprn] new_sap = prop.original_sap_points + sap_delta - updates.append({ - "property_id": prop.id, - "uprn": prop.uprn, - "original_sap_points": prop.original_sap_points, - "installed_sap_delta": sap_delta, - "new_sap_points": new_sap, - "is_adjusted": True, - }) + updates.append( + { + "property_id": prop.id, + "uprn": prop.uprn, + "original_sap_points": prop.original_sap_points, + "installed_sap_delta": sap_delta, + "new_sap_points": new_sap, + "is_adjusted": True, + } + ) return updates @@ -708,6 +780,7 @@ def compute_property_sap_updates( # PLAN RECOMPUTATION HELPERS # ------------------------------------------------------------ + def get_effective_plan_recommendations( session, plan_id: int, excluded_measure_types: Set[str] ) -> List[Recommendation]: @@ -715,11 +788,10 @@ def get_effective_plan_recommendations( session.query(Recommendation) .join(PlanRecommendations) .filter(PlanRecommendations.plan_id == plan_id) - .filter(Recommendation.default.is_(True))) + .filter(Recommendation.default.is_(True)) + ) if excluded_measure_types: - q = q.filter( - ~Recommendation.measure_type.in_(excluded_measure_types) - ) + q = q.filter(~Recommendation.measure_type.in_(excluded_measure_types)) return q.all() @@ -791,7 +863,11 @@ def get_installed_measure_types_by_property_id_for_portfolio( installed_by_property[property_id].add(mt) # drag-along rules - if mt in {"cavity_wall_insulation", "internal_wall_insulation", "external_wall_insulation"}: + if mt in { + "cavity_wall_insulation", + "internal_wall_insulation", + "external_wall_insulation", + }: installed_by_property[property_id].add("mechanical_ventilation") return installed_by_property @@ -810,7 +886,9 @@ def get_all_default_plan_recommendations( PlanRecommendations.plan_id, Recommendation, ) - .join(Recommendation, Recommendation.id == PlanRecommendations.recommendation_id) + 
.join( + Recommendation, Recommendation.id == PlanRecommendations.recommendation_id + ) .filter(PlanRecommendations.plan_id.in_(plan_ids)) .filter(Recommendation.default.is_(True)) .all() @@ -835,9 +913,14 @@ def filter_remaining_recommendations( return recommendations return [ - r for r in recommendations + r + for r in recommendations if ( - (r.measure_type.value if hasattr(r.measure_type, "value") else r.measure_type) + ( + r.measure_type.value + if hasattr(r.measure_type, "value") + else r.measure_type + ) not in installed_types ) ] @@ -845,11 +928,11 @@ def filter_remaining_recommendations( def compute_plan_updates( session, - plans: List[Plan], + plans: List[PlanModel], properties_by_id: Dict[int, PropertyModel], epcs_by_property_id: Dict[int, PropertyDetailsEpcModel], installed_types_by_property_id, - all_ventilation_measures + all_ventilation_measures, ) -> List[dict]: """ Computes plan metrics after marking some recommendations as already installed. @@ -921,39 +1004,34 @@ def compute_plan_updates( # ): # continue - updates.append({ - "plan_id": plan.id, - "property_id": plan.property_id, - - # SAP / EPC - "post_sap_points": post_sap, - "post_epc_rating": sap_to_epc(post_sap), - - # Carbon - "co2_savings": remaining["co2_savings"], - "post_co2_emissions": post_co2, - - # Energy bills - "energy_bill_savings": remaining["energy_bill_savings"], - "post_energy_bill": post_bill, - - # Energy consumption - "energy_consumption_savings": remaining["energy_consumption_savings"], - "post_energy_consumption": post_kwh, - - # Valuation (safe) - "valuation_increase": remaining["valuation_increase"], - "valuation_post_retrofit": ( - prop.current_valuation - + remaining["valuation_increase"] - if prop.current_valuation is not None - else None - ), - - # Costs - "cost_of_works": remaining["cost_of_works"], - "contingency_cost": remaining["contingency_cost"], - }) + updates.append( + { + "plan_id": plan.id, + "property_id": plan.property_id, + # SAP / EPC + 
"post_sap_points": post_sap, + "post_epc_rating": sap_to_epc(post_sap), + # Carbon + "co2_savings": remaining["co2_savings"], + "post_co2_emissions": post_co2, + # Energy bills + "energy_bill_savings": remaining["energy_bill_savings"], + "post_energy_bill": post_bill, + # Energy consumption + "energy_consumption_savings": remaining["energy_consumption_savings"], + "post_energy_consumption": post_kwh, + # Valuation (safe) + "valuation_increase": remaining["valuation_increase"], + "valuation_post_retrofit": ( + prop.current_valuation + remaining["valuation_increase"] + if prop.current_valuation is not None + else None + ), + # Costs + "cost_of_works": remaining["cost_of_works"], + "contingency_cost": remaining["contingency_cost"], + } + ) property_to_installed_types[prop.id] = installed_types @@ -1065,7 +1143,6 @@ def compute_epc_rebasing_updates( updates[property_id] = { "property_id": property_id, - # Originals (only set once) "original_co2_emissions": ( epc.original_co2_emissions @@ -1087,7 +1164,6 @@ def compute_epc_rebasing_updates( if epc.original_current_energy_demand_heating_hotwater is not None else epc.current_energy_demand_heating_hotwater ), - # Adjustments (always re-applied from originals) "installed_measures_co2_adjustment": adj["co2"], "installed_measures_energy_demand_adjustment": adj["energy_kwh"], @@ -1106,8 +1182,8 @@ def persist_plan_updates(plan_updates: list[dict]): with db_session() as session: plans = ( - session.query(Plan) - .filter(Plan.id.in_([u["plan_id"] for u in plan_updates])) + session.query(PlanModel) + .filter(PlanModel.id.in_([u["plan_id"] for u in plan_updates])) .all() ) @@ -1168,20 +1244,17 @@ def persist_epc_rebasing_updates( # Store originals once epc.original_co2_emissions = u["original_co2_emissions"] - epc.original_primary_energy_consumption = ( - u["original_primary_energy_consumption"] - ) - epc.original_current_energy_demand = ( - u["original_current_energy_demand"] - ) - 
epc.original_current_energy_demand_heating_hotwater = ( - u["original_current_energy_demand_heating_hotwater"] - ) + epc.original_primary_energy_consumption = u[ + "original_primary_energy_consumption" + ] + epc.original_current_energy_demand = u["original_current_energy_demand"] + epc.original_current_energy_demand_heating_hotwater = u[ + "original_current_energy_demand_heating_hotwater" + ] # Apply rebased values epc.co2_emissions = ( - u["original_co2_emissions"] - - u["installed_measures_co2_adjustment"] + u["original_co2_emissions"] - u["installed_measures_co2_adjustment"] ) epc.primary_energy_consumption = ( @@ -1195,18 +1268,18 @@ def persist_epc_rebasing_updates( ) # Flags + audit fields - epc.installed_measures_co2_adjustment = ( - u["installed_measures_co2_adjustment"] - ) - epc.installed_measures_energy_demand_adjustment = ( - u["installed_measures_energy_demand_adjustment"] - ) - epc.installed_measures_total_energy_bill_adjustment = ( - u["installed_measures_total_energy_bill_adjustment"] - ) - epc.installed_measures_heat_demand_adjustment = ( - u["installed_measures_heat_demand_adjustment"] - ) + epc.installed_measures_co2_adjustment = u[ + "installed_measures_co2_adjustment" + ] + epc.installed_measures_energy_demand_adjustment = u[ + "installed_measures_energy_demand_adjustment" + ] + epc.installed_measures_total_energy_bill_adjustment = u[ + "installed_measures_total_energy_bill_adjustment" + ] + epc.installed_measures_heat_demand_adjustment = u[ + "installed_measures_heat_demand_adjustment" + ] epc.is_epc_adjusted_for_installed_measures = True print(f"✅ Updated {len(epcs)} EPC records") @@ -1254,9 +1327,7 @@ def initialise_original_property_and_epc_values(portfolio_id: int): updated = True if epc.original_primary_energy_consumption is None: - epc.original_primary_energy_consumption = ( - epc.primary_energy_consumption - ) + epc.original_primary_energy_consumption = epc.primary_energy_consumption updated = True if epc.original_current_energy_demand 
is None: @@ -1314,21 +1385,19 @@ def get_installed_ventilation_adjustments_by_uprn_for_portfolio( rows = ( session.query( InstalledMeasure.uprn.label("uprn"), - - func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0) - .label("sap_points"), - - func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0) - .label("co2"), - - func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0) - .label("energy_kwh"), - - func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0) - .label("energy_bill"), - - func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0) - .label("heat_demand"), + func.coalesce(func.sum(InstalledMeasure.sap_points), 0.0).label( + "sap_points" + ), + func.coalesce(func.sum(InstalledMeasure.carbon_savings), 0.0).label("co2"), + func.coalesce(func.sum(InstalledMeasure.kwh_savings), 0.0).label( + "energy_kwh" + ), + func.coalesce(func.sum(InstalledMeasure.bill_savings), 0.0).label( + "energy_bill" + ), + func.coalesce(func.sum(InstalledMeasure.heat_demand_savings), 0.0).label( + "heat_demand" + ), ) .filter(InstalledMeasure.is_active.is_(True)) .filter(InstalledMeasure.measure_type == "mechanical_ventilation") @@ -1370,8 +1439,9 @@ def mark_recommendations_as_installed( stmt = ( update(Recommendation) .where( - tuple_(Recommendation.property_id, Recommendation.measure_type) - .in_(property_measure_pairs) + tuple_(Recommendation.property_id, Recommendation.measure_type).in_( + property_measure_pairs + ) ) .values(already_installed=True) ) @@ -1400,13 +1470,17 @@ with db_read_session() as session: .all() ) - all_ventilation_measures = get_installed_ventilation_adjustments_by_uprn_for_portfolio(session, PORTFOLIO_ID) - installed_types_by_property_id = get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + all_ventilation_measures = ( + get_installed_ventilation_adjustments_by_uprn_for_portfolio( + session, PORTFOLIO_ID + ) + ) + installed_types_by_property_id = ( + 
get_installed_measure_types_by_property_id_for_portfolio(session, PORTFOLIO_ID) + ) plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == PORTFOLIO_ID) - .all() + session.query(PlanModel).filter(PlanModel.portfolio_id == PORTFOLIO_ID).all() ) epcs = { @@ -1419,23 +1493,17 @@ with db_read_session() as session: ) } - installed_adjustments = ( - get_installed_measure_adjustments_by_uprn_for_portfolio( - session, - PORTFOLIO_ID, - ) + installed_adjustments = get_installed_measure_adjustments_by_uprn_for_portfolio( + session, + PORTFOLIO_ID, ) property_updates = compute_property_sap_updates( - properties, - {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} + properties, {uprn: v["sap_points"] for uprn, v in installed_adjustments.items()} ) properties_by_id = {p.id: p for p in properties} - property_updates_by_id = { - u["property_id"]: u - for u in property_updates - } + property_updates_by_id = {u["property_id"]: u for u in property_updates} epc_updates = compute_epc_rebasing_updates( epcs, @@ -1453,9 +1521,7 @@ with db_read_session() as session: ) # Used to mark recommendations - pairs = build_installed_recommendation_pairs( - installed_types_by_property_id - ) + pairs = build_installed_recommendation_pairs(installed_types_by_property_id) from copy import deepcopy @@ -1466,36 +1532,33 @@ for u in plan_updates_comparison: if not before: continue - u.update({ - # SAP - "before_sap_points": before.post_sap_points, - "after_sap_points": u["post_sap_points"], - - # Carbon - "before_post_co2_emissions": before.post_co2_emissions, - "after_post_co2_emissions": u["post_co2_emissions"], - - # Costs - "before_cost_of_works": before.cost_of_works, - "after_cost_of_works": u["cost_of_works"], - - "before_contingency_cost": before.contingency_cost, - "after_contingency_cost": u["contingency_cost"], - }) + u.update( + { + # SAP + "before_sap_points": before.post_sap_points, + "after_sap_points": u["post_sap_points"], + # Carbon + 
"before_post_co2_emissions": before.post_co2_emissions, + "after_post_co2_emissions": u["post_co2_emissions"], + # Costs + "before_cost_of_works": before.cost_of_works, + "after_cost_of_works": u["cost_of_works"], + "before_contingency_cost": before.contingency_cost, + "after_contingency_cost": u["contingency_cost"], + } + ) plan_updates_df = pd.DataFrame(plan_updates_comparison) plan_updates_df["delta_sap_points"] = ( - plan_updates_df["after_sap_points"] - - plan_updates_df["before_sap_points"] + plan_updates_df["after_sap_points"] - plan_updates_df["before_sap_points"] ) plan_updates_df["delta_carbon"] = ( plan_updates_df["after_post_co2_emissions"] - plan_updates_df["before_post_co2_emissions"] ) plan_updates_df["delta_cost_of_works"] = ( - plan_updates_df["after_cost_of_works"] - - plan_updates_df["before_cost_of_works"] + plan_updates_df["after_cost_of_works"] - plan_updates_df["before_cost_of_works"] ) plan_updates_df["delta_contingency_cost"] = ( plan_updates_df["after_contingency_cost"] @@ -1503,12 +1566,14 @@ plan_updates_df["delta_contingency_cost"] = ( ) # High-level sanity checks -summary = plan_updates_df[[ - "delta_sap_points", - "delta_carbon", - "delta_cost_of_works", - "delta_contingency_cost", -]].sum() +summary = plan_updates_df[ + [ + "delta_sap_points", + "delta_carbon", + "delta_cost_of_works", + "delta_contingency_cost", + ] +].sum() print(summary) @@ -1619,17 +1684,15 @@ def apply_appliance_carbon_to_plans( .all() ) - epc_by_property_id = { - e.property_id: e for e in epcs - } + epc_by_property_id = {e.property_id: e for e in epcs} # -------------------------------------------- # Load plans with post carbon # -------------------------------------------- plans = ( - session.query(Plan) - .filter(Plan.portfolio_id == portfolio_id) - .filter(Plan.post_co2_emissions.isnot(None)) + session.query(PlanModel) + .filter(PlanModel.portfolio_id == portfolio_id) + .filter(PlanModel.post_co2_emissions.isnot(None)) .all() ) @@ -1682,13 +1745,7 @@ def 
apply_appliance_carbon_to_plans( # Get all uprns for entries in already installed, from the database with db_read_session() as session: - db_uprns = { - str(r[0]) - for r in ( - session.query(InstalledMeasure.uprn) - .all() - ) - } + db_uprns = {str(r[0]) for r in (session.query(InstalledMeasure.uprn).all())} # What is the overlap of these properties and the properties in portfolo 430 sal_data = pd.read_excel( diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py index 67ff2c85..e3008f65 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/h_reset_estimated_epcs.py @@ -3,31 +3,41 @@ from sqlalchemy.orm import Session from sqlalchemy import text, select from backend.app.db.connection import db_read_session from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel PORTFOLIO_ID = 435 with db_read_session() as session: # Get all properties from PropertyDetailsEpcModel, where estimated is True, for portfolio 419 - estimated_epcs = session.query(PropertyDetailsEpcModel).filter( - # PropertyDetailsEpcModel.estimated == True, - PropertyDetailsEpcModel.property_id.in_( - session.query(PropertyModel.id).filter(PropertyModel.portfolio_id == PORTFOLIO_ID) + estimated_epcs = ( + session.query(PropertyDetailsEpcModel) + .filter( + # PropertyDetailsEpcModel.estimated == True, + PropertyDetailsEpcModel.property_id.in_( + session.query(PropertyModel.id).filter( + PropertyModel.portfolio_id == PORTFOLIO_ID + ) + ) ) - ).all() + .all() + ) # Get the ids estimated_epc_ids = [epc.property_id for epc in estimated_epcs] # I want to get the UPRNS for these properties, from the property model with db_read_session() as session: - estimated_uprns = 
session.query(PropertyModel.uprn).filter( - PropertyModel.id.in_( - session.query(PropertyDetailsEpcModel.property_id).filter( - PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + estimated_uprns = ( + session.query(PropertyModel.uprn) + .filter( + PropertyModel.id.in_( + session.query(PropertyDetailsEpcModel.property_id).filter( + PropertyDetailsEpcModel.id.in_(estimated_epc_ids) + ) ) ) - ).all() + .all() + ) estimated_uprns_list = [uprn for (uprn,) in estimated_uprns] @@ -35,16 +45,16 @@ with db_read_session() as session: sal_1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20251213 Model " "data.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal_2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260105 - additional " "UPRNS.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) sal = pd.concat([sal_1, sal_2]) -sal = sal.drop_duplicates(subset=['epc_os_uprn']) +sal = sal.drop_duplicates(subset=["epc_os_uprn"]) estimated_to_refresh = sal[sal["epc_os_uprn"].isin(estimated_uprns_list)].copy() @@ -55,20 +65,24 @@ SCENARIOS = [ # 861, # EPC C, No EWI/IWI, No Solid Floor, ASHP 3.0 COP # 859, # EPC C - no solid floor, ashp 3.0 # 885, # EPC B - fabric first, no solid floor, ashp 3.0 - 908, 909, 910 + 908, + 909, + 910, ] # Get all plans, associated to these properties - the property IDs are in estimated_epc_ids with db_read_session() as session: result = session.execute( - select(Plan.id, Plan.property_id) - .where(Plan.property_id.in_(estimated_epc_ids)) + select(PlanModel.id, PlanModel.property_id).where( + PlanModel.property_id.in_(estimated_epc_ids) + ) ) plans = [ { "plan_id": row.id, "property_id": row.property_id, - } for row in result + } + for row in result ] df = pd.DataFrame(plans) @@ -96,12 +110,14 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): 
# recommendation_materials # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation_materials rm USING plan_recommendations pr WHERE rm.recommendation_id = pr.recommendation_id AND pr.plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -109,10 +125,12 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plan_recommendations # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) - """), + """ + ), params, ) @@ -120,14 +138,16 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # recommendations (only those used by these plans) # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM recommendation r WHERE r.id IN ( SELECT DISTINCT recommendation_id FROM plan_recommendations WHERE plan_id = ANY(:plan_ids) ) - """), + """ + ), params, ) @@ -135,17 +155,21 @@ def delete_plan_batch(session: Session, plan_ids: list[int]): # plans LAST # ---------------------------- session.execute( - text(""" + text( + """ DELETE FROM plan WHERE id = ANY(:plan_ids) - """), + """ + ), params, ) # Store the SAL -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " - "sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/20260101 " + "sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -164,34 +188,36 @@ with pd.ExcelWriter(filename) as writer: b1 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 1" + sheet_name="batch 1", ) b2 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 2" + 
sheet_name="batch 2", ) b3 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 3" + sheet_name="batch 3", ) b4 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 4" + sheet_name="batch 4", ) b5 = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260101 " "sal.xlsx", - sheet_name="batch 5" + sheet_name="batch 5", ) # Batch 6 should be the remaining total = pd.concat([b1, b2, b3, b4, b5]) remaining = sal[~sal["epc_os_uprn"].isin(total["epc_os_uprn"].values)] # Create new output -filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" - "20260107 corrected batch 6 sal.xlsx") +filename = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/" + "20260107 corrected batch 6 sal.xlsx" +) with pd.ExcelWriter(filename) as writer: sal.to_excel(writer, sheet_name="Standardised Asset List", index=False) @@ -206,6 +232,4 @@ with pd.ExcelWriter(filename) as writer: b5.to_excel(writer, sheet_name="batch 5", index=False) remaining.to_excel(writer, sheet_name="batch 6", index=False) -all_together = pd.concat( - [b1, b2, b3, b4, b5, remaining] -) +all_together = pd.concat([b1, b2, b3, b4, b5, remaining]) diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py index 68655e80..0ec34e7c 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/k_deck_stats.py @@ -110,14 +110,17 @@ import pandas as pd # Solar PV savings - we need the amount of solar PV bill savings from sqlalchemy.orm import sessionmaker from 
backend.app.db.connection import db_engine -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials +from backend.app.db.models.recommendations import ( + Recommendation, + PlanModel, + PlanRecommendations, + RecommendationMaterials, +) from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel from collections import defaultdict PORTFOLIO_ID = 485 # Peabody -SCENARIOS = [ - 970 -] +SCENARIOS = [970] scenario_names = { 970: "EPC C - no solid floor, ashp 3.0", } @@ -130,22 +133,26 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -153,12 +160,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Plans # -------------------- - plans_query = session.query(Plan).filter( - Plan.scenario_id.in_(scenario_ids) - ).all() + plans_query = ( + session.query(PlanModel).filter(PlanModel.scenario_id.in_(scenario_ids)).all() + ) plans_data = [ - {col.name: 
getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -167,27 +174,29 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True), - Recommendation.already_installed.is_(False) - ).all() + recommendations_query = ( + session.query(Recommendation, PlanModel.scenario_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -197,23 +206,25 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendation materials (SEPARATE QUERY) # -------------------- - materials_query = session.query( - RecommendationMaterials - ).filter( - RecommendationMaterials.recommendation_id.in_(recommendation_ids) - ).all() + materials_query = ( + session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + .all() + ) # Group materials by recommendation_id materials_by_recommendation = defaultdict(list) for 
m in materials_query: - materials_by_recommendation[m.recommendation_id].append({ - "material_id": m.material_id, - "depth": m.depth, - "quantity": m.quantity, - "quantity_unit": m.quantity_unit, - "estimated_cost": m.estimated_cost, - }) + materials_by_recommendation[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) # Attach materials safely (no filtering side effects) for r in recommendations_data: @@ -236,12 +247,11 @@ with pd.ExcelWriter("hackney.xlsx", engine="openpyxl") as writer: recommendations_df.to_excel(writer, sheet_name="recommendations", index=False) properties_df.to_excel(writer, sheet_name="properties", index=False) - + # solar_pv_recommendations = recommendations_df[recommendations_df["measure_type"] == "solar_pv"] # average_savings = solar_pv_recommendations.groupby("scenario_id")["energy_cost_savings"].mean().reset_index() - # # Check tenures # initial_asset_data = pd.read_excel( # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/2025_11_11 - Peabody " diff --git a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py index a18dc315..b7010cf7 100644 --- a/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py +++ b/etl/customers/peabody/Nov 2025 Consulting Project/m_reduced_sample_revised.py @@ -4,7 +4,7 @@ import pandas as pd full_sal = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " "SAL/Depracated/20260107 corrected batch 6 sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------Pull in the reduced sample ------ @@ -12,7 +12,7 @@ full_sal = pd.read_excel( reduced_sal = pd.read_excel( 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/20260112 - " "ownership filtered sal.xlsx", - sheet_name="Standardised Asset List" + sheet_name="Standardised Asset List", ) # ------ Pull in the confirmed ownership column from Peabody ------ @@ -20,18 +20,20 @@ new_asset_data = pd.read_excel( "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final SAL/2025_11_11 " "- Peabody " "- Data Extracts for Domna v2.xlsx", - sheet_name="Properties" + sheet_name="Properties", ) correct_sample = new_asset_data[ ~new_asset_data["AH Tenure"].isin( - ["Commercial", - "Freeholder", - "HOMEBUY / EQUITY LOAN", - "Leaseholder", - "Outright Sale", - "SHARED EQUITY", - "Shared Ownership"] + [ + "Commercial", + "Freeholder", + "HOMEBUY / EQUITY LOAN", + "Leaseholder", + "Outright Sale", + "SHARED EQUITY", + "Shared Ownership", + ] ) ].copy() @@ -41,9 +43,7 @@ stuff_to_add = correct_sample[ ~correct_sample["UPRN"].isin(reduced_sal["landlord_property_id"].values) ]["UPRN"].values -sal_to_add = full_sal[ - full_sal["domna_property_id"].isin(stuff_to_add) -].copy() +sal_to_add = full_sal[full_sal["domna_property_id"].isin(stuff_to_add)].copy() # ------- Stuff to remove ------- stuff_to_remove = reduced_sal[ @@ -88,7 +88,7 @@ from backend.app.db.models.portfolio import PropertyModel from backend.app.db.connection import db_session, db_read_session from sqlalchemy import select, func from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel uprns_to_be_deleted = to_delete["epc_os_uprn"].values.tolist() diff --git a/etl/customers/slide_utils.py b/etl/customers/slide_utils.py index 9170ab17..5e027a56 100644 --- a/etl/customers/slide_utils.py +++ b/etl/customers/slide_utils.py @@ -7,7 +7,7 @@ from sqlalchemy.sql import true from backend.app.db.utils import row2dict from backend.app.db.models.portfolio import 
PropertyModel, PropertyDetailsEpcModel from backend.app.db.models.recommendations import Recommendation -from backend.app.db.models.recommendations import Plan +from backend.app.db.models.recommendations import PlanModel from backend.app.utils import sap_to_epc EPC_COLOURS = { @@ -17,7 +17,7 @@ EPC_COLOURS = { "D": "#fdd401", "E": "#fdab67", "F": "#ee8023", - "G": "#e71437" + "G": "#e71437", } @@ -33,22 +33,27 @@ def get_properties_with_default_recommendations(session: Session, portfolio_id: its associated default recommendations if any. """ # Adjust the join to correctly filter recommendations while including all properties - query = session.query(PropertyModel, Recommendation).outerjoin(Recommendation, - (Recommendation.property_id == PropertyModel.id) & ( - Recommendation.default == true())) \ - .filter(PropertyModel.portfolio_id == portfolio_id) \ + query = ( + session.query(PropertyModel, Recommendation) + .outerjoin( + Recommendation, + (Recommendation.property_id == PropertyModel.id) + & (Recommendation.default == true()), + ) + .filter(PropertyModel.portfolio_id == portfolio_id) .all() + ) properties = {} for property, recommendation in query: # Ensure the property is added once with an empty list of recommendations initially if property.id not in properties: properties[property.id] = row2dict(property) - properties[property.id]['recommendations'] = [] + properties[property.id]["recommendations"] = [] # Append recommendations if they exist and meet the criteria (already filtered by the query) if recommendation and recommendation.default: - properties[property.id]['recommendations'].append(row2dict(recommendation)) + properties[property.id]["recommendations"].append(row2dict(recommendation)) return list(properties.values()) @@ -62,11 +67,16 @@ def get_property_details_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a property's details. 
Returns an empty list if no property details are found. """ - property_details = session.query(PropertyDetailsEpcModel).filter( - PropertyDetailsEpcModel.portfolio_id == portfolio_id).all() + property_details = ( + session.query(PropertyDetailsEpcModel) + .filter(PropertyDetailsEpcModel.portfolio_id == portfolio_id) + .all() + ) # Convert the SQLAlchemy objects to dictionaries - property_details_dict = [row2dict(pd) for pd in property_details] if property_details else [] + property_details_dict = ( + [row2dict(pd) for pd in property_details] if property_details else [] + ) return property_details_dict @@ -80,7 +90,9 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): :return: A list of dictionaries, where each dictionary represents a plan. Returns an empty list if no plans are found. """ - plans = session.query(Plan).filter(Plan.portfolio_id == portfolio_id).all() + plans = ( + session.query(PlanModel).filter(PlanModel.portfolio_id == portfolio_id).all() + ) # Convert the SQLAlchemy objects to dictionaries plans_dict = [row2dict(plan) for plan in plans] if plans else [] @@ -88,7 +100,14 @@ def get_plan_by_portfolio_id(session: Session, portfolio_id: int): return plans_dict -def plot_epc_distribution(df, customer_key, title='Your Units', background_color='white', bar_height=0.4, font_size=15): +def plot_epc_distribution( + df, + customer_key, + title="Your Units", + background_color="white", + bar_height=0.4, + font_size=15, +): """ Plots a horizontal bar chart of EPC rating distribution with adjustable bar thickness and text sizes. Allows setting the plot background color and dynamically adjusts text size and bar spacing. 
@@ -100,75 +119,113 @@ def plot_epc_distribution(df, customer_key, title='Your Units', background_color :param font_size: Base font size for text annotations (default 15) """ # Calculate dynamic figure size or adjust based on preferences - square_size = max(6, len(df) * 0.6) # Ensure minimum size and adjust based on number of entries + square_size = max( + 6, len(df) * 0.6 + ) # Ensure minimum size and adjust based on number of entries fig, ax = plt.subplots(figsize=(square_size, square_size)) fig.patch.set_facecolor(background_color) # Set figure background color ax.set_facecolor(background_color) # Set axes background color - df['percentage'] = df['percentage'].round(1) # Round the percentage values to 1 decimal place - df_sorted = df.sort_values('percentage', ascending=True) + df["percentage"] = df["percentage"].round( + 1 + ) # Round the percentage values to 1 decimal place + df_sorted = df.sort_values("percentage", ascending=True) # Plot bars with specified height for adjustable thickness - bars = ax.barh(df_sorted['current_epc_rating'], df_sorted['percentage'], - color=df_sorted['current_epc_rating'].map(EPC_COLOURS), edgecolor='none', height=bar_height) + bars = ax.barh( + df_sorted["current_epc_rating"], + df_sorted["percentage"], + color=df_sorted["current_epc_rating"].map(EPC_COLOURS), + edgecolor="none", + height=bar_height, + ) - epc_rating_font_size = font_size * 2 # EPC rating font size larger than base font size - count_percentage_font_size = font_size # Count (percentage) font size as base font size + epc_rating_font_size = ( + font_size * 2 + ) # EPC rating font size larger than base font size + count_percentage_font_size = ( + font_size # Count (percentage) font size as base font size + ) # Annotate bars with EPC ratings inside and count with percentage values outside for index, bar in enumerate(bars): width = bar.get_width() - epc_rating = df_sorted.iloc[index]['current_epc_rating'] - count = df_sorted.iloc[index]['count'] - percentage = 
df_sorted.iloc[index]['percentage'] + epc_rating = df_sorted.iloc[index]["current_epc_rating"] + count = df_sorted.iloc[index]["count"] + percentage = df_sorted.iloc[index]["percentage"] # EPC rating inside the bar with increased font size - ax.text(width - (width * 0.05), bar.get_y() + bar.get_height() / 2, - f"{epc_rating}", va='center', ha='right', color='white', fontsize=epc_rating_font_size) + ax.text( + width - (width * 0.05), + bar.get_y() + bar.get_height() / 2, + f"{epc_rating}", + va="center", + ha="right", + color="white", + fontsize=epc_rating_font_size, + ) # Count and percentage outside the bar, original font size - ax.text(width + 1, bar.get_y() + bar.get_height() / 2, - f"{count} ({percentage}%)", va='center', color='black', fontsize=count_percentage_font_size) + ax.text( + width + 1, + bar.get_y() + bar.get_height() / 2, + f"{count} ({percentage}%)", + va="center", + color="black", + fontsize=count_percentage_font_size, + ) - ax.set_title(title, fontsize=font_size * 1.2) # Adjust title font size proportionally - ax.tick_params(axis='x', which='both', bottom=False, top=False, - labelbottom=False) # Remove x-axis tick marks and values - ax.tick_params(axis='y', which='both', left=False, right=False, - labelleft=False) # Remove y-axis tick marks and labels - ax.spines['top'].set_visible(False) # Remove top spine - ax.spines['right'].set_visible(False) # Remove right spine - ax.spines['left'].set_visible(False) # Remove left spine - ax.spines['bottom'].set_visible(False) # Remove bottom spine + ax.set_title( + title, fontsize=font_size * 1.2 + ) # Adjust title font size proportionally + ax.tick_params( + axis="x", which="both", bottom=False, top=False, labelbottom=False + ) # Remove x-axis tick marks and values + ax.tick_params( + axis="y", which="both", left=False, right=False, labelleft=False + ) # Remove y-axis tick marks and labels + ax.spines["top"].set_visible(False) # Remove top spine + ax.spines["right"].set_visible(False) # Remove right spine 
+ ax.spines["left"].set_visible(False) # Remove left spine + ax.spines["bottom"].set_visible(False) # Remove bottom spine plt.tight_layout() # Adjust layout plt.show() # Save the figure as an image - figure_path = f'etl/customers/{customer_key}/epc_distribution_plot.png' - fig.savefig(figure_path, bbox_inches='tight') + figure_path = f"etl/customers/{customer_key}/epc_distribution_plot.png" + fig.savefig(figure_path, bbox_inches="tight") plt.close(fig) # Close the figure to free memory return fig, figure_path -def save_plot_to_image(figure, path='plot.png'): +def save_plot_to_image(figure, path="plot.png"): """ Saves a matplotlib figure to an image file for insertion into PowerPoint. """ - figure.savefig(path, bbox_inches='tight') + figure.savefig(path, bbox_inches="tight") plt.close(figure) -def save_figure_as_image(figure, filename='temp_plot.png'): +def save_figure_as_image(figure, filename="temp_plot.png"): """ Saves a matplotlib figure to an image file. """ figure.savefig(filename, dpi=300) - plt.close(figure) # Close the figure to prevent it from displaying in notebooks or Python environments + plt.close( + figure + ) # Close the figure to prevent it from displaying in notebooks or Python environments -def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inches(1), width_inches=Inches(8), - height_inches=Inches(2)): +def add_commentary_with_bullets( + slide, + commentary, + top_inches, + left_inches=Inches(1), + width_inches=Inches(8), + height_inches=Inches(2), +): """ Adds commentary with bullet points to a slide. @@ -179,7 +236,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche :param width_inches: The width of the commentary text box. :param height_inches: The height of the commentary text box. 
""" - txBox = slide.shapes.add_textbox(left_inches, top_inches, width_inches, height_inches) + txBox = slide.shapes.add_textbox( + left_inches, top_inches, width_inches, height_inches + ) tf = txBox.text_frame # Configure text frame @@ -192,7 +251,9 @@ def add_commentary_with_bullets(slide, commentary, top_inches, left_inches=Inche for i, section in enumerate(sections): if i > 0: - p = tf.add_paragraph() # Add a new paragraph for each section after the first + p = ( + tf.add_paragraph() + ) # Add a new paragraph for each section after the first else: p = tf.paragraphs[0] # Use the first paragraph for the first section p.text = section @@ -215,7 +276,9 @@ def add_slide_with_image(prs, title, img_path=None, commentary=None): # Determine the position of the commentary text box based on whether an image is included if img_path: # Add the image - slide.shapes.add_picture(img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5)) + slide.shapes.add_picture( + img_path, Inches(1), Inches(1.5), Inches(8), Inches(4.5) + ) # Position for commentary when image is present commentary_top = Inches(6) else: @@ -237,16 +300,18 @@ def create_powerpoint(data, save_location): prs = Presentation() for slide, slide_data in data.items(): - slide_figure_path = data[slide].get('image_path') - text = data[slide].get('text') - title = data[slide].get('title', "") + slide_figure_path = data[slide].get("image_path") + text = data[slide].get("text") + title = data[slide].get("title", "") add_slide_with_image(prs, title, slide_figure_path, text) # Save the presentation prs.save(save_location) -def create_recommendations_summary(recommendations_df, properties_df, property_details_df, sap_target): +def create_recommendations_summary( + recommendations_df, properties_df, property_details_df, sap_target +): # Aggregate the impact of the recommendations # We want: # Total number of sap points @@ -254,40 +319,52 @@ def create_recommendations_summary(recommendations_df, properties_df, property_d # 
total bill savings # total cost # Total Co2 impact - recommendations_summary = recommendations_df.groupby(["property_id"]).agg( - total_sap_points=("sap_points", "sum"), - total_valuation_impact=("property_valuation_increase", "sum"), - total_bill_savings=("energy_cost_savings", "sum"), - total_cost=("estimated_cost", "sum"), - total_carbon=("co2_equivalent_savings", "sum"), - adjusted_heat_demand=("adjusted_heat_demand", "sum") - ).reset_index() + recommendations_summary = ( + recommendations_df.groupby(["property_id"]) + .agg( + total_sap_points=("sap_points", "sum"), + total_valuation_impact=("property_valuation_increase", "sum"), + total_bill_savings=("energy_cost_savings", "sum"), + total_cost=("estimated_cost", "sum"), + total_carbon=("co2_equivalent_savings", "sum"), + adjusted_heat_demand=("adjusted_heat_demand", "sum"), + ) + .reset_index() + ) # Merge on current sap points, current CO2, current adjusted_heat_demand, current annual bill recommendations_summary = recommendations_summary.merge( - properties_df[["id", "uprn", "current_sap_points"]].rename(columns={"id": "property_id"}), on="property_id", - how="left" + properties_df[["id", "uprn", "current_sap_points"]].rename( + columns={"id": "property_id"} + ), + on="property_id", + how="left", ) recommendations_summary["expected_sap_points"] = ( - recommendations_summary["current_sap_points"] + recommendations_summary["total_sap_points"] + recommendations_summary["current_sap_points"] + + recommendations_summary["total_sap_points"] ) - recommendations_summary["expected_epc_rating"] = recommendations_summary["expected_sap_points"].apply( - lambda x: sap_to_epc(x) + recommendations_summary["expected_epc_rating"] = recommendations_summary[ + "expected_sap_points" + ].apply(lambda x: sap_to_epc(x)) + recommendations_summary["sap_difference"] = ( + sap_target - recommendations_summary["expected_sap_points"] ) - recommendations_summary["sap_difference"] = sap_target - 
recommendations_summary["expected_sap_points"] if property_details_df is not None: recommendations_summary = recommendations_summary.merge( - property_details_df[["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"]].rename( + property_details_df[ + ["uprn", "co2_emissions", "adjusted_energy_consumption", "energy_bill"] + ].rename( columns={ "id": "property_id", "co2_emissions": "current_co2", "adjusted_energy_consumption": "current_energy", - "energy_bill": "current_energy_bill" + "energy_bill": "current_energy_bill", } ), on="uprn", - how="left" + how="left", ) return recommendations_summary diff --git a/infrastructure/terraform/lambda/_template/README.md b/infrastructure/terraform/lambda/_template/README.md index a7282fc9..5bb10627 100644 --- a/infrastructure/terraform/lambda/_template/README.md +++ b/infrastructure/terraform/lambda/_template/README.md @@ -3,7 +3,7 @@ ### 1. Create the Lambda scaffold - Copy the template: - cp -r lambda/_template lambda/ + `cp -r lambda/_template lambda/` --- @@ -12,8 +12,7 @@ infrastructure/terraform/shared/main.tf -- Apply the shared stack - - This requires commenting 'if env.stage == "prod"' in .github/workflows/deploy_terraform.yml +- Create a PR to deploy this to main then dev in order to deploy the shared stack - Verify the ECR repository exists in AWS diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf index 3010aa8a..c6015ea1 100644 --- a/infrastructure/terraform/lambda/_template/main.tf +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -1,3 +1,30 @@ +# ============================================================================== +# TEMPLATE: Lambda Configuration with Optional S3 IAM Policy +# ============================================================================== +# Instructions: +# 1. Replace "REPLACE ME" with your lambda name (e.g., "my-lambda-name") +# 2. Add any additional environment variables as needed +# 3. 
To attach S3 IAM policies from shared state: +# - Uncomment the S3 policy attachment section below +# - Update the policy_arn to match the output from shared/main.tf +# - Available shared outputs (examples): +# - data.terraform_remote_state.shared.outputs.condition_etl_s3_read_arn +# - data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +# 4. To create a NEW S3 policy: +# - Add a new module "lambda_s3_policy" in shared/main.tf using the +# s3_iam_policy module (see examples in shared/main.tf) +# - Then reference it here using data.terraform_remote_state.shared.outputs +# ============================================================================== + +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -6,9 +33,35 @@ module "lambda" { image_uri = local.image_uri + # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) + maximum_concurrency = var.maximum_concurrency + + batch_size = var.batch_size environment = { STAGE = var.stage LOG_LEVEL = "info" } } + +# ====================================================================== +# OPTIONAL: Attach S3 IAM policy to Lambda execution role +# ====================================================================== +# Uncomment and configure the resource below to attach S3 permissions +# +# Example 1: Attach existing policy from shared state +# resource "aws_iam_role_policy_attachment" "lambda_s3_policy" { +# role = module.lambda.role_name +# policy_arn = data.terraform_remote_state.shared.outputs.YOUR_POLICY_OUTPUT_NAME_arn +# } +# +# Example 2: Attach multiple policies +# resource "aws_iam_role_policy_attachment" "lambda_read_policy" { +# role = module.lambda.role_name +# policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +# } +# +# 
resource "aws_iam_role_policy_attachment" "lambda_write_policy" { +# role = module.lambda.role_name +# policy_arn = data.terraform_remote_state.shared.outputs.another_policy_arn +# } diff --git a/infrastructure/terraform/lambda/_template/variables.tf b/infrastructure/terraform/lambda/_template/variables.tf index e4bab243..e7646811 100644 --- a/infrastructure/terraform/lambda/_template/variables.tf +++ b/infrastructure/terraform/lambda/_template/variables.tf @@ -17,6 +17,16 @@ variable "image_digest" { description = "Image digest (sha256:...)" } +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." +} + +variable "batch_size" { + type = number + default = 1 +} locals { image_uri = "${var.ecr_repo_url}@${var.image_digest}" diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf index 46b193f2..2d185497 100644 --- a/infrastructure/terraform/lambda/address2UPRN/main.tf +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -1,3 +1,19 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + module "address2uprn" { source = "../modules/lambda_with_sqs" @@ -6,9 +22,37 @@ module "address2uprn" { image_uri = local.image_uri + timeout = 900 - environment = { - STAGE = var.stage - LOG_LEVEL = "info" - } + # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) + maximum_concurrency = var.maximum_concurrency + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + 
DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + ENGINE_SQS_URL = "test" + ENERGY_ASSESSMENTS_BUCKET = "test" + S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name + }, + ) } + +# Attach S3 read policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "address2uprn_read_and_write" { + role = module.address2uprn.role_name + policy_arn = data.terraform_remote_state.shared.outputs.address_2_uprn_s3_read_and_write_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/address2UPRN/outputs.tf b/infrastructure/terraform/lambda/address2UPRN/outputs.tf new file mode 100644 index 00000000..e4645a0a --- /dev/null +++ b/infrastructure/terraform/lambda/address2UPRN/outputs.tf @@ -0,0 +1,14 @@ +output "address2uprn_queue_url" { + value = module.address2uprn.queue_url + description = "URL of the address2UPRN SQS queue" +} + +output "address2uprn_queue_arn" { + value = module.address2uprn.queue_arn + description = "ARN of the address2UPRN SQS queue" +} + +output "address2uprn_lambda_arn" { + value = module.address2uprn.lambda_arn + description = "ARN of the address2UPRN Lambda function" +} diff --git a/infrastructure/terraform/lambda/address2UPRN/variables.tf b/infrastructure/terraform/lambda/address2UPRN/variables.tf index e4bab243..347964de 100644 --- a/infrastructure/terraform/lambda/address2UPRN/variables.tf +++ b/infrastructure/terraform/lambda/address2UPRN/variables.tf @@ -17,6 +17,11 @@ variable "image_digest" { description = "Image digest (sha256:...)" } +variable 
"maximum_concurrency" { + type = number + default = 10 # null if you don't want to set it for this handler + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." +} locals { image_uri = "${var.ecr_repo_url}@${var.image_digest}" diff --git a/infrastructure/terraform/lambda/categorisation/main.tf b/infrastructure/terraform/lambda/categorisation/main.tf new file mode 100644 index 00000000..b7193da4 --- /dev/null +++ b/infrastructure/terraform/lambda/categorisation/main.tf @@ -0,0 +1,37 @@ +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "categorisation" + stage = var.stage + image_uri = local.image_uri + maximum_concurrency = var.maximum_concurrency + batch_size = var.batch_size + + timeout = 120 + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + } + ) +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/categorisation/provider.tf b/infrastructure/terraform/lambda/categorisation/provider.tf new file mode 100644 index 00000000..fe497c81 --- /dev/null +++ b/infrastructure/terraform/lambda/categorisation/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "categorisation-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at 
end of file diff --git a/infrastructure/terraform/lambda/categorisation/variables.tf b/infrastructure/terraform/lambda/categorisation/variables.tf new file mode 100644 index 00000000..23a78875 --- /dev/null +++ b/infrastructure/terraform/lambda/categorisation/variables.tf @@ -0,0 +1,37 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + +variable "maximum_concurrency" { + type = number + default = 10 # null if you don't want to set it for this handler + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." +} + +variable "batch_size" { + type = number + default = 2 +} + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/lambda/condition-etl/main.tf b/infrastructure/terraform/lambda/condition-etl/main.tf index 4219f209..0128f975 100644 --- a/infrastructure/terraform/lambda/condition-etl/main.tf +++ b/infrastructure/terraform/lambda/condition-etl/main.tf @@ -23,7 +23,6 @@ module "lambda" { stage = var.stage image_uri = local.image_uri - timeout = 180 environment = merge( diff --git a/infrastructure/terraform/lambda/engine/main.tf b/infrastructure/terraform/lambda/engine/main.tf new file mode 100644 index 00000000..9d44c9ed --- /dev/null +++ b/infrastructure/terraform/lambda/engine/main.tf @@ -0,0 +1,74 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + +data "aws_secretsmanager_secret_version" "db_credentials" { + 
secret_id = "${var.stage}/assessment_model/db_credentials" +} + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + + +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "engine" + stage = var.stage + + image_uri = local.image_uri + + # Optional: Set maximum_concurrency to limit concurrent SQS-triggered invocations (2-1000) + maximum_concurrency = var.maximum_concurrency + + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + + # DB from Secrets Manager + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + + # Secrets from GitHub + DB_HOST = var.db_host + DB_NAME = var.db_name + DB_PORT = var.db_port + API_KEY = var.api_key + SECRET_KEY = var.secret_key + DOMAIN_NAME = var.domain_name + EPC_AUTH_TOKEN = var.epc_auth_token + GOOGLE_SOLAR_API_KEY = var.google_solar_api_key + + # Buckets - from terraform state + PLAN_TRIGGER_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_plan_trigger_bucket_name + DATA_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name + SAP_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_sap_predictions_bucket_name + CARBON_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_carbon_predictions_bucket_name + HEAT_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_heat_predictions_bucket_name + HEATING_KWH_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_heating_kwh_predictions_bucket_name + HOTWATER_KWH_PREDICTIONS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_hotwater_kwh_predictions_bucket_name + ENERGY_ASSESSMENTS_BUCKET = data.terraform_remote_state.shared.outputs.retrofit_energy_assessments_bucket_name + + # SQS + ENGINE_SQS_URL = "test" # Not actually needed by engine, only to satisfy Settings + + # Deployment + ECR_URI = 
var.ecr_repo_url + GITHUB_SHA = var.image_digest + } + ) +} + +### Policies and IAM +# S3 +resource "aws_iam_role_policy_attachment" "engine_s3_read_and_write" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.engine_s3_read_and_write_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/engine/provider.tf b/infrastructure/terraform/lambda/engine/provider.tf new file mode 100644 index 00000000..2895d039 --- /dev/null +++ b/infrastructure/terraform/lambda/engine/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "ara-engine-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/engine/variables.tf b/infrastructure/terraform/lambda/engine/variables.tf new file mode 100644 index 00000000..9805d409 --- /dev/null +++ b/infrastructure/terraform/lambda/engine/variables.tf @@ -0,0 +1,71 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + +variable "maximum_concurrency" { + type = number + default = 12 + description = "Maximum number of concurrent Lambda invocations from SQS (2-1000). null = no limit." 
+} + +variable "db_host" { + type = string + sensitive = true +} + +variable "db_name" { + type = string + sensitive = true +} + +variable "db_port" { + type = string + sensitive = true +} + +variable "api_key" { + type = string + sensitive = true +} + +variable "secret_key" { + type = string + sensitive = true +} + +variable "domain_name" { + type = string +} + +variable "epc_auth_token" { + type = string + sensitive = true +} + +variable "google_solar_api_key" { + type = string + sensitive = true +} + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf index 065fb790..74345d24 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf @@ -44,5 +44,6 @@ module "sqs_trigger" { lambda_role_name = module.role.role_name queue_arn = module.queue.queue_arn - batch_size = var.batch_size + batch_size = var.batch_size + maximum_concurrency = var.maximum_concurrency } diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf index afc9246d..b408593f 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf @@ -9,3 +9,4 @@ output "queue_arn" { output "queue_url" { value = module.queue.queue_url } + diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf index b20ab2a8..7c2832d2 100644 --- a/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf @@ -34,3 +34,9 @@ variable 
"batch_size" { type = number default = 10 } + +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS. null = no limit." +} diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf index ebbdbfdc..d37a01c9 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/main.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -1,3 +1,30 @@ +data "terraform_remote_state" "shared" { + backend = "s3" + config = { + bucket = "assessment-model-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} +data "aws_secretsmanager_secret_version" "db_credentials" { + secret_id = "${var.stage}/assessment_model/db_credentials" +} + + +locals { + db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string) +} + +# Reference the existing address2UPRN Lambda outputs from address2uprn state +data "terraform_remote_state" "address2uprn" { + backend = "s3" + config = { + bucket = "address2uprn-terraform-state" + key = "env:/${var.stage}/terraform.tfstate" + region = "eu-west-2" + } +} + module "lambda" { source = "../modules/lambda_with_sqs" @@ -7,8 +34,56 @@ module "lambda" { image_uri = local.image_uri - environment = { - STAGE = var.stage - LOG_LEVEL = "info" - } + environment = merge( + { + STAGE = var.stage + LOG_LEVEL = "info" + DB_USERNAME = local.db_credentials.db_assessment_model_username + DB_PASSWORD = local.db_credentials.db_assessment_model_password + GOOGLE_SOLAR_API_KEY = "test" + SAP_PREDICTIONS_BUCKET = "test" + CARBON_PREDICTIONS_BUCKET = "test" + HEAT_PREDICTIONS_BUCKET = "test" + HEATING_KWH_PREDICTIONS_BUCKET = "test" + HOTWATER_KWH_PREDICTIONS_BUCKET = "test" + API_KEY = "test" + ENVIRONMENT = "test" + SECRET_KEY = "test" + PLAN_TRIGGER_BUCKET = "test" + DATA_BUCKET = "test" + EPC_AUTH_TOKEN = "test" + ENGINE_SQS_URL = 
"test" + ENERGY_ASSESSMENTS_BUCKET = "test" + ADDRESS2UPRN_QUEUE_URL = data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_url + S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name + }, + ) } + +# Attach S3 read policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "postcode_splitter_s3_read" { + role = module.lambda.role_name + policy_arn = data.terraform_remote_state.shared.outputs.postcode_splitter_s3_read_arn +} + +# Create SQS send policy for address2UPRN queue +module "postcode_splitter_sqs_policy" { + source = "../../modules/general_iam_policy" + + policy_name = "postcode-splitter-sqs-send-${var.stage}" + policy_description = "Allow postcode-splitter Lambda to send messages to address2UPRN queue" + + actions = [ + "sqs:SendMessage" + ] + + resources = [ + data.terraform_remote_state.address2uprn.outputs.address2uprn_queue_arn + ] +} + +# Attach SQS policy to the Lambda execution role +resource "aws_iam_role_policy_attachment" "postcode_splitter_sqs_send" { + role = module.lambda.role_name + policy_arn = module.postcode_splitter_sqs_policy.policy_arn +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf index 9ce45fa5..7bd68543 100644 --- a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -24,3 +24,12 @@ locals { output "resolved_image_uri" { value = local.image_uri } + + + + + + + + + diff --git a/infrastructure/terraform/modules/general_iam_policy/main.tf b/infrastructure/terraform/modules/general_iam_policy/main.tf new file mode 100644 index 00000000..f7ffe4a1 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/main.tf @@ -0,0 +1,21 @@ +# IAM Policy with dynamic actions and resources +resource "aws_iam_policy" "policy" { + name = var.policy_name + description = 
var.policy_description + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + merge( + { + Effect = "Allow" + Action = var.actions + Resource = var.resources + }, + var.conditions != null ? { Condition = var.conditions } : {} + ) + ] + }) + + tags = var.tags +} diff --git a/infrastructure/terraform/modules/general_iam_policy/outputs.tf b/infrastructure/terraform/modules/general_iam_policy/outputs.tf new file mode 100644 index 00000000..cfceab05 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/outputs.tf @@ -0,0 +1,9 @@ +output "policy_arn" { + value = aws_iam_policy.policy.arn + description = "ARN of the created IAM policy" +} + +output "policy_name" { + value = aws_iam_policy.policy.name + description = "Name of the created IAM policy" +} diff --git a/infrastructure/terraform/modules/general_iam_policy/variables.tf b/infrastructure/terraform/modules/general_iam_policy/variables.tf new file mode 100644 index 00000000..0d824eb5 --- /dev/null +++ b/infrastructure/terraform/modules/general_iam_policy/variables.tf @@ -0,0 +1,32 @@ +variable "policy_name" { + description = "Name of the IAM policy" + type = string +} + +variable "policy_description" { + description = "Description of the IAM policy" + type = string + default = "" +} + +variable "actions" { + description = "List of IAM actions allowed by this policy" + type = list(string) +} + +variable "resources" { + description = "List of AWS resources this policy applies to" + type = list(string) +} + +variable "conditions" { + description = "Optional IAM policy conditions" + type = any + default = null +} + +variable "tags" { + description = "Tags to apply to the policy" + type = map(string) + default = {} +} diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/infrastructure/terraform/modules/lambda_execution_role/main.tf index fa657afd..e593b17c 100644 --- a/infrastructure/terraform/modules/lambda_execution_role/main.tf +++ 
b/infrastructure/terraform/modules/lambda_execution_role/main.tf @@ -19,19 +19,3 @@ resource "aws_iam_role_policy_attachment" "basic_logs" { policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" } -resource "aws_iam_role_policy" "ecr_pull" { - role = aws_iam_role.this.name - - policy = jsonencode({ - Version = "2012-10-17" - Statement = [{ - Effect = "Allow" - Action = [ - "ecr:GetAuthorizationToken", - "ecr:BatchGetImage", - "ecr:GetDownloadUrlForLayer" - ] - Resource = "*" - }] - }) -} diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf index 5919e10f..4afaf773 100644 --- a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf @@ -3,6 +3,13 @@ resource "aws_lambda_event_source_mapping" "this" { function_name = var.lambda_arn batch_size = var.batch_size enabled = true + + dynamic "scaling_config" { + for_each = var.maximum_concurrency != null ? [1] : [] + content { + maximum_concurrency = var.maximum_concurrency + } + } } resource "aws_iam_role_policy" "allow_sqs" { diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf index 0e50cd54..c3127c74 100644 --- a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf @@ -6,3 +6,9 @@ variable "batch_size" { type = number default = 10 } + +variable "maximum_concurrency" { + type = number + default = null + description = "Maximum number of concurrent Lambda invocations from SQS. null = no limit." 
+} diff --git a/infrastructure/terraform/modules/s3_iam_policy/main.tf b/infrastructure/terraform/modules/s3_iam_policy/main.tf new file mode 100644 index 00000000..397bd963 --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/main.tf @@ -0,0 +1,31 @@ +# Dynamically build S3 resources list from bucket ARNs and resource paths +locals { + # Generate full resource ARNs by combining bucket ARNs with resource paths + resources = flatten([ + for bucket_arn in var.bucket_arns : [ + for path in var.resource_paths : "${bucket_arn}${path}" + ] + ]) +} + +# IAM Policy with dynamic actions and resources +resource "aws_iam_policy" "s3_policy" { + name = var.policy_name + description = var.policy_description + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [ + merge( + { + Effect = "Allow" + Action = var.actions + Resource = local.resources + }, + var.conditions != null ? { Condition = var.conditions } : {} + ) + ] + }) + + tags = var.tags +} diff --git a/infrastructure/terraform/modules/s3_iam_policy/outputs.tf b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf new file mode 100644 index 00000000..85defd9c --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/outputs.tf @@ -0,0 +1,14 @@ +output "policy_arn" { + description = "ARN of the S3 IAM policy" + value = aws_iam_policy.s3_policy.arn +} + +output "policy_name" { + description = "Name of the S3 IAM policy" + value = aws_iam_policy.s3_policy.name +} + +output "policy_id" { + description = "ID of the S3 IAM policy" + value = aws_iam_policy.s3_policy.id +} diff --git a/infrastructure/terraform/modules/s3_iam_policy/variables.tf b/infrastructure/terraform/modules/s3_iam_policy/variables.tf new file mode 100644 index 00000000..e2b3d7a8 --- /dev/null +++ b/infrastructure/terraform/modules/s3_iam_policy/variables.tf @@ -0,0 +1,42 @@ +variable "policy_name" { + description = "Name of the IAM policy" + type = string +} + +variable "policy_description" { + description = 
"Description of the IAM policy" + type = string + default = "" +} + +variable "bucket_arns" { + description = "List of S3 bucket ARNs to grant access to" + type = list(string) +} + +variable "actions" { + description = "List of S3 actions to allow (e.g., ['s3:GetObject'], ['s3:PutObject'], ['s3:DeleteObject'])" + type = list(string) + default = ["s3:GetObject"] +} + +variable "resource_paths" { + description = "List of resource paths within buckets (e.g., ['/*'] for all objects, ['/specific-prefix/*'] for specific prefix)" + type = list(string) + default = ["/*"] +} + +variable "conditions" { + description = "Optional IAM policy conditions to apply to the statement" + type = any + default = null +} + +variable "tags" { + description = "Tags to apply to the policy" + type = map(string) + default = {} +} + + + diff --git a/infrastructure/terraform/modules/ses/main.tf b/infrastructure/terraform/modules/ses/main.tf index e8f183ae..cb7f9087 100644 --- a/infrastructure/terraform/modules/ses/main.tf +++ b/infrastructure/terraform/modules/ses/main.tf @@ -12,6 +12,35 @@ resource "aws_iam_user" "ses_user" { name = "${var.stage}-ses-user" } +# SES configuration set for tracking events +resource "aws_ses_configuration_set" "this" { + name = "${var.stage}-ses-config" +} + +# SNS topic for SES event notifications +resource "aws_sns_topic" "ses_events" { + name = "${var.stage}-ses-events" +} + +# SES event destination for debugging +resource "aws_ses_event_destination" "sns" { + name = "ses-event-destination" + configuration_set_name = aws_ses_configuration_set.this.name + enabled = true + + matching_types = [ + "send", + "bounce", + "reject", + "complaint", + "delivery" + ] + + sns_destination { + topic_arn = aws_sns_topic.ses_events.arn + } +} + resource "aws_iam_user_policy" "ses_send_policy" { name = "AllowSESSendEmail" user = aws_iam_user.ses_user.name @@ -20,8 +49,8 @@ resource "aws_iam_user_policy" "ses_send_policy" { Version = "2012-10-17" Statement = [ { - Effect = 
"Allow" - Action = [ + Effect = "Allow" + Action = [ "ses:SendEmail", "ses:SendRawEmail" ] @@ -42,9 +71,9 @@ resource "aws_secretsmanager_secret" "ses_smtp" { } resource "aws_secretsmanager_secret_version" "ses_smtp" { - secret_id = aws_secretsmanager_secret.ses_smtp.id + secret_id = aws_secretsmanager_secret.ses_smtp.id secret_string = jsonencode({ username = aws_iam_access_key.ses_user.id password = aws_iam_access_key.ses_user.ses_smtp_password_v4 }) -} \ No newline at end of file +} diff --git a/infrastructure/terraform/modules/ses/variables.tf b/infrastructure/terraform/modules/ses/variables.tf index d8c97d6d..26d63b82 100644 --- a/infrastructure/terraform/modules/ses/variables.tf +++ b/infrastructure/terraform/modules/ses/variables.tf @@ -7,3 +7,4 @@ variable "stage" { description = "Deployment stage (e.g. dev, prod)" type = string } + diff --git a/infrastructure/terraform/modules/sqs_queue/main.tf b/infrastructure/terraform/modules/sqs_queue/main.tf index 580e67bd..afb7dc27 100644 --- a/infrastructure/terraform/modules/sqs_queue/main.tf +++ b/infrastructure/terraform/modules/sqs_queue/main.tf @@ -5,7 +5,7 @@ resource "aws_sqs_queue" "dlq" { resource "aws_sqs_queue" "this" { name = var.name - visibility_timeout_seconds = 120 + visibility_timeout_seconds = 1000 redrive_policy = jsonencode({ deadLetterTargetArn = aws_sqs_queue.dlq.arn diff --git a/infrastructure/terraform/modules/sqs_queue/variables.tf b/infrastructure/terraform/modules/sqs_queue/variables.tf index 943a7a16..95b33231 100644 --- a/infrastructure/terraform/modules/sqs_queue/variables.tf +++ b/infrastructure/terraform/modules/sqs_queue/variables.tf @@ -2,5 +2,5 @@ variable "name" { type = string } variable "max_receive_count" { type = number - default = 5 + default = 1 } diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index b1474055..c19e3a0c 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -85,6 
+85,13 @@ resource "aws_db_instance" "default" { apply_immediately = true # Set up storage type to gp3 for better performance storage_type = "gp3" + + # Automated backups configuration + backup_retention_period = 14 + backup_window = "03:00-04:00" + maintenance_window = "Sun:02:00-Sun:02:30" + copy_tags_to_snapshot = true + deletion_protection = true } # Set up the bucket that recieve the csv uploads of epc to be retrofit @@ -95,6 +102,11 @@ module "s3_presignable_bucket" { allowed_origins = var.allowed_origins } +output "retrofit_plan_trigger_bucket_name" { + value = module.s3_presignable_bucket.bucket_name + description = "Name of the retrofit plan trigger bucket" +} + module "s3_due_considerations_bucket" { source = "../modules/s3_presignable_bucket" bucketname = "retrofit-due-considerations-${var.stage}" @@ -127,24 +139,44 @@ module "retrofit_sap_predictions" { allowed_origins = var.allowed_origins } +output "retrofit_sap_predictions_bucket_name" { + value = module.retrofit_sap_predictions.bucket_name + description = "Name of the retrofit SAP predictions bucket" +} + module "retrofit_sap_data" { source = "../modules/s3" bucketname = "retrofit-data-${var.stage}" allowed_origins = var.allowed_origins } +output "retrofit_sap_data_bucket_name" { + value = module.retrofit_sap_data.bucket_name + description = "Name of the retrofit SAP data bucket" +} + module "retrofit_carbon_predictions" { source = "../modules/s3" bucketname = "retrofit-carbon-predictions-${var.stage}" allowed_origins = var.allowed_origins } +output "retrofit_carbon_predictions_bucket_name" { + value = module.retrofit_carbon_predictions.bucket_name + description = "Name of the retrofit carbon predictions bucket" +} + module "retrofit_heat_predictions" { source = "../modules/s3" bucketname = "retrofit-heat-predictions-${var.stage}" allowed_origins = var.allowed_origins } +output "retrofit_heat_predictions_bucket_name" { + value = module.retrofit_heat_predictions.bucket_name + description = "Name of 
the retrofit heat predictions bucket" +} + module "retrofit_lighting_cost_predictions" { source = "../modules/s3" bucketname = "retrofit-lighting-cost-predictions-${var.stage}" @@ -169,12 +201,22 @@ module "retrofit_heating_kwh_predictions" { allowed_origins = var.allowed_origins } +output "retrofit_heating_kwh_predictions_bucket_name" { + value = module.retrofit_heating_kwh_predictions.bucket_name + description = "Name of the retrofit heating kWh predictions bucket" +} + module "retrofit_hotwater_kwh_predictions" { source = "../modules/s3" bucketname = "retrofit-hotwater-kwh-predictions-${var.stage}" allowed_origins = var.allowed_origins } +output "retrofit_hotwater_kwh_predictions_bucket_name" { + value = module.retrofit_hotwater_kwh_predictions.bucket_name + description = "Name of the retrofit hotwater kWh predictions bucket" +} + module "retrofit_sap_baseline_predictions" { source = "../modules/s3" bucketname = "retrofit-sap-baseline-predictions-${var.stage}" @@ -189,6 +231,11 @@ module "retrofit_energy_assessments" { environment = var.stage } +output "retrofit_energy_assessments_bucket_name" { + value = module.retrofit_energy_assessments.bucket_name + description = "Name of the retrofit energy assessments bucket" +} + # Set up the route53 record for the API module "route53" { source = "../modules/route53" @@ -305,6 +352,21 @@ module "address2uprn_registry" { } +# S3 policy for address2uprn to read from and write to the retrofit data bucket +module "address2uprn_s3_read_and_write" { + source = "../modules/s3_iam_policy" + + policy_name = "Address2UPRNReadandWriteS3" + policy_description = "Allow address2uprn Lambda to read and write from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"] + resource_paths = ["/*"] +} + +output "address_2_uprn_s3_read_and_write_arn" { + value = module.address2uprn_s3_read_and_write.policy_arn +} + ################################################ # 
Condition ETL – Lambda ECR ################################################ @@ -321,6 +383,28 @@ module "condition_etl_registry" { } +# Condition Data S3 Bucket to store initial data +module "condition_data_bucket" { + source = "../modules/s3" + bucketname = "condition-data-${var.stage}" + allowed_origins = var.allowed_origins +} + +module "condition_etl_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "ConditionETLReadS3" + policy_description = "Allow Lambda to read objects from condition-data-${var.stage}" + bucket_arns = ["arn:aws:s3:::condition-data-${var.stage}"] + actions = ["s3:GetObject"] + resource_paths = ["/*"] +} + +output "condition_etl_s3_read_arn" { + value = module.condition_etl_s3_read.policy_arn +} + + ################################################ # Postcode Splitter – Lambda ECR ################################################ @@ -337,30 +421,71 @@ module "postcode_splitter_registry" { } -################################################ -# Conidition data – S3 bucket -################################################ -module "condition_data_bucket" { - source = "../modules/s3" - bucketname = "condition-data-${var.stage}" - allowed_origins = var.allowed_origins +# S3 policy for postcode splitter to read from retrofit data bucket +module "postcode_splitter_s3_read" { + source = "../modules/s3_iam_policy" + + policy_name = "PostcodeSplitterReadS3" + policy_description = "Allow postcode splitter Lambda to read from retrofit-data bucket" + bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"] + actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"] + resource_paths = ["/*"] } -resource "aws_iam_policy" "condition_etl_s3_read" { - name = "ConditionETLReadS3" - description = "Allow Lambda to read objects from condition-data-${var.stage}" - policy = jsonencode({ - Version = "2012-10-17" - Statement = [ - { - Effect = "Allow" - Action = ["s3:GetObject"] - Resource = "arn:aws:s3:::condition-data-${var.stage}/*" - } - ] - }) 
+output "postcode_splitter_s3_read_arn" { + value = module.postcode_splitter_s3_read.policy_arn } -output "condition_etl_s3_read_arn" { - value = aws_iam_policy.condition_etl_s3_read.arn -} \ No newline at end of file +################################################ +# Categorisation – Lambda ECR +################################################ +module "categorisation_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "categorisation-terraform-state" + +} + +module "categorisation_registry" { + source = "../modules/container_registry" + name = "categorisation" + stage = var.stage +} + +################################################ +# Engine – Lambda ECR +################################################ +module "engine_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "ara-engine-terraform-state" + +} + +module "engine_registry" { + source = "../modules/container_registry" + name = "engine" + stage = var.stage +} + +# S3 policy for Engine to read and write from various S3 buckets +module "engine_s3_read_and_write" { + source = "../modules/s3_iam_policy" + + policy_name = "EngineReadandWriteS3" + policy_description = "Allow Engine Lambda to read from and write to various S3 buckets" + bucket_arns = [ + "arn:aws:s3:::${module.s3_presignable_bucket.bucket_name}", + "arn:aws:s3:::${module.retrofit_sap_data.bucket_name}", + "arn:aws:s3:::${module.retrofit_sap_predictions.bucket_name}", + "arn:aws:s3:::${module.retrofit_carbon_predictions.bucket_name}", + "arn:aws:s3:::${module.retrofit_heat_predictions.bucket_name}", + "arn:aws:s3:::${module.retrofit_heating_kwh_predictions.bucket_name}", + "arn:aws:s3:::${module.retrofit_hotwater_kwh_predictions.bucket_name}", + "arn:aws:s3:::${module.retrofit_energy_assessments.bucket_name}" + ] + actions = ["s3:*"] + resource_paths = ["/*"] +} + +output "engine_s3_read_and_write_arn" { + value = module.engine_s3_read_and_write.policy_arn +} diff --git a/pyproject.toml b/pyproject.toml 
new file mode 100644 index 00000000..72ec3f0c --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,3 @@ +[tool.pyright] +reportUnknownMemberType = false +reportUnknownVariableType = false \ No newline at end of file diff --git a/pyrightconfig.json b/pyrightconfig.json new file mode 100644 index 00000000..d4e0e2a4 --- /dev/null +++ b/pyrightconfig.json @@ -0,0 +1,8 @@ +{ + "typeCheckingMode": "strict", + "venvPath": "/Users/khalimconn-kowlessar/opt/anaconda3/envs/", + "venv": "Fastapi-backend", + "include": [ + "." + ] +} \ No newline at end of file diff --git a/pytest.ini b/pytest.ini index ee203d46..608d5e0c 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,6 @@ [pytest] pythonpath = . +log_cli = true +log_cli_level = INFO addopts = --cov-report term-missing --cov=etl/epc --cov=recommendations --cov=backend --cov=etl/epc_clean --cov=etl/spatial -testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests +testpaths = recommendations/tests backend/tests etl/epc/tests etl/epc_clean/tests etl/spatial/tests backend/condition/tests backend/address2UPRN/tests backend/onboarders/tests backend/categorisation/tests backend/export/tests diff --git a/recommendations/Recommendations.py b/recommendations/Recommendations.py index acd49e05..80cc06b4 100644 --- a/recommendations/Recommendations.py +++ b/recommendations/Recommendations.py @@ -499,8 +499,16 @@ class Recommendations: return predicted_appliances_cost_reduction, predicted_appliances_kwh_reduction @staticmethod - def _check_ventilation_out_of_bounds(sap_impact, ventilation_sap_limit): - return (sap_impact < ventilation_sap_limit) or (sap_impact >= 0) + def _check_ventilation_out_of_bounds(sap_impact: float, ventilation_sap_limit: float) -> bool: + """ + Checks if the SAP impact of a ventilation recommendation is out of bounds, which would indicate that the + recommendation is not appropriate. 
+ :param sap_impact: The SAP impact of the ventilation recommendation, which is typically negative or zero + :param ventilation_sap_limit: The SAP limit for ventilation recommendations, which is typically a negative + number. E.g. -4 + :return: + """ + return (sap_impact < ventilation_sap_limit) or (sap_impact > 0) @staticmethod def _adjust_ventilation_sap(sap_impact, ventilation_sap_limit): @@ -574,6 +582,7 @@ class Recommendations: if rec_phase == starting_phase: return { "sap": float(property_instance.data["current-energy-efficiency"]), + "sap_prediction": float(property_instance.data["current-energy-efficiency"]), "carbon": float(property_instance.data["co2-emissions-current"]), "heat_demand": float(property_instance.data["energy-consumption-current"]), } @@ -591,12 +600,13 @@ class Recommendations: if not previous_phase_reps: return { "sap": float(property_instance.data["current-energy-efficiency"]), + "sap_prediction": float(property_instance.data["current-energy-efficiency"]), "carbon": float(property_instance.data["co2-emissions-current"]), "heat_demand": float(property_instance.data["energy-consumption-current"]), } # Median fallback (including zero-length case) - keys = ("sap", "carbon", "heat_demand") + keys = ("sap", "sap_prediction", "carbon", "heat_demand") return { key: np.median([item[key] for item in previous_phase_reps]) for key in keys @@ -691,7 +701,8 @@ class Recommendations: previous_phase_values: dict, current_phase_values: dict, adjustments: list, - property_instance, + property_instance: Property, + model_predicted_sap: float, ): # For the moment, we cap the number of SAP points that can be achieved by LEDs at 2 if rec["type"] == "low_energy_lighting": @@ -785,7 +796,6 @@ class Recommendations: # Update the current phase values current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] - elif rec["type"] == "loft_insulation": # When we have a loft insulation recommendation, where there is an extension and the 
existing # amount of loft insulation is already good, we limit the SAP points @@ -831,6 +841,27 @@ class Recommendations: # Update the current phase values current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] + elif rec["measure_type"] in ["roomstat_programmer_trvs", "time_temperature_zone_control"]: + # We trim the SAP point recommendations based on the minimum of the predicted and the survey SAP + # points + predicted_difference = model_predicted_sap - previous_phase_values["sap_prediction"] + proposed_impact = property_phase_impact["sap"] + numerically_the_same = np.isclose(proposed_impact, predicted_difference) + + if predicted_difference > 0 and (predicted_difference < proposed_impact) and not numerically_the_same: + # We constrain the impact based on what the model predicts. + # We update the proposed impact to be the predicted difference + adjustments.append( + { + "recommendation_id": rec["recommendation_id"], + "phase": rec["phase"], + # If we've made an adjustment, it will be negative + "sap_adjustment": property_phase_impact["sap"] - predicted_difference, + } + ) + property_phase_impact["sap"] = predicted_difference + # Update the current phase values + current_phase_values["sap"] = previous_phase_values["sap"] + property_phase_impact["sap"] return property_phase_impact, current_phase_values, adjustments @@ -963,7 +994,8 @@ class Recommendations: previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=phase_energy_efficiency_metrics["sap_change"], ) # Insert this information into the recommendation. 
diff --git a/recommendations/SecondaryHeating.py b/recommendations/SecondaryHeating.py index ee7eae1c..ef0fc2d2 100644 --- a/recommendations/SecondaryHeating.py +++ b/recommendations/SecondaryHeating.py @@ -18,6 +18,9 @@ class SecondaryHeating: def recommend(self, phase: int): # Reset self.recommendation = [] + if self.property.epc_record.secondheat_description in ["None", None]: + # No secondary heating system, so no recommendation to remove it + return if self.property.data['number-habitable-rooms'] > self.property.data['number-heated-rooms']: n_rooms = self.property.data['number-habitable-rooms'] - self.property.data['number-heated-rooms'] diff --git a/recommendations/optimiser/CostOptimiser.py b/recommendations/optimiser/CostOptimiser.py index 8f030123..43e303a7 100644 --- a/recommendations/optimiser/CostOptimiser.py +++ b/recommendations/optimiser/CostOptimiser.py @@ -1,4 +1,5 @@ from mip import Model, xsum, minimize, BINARY, OptimizationStatus +from typing import Mapping from utils.logger import setup_logger logger = setup_logger() @@ -12,13 +13,20 @@ class CostOptimiser: # We add an optional buffer to the minimum gain to allow for slack in the optimisation BUFFER = 0.2 - def __init__(self, components, min_gain, verbose=False): + def __init__( + self, + components: list[list[Mapping[str, int | float | str]]], + min_gain: float | int, + verbose: bool = False, + allow_slack: bool = True + ): self.components = components self.min_gain = min_gain self.gain_constraint = None self.m = None self.variables = [] self.solution = [] + self.allow_slack = allow_slack self.solution_cost = None self.solution_gain = None @@ -81,6 +89,20 @@ class CostOptimiser: for group_vars in self.variables: self.m += xsum(var for var in group_vars) <= 1 + def add_budget_constraint(self, budget: int | float) -> None: + # Inject budget constraint, which ensures that sum of cost_ig * x_ig <= budget, where cost_ig represents the + # cost for the ith component in group g, and x_ig is the 
binary decision variable for the ith component in + # group g + + self.m += ( + xsum( + item["cost"] * var + for group, group_vars in zip(self.components, self.variables) + for item, var in zip(group, group_vars) + ) + <= budget + ) + def setup_slack(self): # Remove the original gain constraint @@ -109,10 +131,17 @@ class CostOptimiser: self.m.optimize() if self.m.status == OptimizationStatus.INFEASIBLE: - # Turn off logging - too noisy - # logger.info("We have an infeasible model, setting up slack model") - self.setup_slack() - self.m.optimize() + if self.allow_slack: + self.setup_slack() + self.m.optimize() + else: + # Explicitly return an empty solution + self.solution = [] + self.solution_cost = 0 + self.solution_gain = 0 + return + + # If we still have an infeasible solution, we return an empty solution self.solution = [ item for group, group_vars in zip(self.components, self.variables) for item, var in zip(group, group_vars) diff --git a/recommendations/optimiser/GainOptimiser.py b/recommendations/optimiser/GainOptimiser.py index 6b757bf1..bd907b4d 100644 --- a/recommendations/optimiser/GainOptimiser.py +++ b/recommendations/optimiser/GainOptimiser.py @@ -1,5 +1,6 @@ from mip import Model, xsum, maximize, BINARY, OptimizationStatus from utils.logger import setup_logger +from typing import Mapping logger = setup_logger() @@ -9,7 +10,14 @@ class GainOptimiser: This class is used to maximise gain, given a constrained cost """ - def __init__(self, components, max_cost, max_gain, allow_slack=True, verbose=False): + def __init__( + self, + components: list[list[Mapping[str, int | float | str]]], + max_cost: float | int, + max_gain: float | int | None, + allow_slack: bool = True, + verbose: bool = False + ): """ This function will try and maximise the gain, given a constrained cost. 
If we specific a max_gain, then the optimisation routine is constained to try not to exceed a maximum increase @@ -21,8 +29,8 @@ class GainOptimiser: :param components: List of components, where each component is a dictionary with keys "id", "cost" and "gain" :param max_cost: Maximum cost constraint :param max_gain: Maximum gain constraint - :param allow_slack: If True, allows the model to use slack variables to relax the cost constraint if the model - is infeasible. Defaults to True. + :param allow_slack: If True, and the solution is infeasible, allows the model to use slack variables to relax + the cost constraint. Defaults to True. :param verbose: If True, enables verbose logging """ self.components = components @@ -148,5 +156,5 @@ class GainOptimiser: self.solution = solution - self.solution_gain = self.m.objective.x + self.solution_gain = sum(component['gain'] for component in self.solution) self.solution_cost = sum([component['cost'] for component in self.solution]) diff --git a/recommendations/optimiser/StrategicOptimiser.py b/recommendations/optimiser/StrategicOptimiser.py new file mode 100644 index 00000000..81998368 --- /dev/null +++ b/recommendations/optimiser/StrategicOptimiser.py @@ -0,0 +1,176 @@ +from enum import Enum +from mip import OptimizationStatus +from typing import Mapping, Optional, TypedDict, List +from recommendations.optimiser.CostOptimiser import CostOptimiser +from recommendations.optimiser.GainOptimiser import GainOptimiser + + +class Measure(TypedDict): + id: str + cost: float + gain: float + + +class Strategies(Enum): + CASE_1_TRY_MIN_COST_WITH_CONSTRAINTS = "case_1_try_min_cost_with_constraints" + CASE_1_SOLVE_MAX_GAIN_UNDER_BUDGET = "case_1_solve_max_gain_under_budget" + CASE_2_SOLVE_MAX_GAIN_UNDER_BUDGET = "case_2_solve_max_gain_under_budget" + CASE_3_SOLVE_MIN_COST_FOR_TARGET = "case_3_solve_min_cost_for_target" + + +class StrategicOptimiser: + """ + Domain-level optimiser implementing logical optimisation logic.
+ + Behaviour: + + 1) If both budget and target_gain are provided: + - Minimise cost subject to: + gain >= target_gain + cost <= budget + - If infeasible: + maximise gain subject to cost <= budget + + 2) If only budget is provided: + - Maximise gain under budget + + 3) If only target_gain is provided: + - Minimise cost to achieve gain + + """ + + def __init__( + self, + components: list[list[Mapping[str, int | float | str]]], + budget: Optional[float] = None, + target_gain: Optional[float] = None, + allow_slack: bool = False, + verbose: bool = False, + ) -> None: + + if not components: + raise ValueError("Components cannot be empty.") + + if budget is None and target_gain is None: + raise ValueError("At least one of budget or target_gain must be provided.") + + self.components = components + self.budget = budget + self.target_gain = target_gain + self.verbose = verbose + self.allow_slack = allow_slack + + self.solution: List[Measure] = [] + self.solution_cost: float = 0.0 + self.solution_gain: float = 0.0 + + # For debugging purposes, we keep a record of which option was selected + self.strategy_used: Optional[Strategies] = None + + def solve(self) -> None: + """ + Primary entry point for solving the optimisation problem based on the provided budget and target gain. + :return: + """ + + # Case 1: budget + target + if self.budget is not None and self.target_gain is not None: + # Given: + # Budget B + # Target gain G + # + # We want the solution to: + # + # Primary problem (P1) + # min cost + # subject to + # + # gain >= G + # cost <= B + # multiple-choice constraints + # + # If (P1) is feasible → that solution is exactly what you want. 
+ # If (P1) is infeasible → solve the following problem (P2): + # + # max gain + # subject to + # + # cost <= B + if self._try_min_cost_with_constraints(): + # Keep a record of the strategy used to solve the problem, for debugging purposes + self.strategy_used = Strategies.CASE_1_TRY_MIN_COST_WITH_CONSTRAINTS + return + self._solve_max_gain_under_budget() + self.strategy_used = Strategies.CASE_1_SOLVE_MAX_GAIN_UNDER_BUDGET + return + + # Case 2: budget only + if self.budget is not None: + self._solve_max_gain_under_budget() + self.strategy_used = Strategies.CASE_2_SOLVE_MAX_GAIN_UNDER_BUDGET + return + + # Case 3: target only + self._solve_min_cost_for_target() + self.strategy_used = Strategies.CASE_3_SOLVE_MIN_COST_FOR_TARGET + return + + # --------------------------------------------------------- + # Internal Functions + # --------------------------------------------------------- + + def _try_min_cost_with_constraints(self) -> bool: + """ + Try to minimise cost while satisfying: + gain >= target_gain + cost <= budget + """ + + opt = CostOptimiser( + self.components, + min_gain=self.target_gain, + verbose=self.verbose, + allow_slack=self.allow_slack + ) + + opt.setup() + opt.add_budget_constraint(self.budget) + opt.solve() + + if opt.m.status == OptimizationStatus.INFEASIBLE: + return False + + self._store_solution(opt.solution) + return True + + def _solve_max_gain_under_budget(self) -> None: + + opt = GainOptimiser( + self.components, + max_cost=self.budget, + max_gain=None, + allow_slack=self.allow_slack, + verbose=self.verbose + ) + + opt.setup() + opt.solve() + + self._store_solution(opt.solution) + + def _solve_min_cost_for_target(self) -> None: + + opt = CostOptimiser( + self.components, + min_gain=self.target_gain, + verbose=self.verbose + ) + opt.setup() + opt.solve() + + self._store_solution(opt.solution) + + def _store_solution(self, solution: List[Measure]) -> None: + self.solution = solution + self.solution_cost = sum(m["cost"] for m in solution) + 
self.solution_gain = sum(m["gain"] for m in solution) diff --git a/recommendations/optimiser/funding_optimiser.py b/recommendations/optimiser/funding_optimiser.py index 6afe7d78..a91c05bd 100644 --- a/recommendations/optimiser/funding_optimiser.py +++ b/recommendations/optimiser/funding_optimiser.py @@ -14,10 +14,10 @@ from typing import Mapping, Union from itertools import product from backend.app.plan.schemas import ( - WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES + WALL_INSULATION_MEASURES, ROOF_INSULATION_MEASURES, ECO4_ELIGIBILE_FABRIC_MEASURES, + WALL_INSULATION_WITH_VENTILATION_MEASURES ) -from recommendations.optimiser.CostOptimiser import CostOptimiser -from recommendations.optimiser.GainOptimiser import GainOptimiser +from recommendations.optimiser.StrategicOptimiser import StrategicOptimiser from utils.logger import setup_logger from backend.Funding import Funding from backend.app.BatterySapScorer import BatterySAPScorer @@ -654,6 +654,11 @@ def optimise_with_scenarios( 1) With air source heat pump AND required insulation """ + # Universally handle zero gain + if target_gain is not None: + if target_gain <= 0: + return pd.DataFrame([]) + solutions = [] paths = [] # Produce the unique list of measure types @@ -683,9 +688,10 @@ def optimise_with_scenarios( # - Only once the fabric has been upgraded, do we consider heating upgrades # This should be wall insulation, roof insulation, floor insulation and windows - fabric_measures = WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES + [ - "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation" - ] + fabric_measures = ( + WALL_INSULATION_MEASURES + ROOF_INSULATION_MEASURES + ECO4_ELIGIBILE_FABRIC_MEASURES + + WALL_INSULATION_WITH_VENTILATION_MEASURES + ) fabric_only_measures = [ [opt for opt in group if opt["type"] in fabric_measures] for group in optimisation_measures @@ -713,7 +719,9 @@ def 
optimise_with_scenarios( remaining_measures.append(kept) remaining_budget = budget - fabric_cost if budget is not None else None - remaining_budget = 0 if remaining_budget < 0 else remaining_budget + + if remaining_budget is not None: + remaining_budget = 0 if remaining_budget < 0 else remaining_budget picked_extra, extra_cost, extra_gain = run_optimizer( remaining_measures, @@ -743,11 +751,9 @@ def optimise_with_scenarios( # Scenario 1: Air source heat pump with required insulation # ------------------------------------------------------------------ if enforce_heat_pump_insulation: - # Wall measures could be IWI or EWI + # Wall measures could be IWI, EWI or CWI remaining_wall_measures = [ - x for x in all_measure_types if x in WALL_INSULATION_MEASURES + [ - "internal_wall_insulation+mechanical_ventilation", "external_wall_insulation+mechanical_ventilation" - ] + x for x in all_measure_types if x in WALL_INSULATION_MEASURES + WALL_INSULATION_WITH_VENTILATION_MEASURES ] remaining_roof_measures = [x for x in all_measure_types if x in ROOF_INSULATION_MEASURES] @@ -767,6 +773,11 @@ def optimise_with_scenarios( for fixed in fixed_selections: + if target_gain is not None: + if target_gain <= 0: + # If we don't have any gain, we don't actually need to do this + continue + # fixed = [(gi, oi, opt), ...] fixed_items = [opt for (_, _, opt) in fixed] fixed_groups = {gi for (gi, _, _) in fixed} @@ -1111,28 +1122,30 @@ def run_optimizer( allow_slack: bool = False ): """ - Thin wrapper over your optimisers. - Returns: list[dict] selected_options + Thin wrapper around the StrategicOptimiser to run it on a subset of measures with an optional budget and target + gain. Handles the cases of no input measures, and extracts the outputs for ease of use. 
+ :param input_measures: list of groups of measures (each group is a list of measure dicts) + :param budget: optional budget to constrain the optimisation + :param sub_target_gain: optional target gain to achieve from this optimisation run + :param allow_slack: whether to allow solutions that exceed the target gain (True) or only solutions that meet it + exactly (False) + :return: tuple of (picked measures, total cost, total gain) where picked measures is a list of measure dicts """ if not input_measures: return None, 0.0, 0.0 - if budget is not None: - opt = GainOptimiser( - input_measures, max_cost=budget, max_gain=0 if sub_target_gain == 0 else (sub_target_gain or float("inf")), - allow_slack=allow_slack - ) - else: - if sub_target_gain is None: - raise ValueError("Either budget or target_gain must be provided.") - opt = CostOptimiser(input_measures, min_gain=sub_target_gain) + opt = StrategicOptimiser( + components=input_measures, + budget=budget, + target_gain=sub_target_gain, + allow_slack=allow_slack, + verbose=False, + ) - opt.setup() opt.solve() - cost = sum([x["cost"] for x in opt.solution]) - return opt.solution, cost, opt.solution_gain + return opt.solution, opt.solution_cost, opt.solution_gain # ---- Define optimisation paths ---------------------------------------------------------- diff --git a/recommendations/optimiser/optimiser_functions.py b/recommendations/optimiser/optimiser_functions.py index d704b3fb..6fd70c20 100644 --- a/recommendations/optimiser/optimiser_functions.py +++ b/recommendations/optimiser/optimiser_functions.py @@ -1,4 +1,5 @@ import pandas as pd +from typing import List, Dict, Any, Set import backend.app.assumptions as assumptions from backend.Property import Property from backend.app.plan.schemas import PlanTriggerRequest @@ -78,14 +79,14 @@ def prepare_input_measures( # if recs[0]["type"] == "solar_pv": # recs = [r for r in recs if ~r["has_battery"]] - # Only include measures with non-negative cost savings + # Only include 
measures with non-negative cost savings - we allow for a minor negative impact if eco_measures: recs_to_append = [ - rec for rec in recs if (rec["energy_cost_savings"] >= 0) or (rec["measure_type"] in eco_measures) + rec for rec in recs if (rec["energy_cost_savings"] >= -10) or (rec["measure_type"] in eco_measures) ] else: recs_to_append = [ - rec for rec in recs if (rec["energy_cost_savings"] >= 0) + rec for rec in recs if (rec["energy_cost_savings"] >= -10) ] if not recs_to_append: continue @@ -300,7 +301,12 @@ def add_required_measures(property_id, property_required_measures, recommendatio ] -def add_best_practice_measures(property_id, solution, recommendations, selected): +def add_best_practice_measures( + property_id: int, + solution: List[Dict[str, Any]], + recommendations: Dict[int, List[List[Dict[str, Any]]]], + selected: Set[str], +): """ Ensures best-practice measures like ventilation and trickle vents are included in the selected recommendations when appropriate. @@ -331,11 +337,11 @@ def add_best_practice_measures(property_id, solution, recommendations, selected) # If ventilation has been selected, or one of the measures needs ventilation, we need to ensure ventilation is # included - needs_ventilation = any( + measures_selected_needing_ventilation = any( x in [r["type"] for r in solution] for x in assumptions.measures_needing_ventilation - ) or len(ventilation_selected) > 0 + ) - if needs_ventilation: + if measures_selected_needing_ventilation or len(ventilation_selected) > 0: ventilation_rec = next( (r[0] for r in recommendations[property_id] if r[0]["type"] == "mechanical_ventilation"), None @@ -395,3 +401,30 @@ def flatten_recommendations_with_defaults(property_id, recommendations, selected # Flatten the nested list of lists into a single list return [rec for recommendations_by_type in final_recommendations for rec in recommendations_by_type] + + +def check_needs_ventilation( + property_measure_types: Set[str], + measures_needing_ventilation: 
List[str], + property_already_has_ventilation: bool, + ventilation_in_included_measures: bool +) -> bool: + """ + Check whether ventilation needs to be included in the recommendations for a property + :param property_measure_types: The set of measure types recommended for the property + :param measures_needing_ventilation: The list of measure types that require ventilation + :param property_already_has_ventilation: Whether the property currently has ventilation + :param ventilation_in_included_measures: Whether ventilation is already included in the recommended + measures + :return: True only if a measure needing ventilation is recommended, the property has no ventilation already + and ventilation is among the included measures + + # TODO - none of the inputs of this function are well structured and so this is quite brittle - we should + consider refactoring to make this more robust + """ + + needs_ventilation = any( + x in property_measure_types for x in measures_needing_ventilation + ) + + return needs_ventilation and not property_already_has_ventilation and ventilation_in_included_measures diff --git a/recommendations/tests/test_optimiser_functions.py b/recommendations/tests/test_optimiser_functions.py index c2927790..40fa56b6 100644 --- a/recommendations/tests/test_optimiser_functions.py +++ b/recommendations/tests/test_optimiser_functions.py @@ -3,8 +3,19 @@ import numpy as np from types import SimpleNamespace from recommendations.tests.test_data.measures_to_optimise import measures_to_optimise from recommendations.optimiser import optimiser_functions +from recommendations.optimiser.funding_optimiser import optimise_with_scenarios from recommendations.optimiser.GainOptimiser import GainOptimiser from recommendations.optimiser.CostOptimiser import CostOptimiser +from recommendations.optimiser.StrategicOptimiser import StrategicOptimiser + + +@pytest.fixture +def property_instance(): + return SimpleNamespace( + id="P1", + has_ventilation=False, + data={"current-energy-efficiency": "52"}, + ) class 
TestPrepareInputMeasures: @@ -47,8 +58,9 @@ class TestPrepareInputMeasures: def test_filters_out_negative_cost_savings(self): recs = [ [{"recommendation_id": "bad1", "type": "loft_insulation", "total": 200, "kwh_savings": 100, - "energy_cost_savings": -5, "has_battery": False, - "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, }], + "energy_cost_savings": -100, "has_battery": False, + "partial_project_funding": 0, "partial_project_score": 0, "uplift_project_score": 0, + "measure_type": "roof_insulation"}], ] measures = optimiser_functions.prepare_input_measures(recs, goal="Energy Savings", needs_ventilation=False) assert measures == [] # should skip negative cost saving recs @@ -142,7 +154,9 @@ class TestAddBestPracticeMeasures: ] } selected = set() - updated = optimiser_functions.add_best_practice_measures(property_id, solution, recommendations, selected) + updated = optimiser_functions.add_best_practice_measures( + property_id, solution, recommendations, selected + ) assert "vent1" in updated assert "trickle1" in updated @@ -287,3 +301,535 @@ class TestIncreasingEpcE2e: # We don't add ventilation as major insulation work isn't done ventilation_added = any(rec["recommendation_id"] == "3_phase=2" and rec["default"] for rec in flattened) assert not ventilation_added, "Ventilation should not be added without major insulation work" + + +class TestStrategicOptimiser: + + @pytest.fixture + def components(self): + components = [ + [ + {'id': '0_phase=0', 'cost': 819.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 819.0, 'raw_cost': 819.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '1_phase=0', 'cost': 702.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 702.0, 'raw_cost': 702.0, 'partial_project_funding': 0, + 
'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 585.0, 'gain': 5.6, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 585.0, 'raw_cost': 585.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, + 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=2', 'cost': 3656.25, 'gain': 2.0, 'type': 'suspended_floor_insulation', + 'innovation_uplift': 0, 'cost_minus_uplift': 3656.25, 'raw_cost': 3656.25, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '5_phase=3', 'cost': 17.5, 'gain': 1.0, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 'cost_minus_uplift': 17.5, 'raw_cost': 17.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '6_phase=4', 'cost': 140, 'gain': 3.4, 'type': 'roomstat_programmer_trvs', 'innovation_uplift': 0, + 'cost_minus_uplift': 140, 'raw_cost': 140, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '7_phase=4', 'cost': 874.5680000000001, 'gain': 4.2, 'type': 'time_temperature_zone_control', + 'innovation_uplift': 0, 'cost_minus_uplift': 874.5680000000001, 'raw_cost': 874.5680000000001, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=6', 'cost': 5420.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 
'has_battery': False, 'array_size': 3.6}, + {'id': '10_phase=6', 'cost': 6210.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '11_phase=6', 'cost': 6820.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6, + 'battery_gain': 3}, + {'id': '12_phase=6', 'cost': 7202.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '13_phase=6', 'cost': 6495.0, 'gain': 14.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '14_phase=6', 'cost': 7285.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '15_phase=6', 'cost': 7895.0, 'gain': 17.5, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92, + 'battery_gain': 3}, + {'id': '16_phase=6', 'cost': 5520.0, 
'gain': 15.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '17_phase=6', 'cost': 6310.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '18_phase=6', 'cost': 6920.0, 'gain': 18.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0, + 'battery_gain': 3}, + {'id': '19_phase=6', 'cost': 5320.0, 'gain': 12.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '20_phase=6', 'cost': 6110.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '21_phase=6', 'cost': 6720.0, 'gain': 14.1, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2, + 'battery_gain': 2}, + {'id': '22_phase=6', 'cost': 6932.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 
'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '23_phase=6', 'cost': 6295.0, 'gain': 13.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '24_phase=6', 'cost': 7085.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '25_phase=6', 'cost': 7695.0, 'gain': 16.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48, + 'battery_gain': 3}, + {'id': '26_phase=6', 'cost': 5220.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '27_phase=6', 'cost': 6662.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '28_phase=6', 'cost': 6095.0, 'gain': 12.3, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 
'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '29_phase=6', 'cost': 5160.0, 'gain': 9.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '30_phase=6', 'cost': 6392.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '31_phase=6', 'cost': 5910.0, 'gain': 10.2, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '32_phase=6', 'cost': 5100.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '33_phase=6', 'cost': 6098.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '34_phase=6', 'cost': 5725.0, 'gain': 8.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '35_phase=6', 'cost': 5040.0, 'gain': 6.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 
'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '36_phase=6', 'cost': 5828.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '37_phase=6', 'cost': 5540.0, 'gain': 7.0, 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74} + ] + ] + return components + + def test_budget_and_target_gain_strategy_case_1_try_min_cost_with_constraints(self, components): + budget = 5000 + target_gain = 11.5 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # check strategy used + assert opt.strategy_used.value == "case_1_try_min_cost_with_constraints" + # Check the solution values + assert opt.solution_cost == 4398.75 + assert opt.solution_gain == 12 + + def test_budget_and_target_gain_expecting_case_1_solve_max_gain_under_budget_strategy(self, components): + budget = 4000 + target_gain = 11.5 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # We expect to use case 1, but we won't be able to meet the target gain, so we should get the best solution + # possible within the budget. 
We end up with an infeasible solution when we try + # case_1_try_min_cost_with_constraints + assert opt.strategy_used.value == "case_1_solve_max_gain_under_budget" + assert opt.solution_cost == 1477.0680000000002 + assert opt.solution_gain == 10.8 + + def test_just_gain_expecting_case_3_solve_min_cost_for_target_strategy(self, components): + budget = None + target_gain = 11.5 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # Should be case 3 - minimise cost for target gain + assert opt.strategy_used.value == "case_3_solve_min_cost_for_target" + assert opt.solution_cost == 4398.75 + assert opt.solution_gain == 12 + + def test_just_gain_of_20_expecting_case_3_solve_min_cost_for_target_strategy(self, components): + budget = None + target_gain = 20 + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # Should be case 3 - minimise cost for target gain + assert opt.strategy_used.value == "case_3_solve_min_cost_for_target" + assert opt.solution_cost == 5962.5 + assert opt.solution_gain == 20.2 + + def test_just_budget_expecting_case_2_solve_max_gain_under_budget_strategy(self, components): + budget = 10000 + target_gain = None + + opt = StrategicOptimiser( + components=components, + target_gain=target_gain, + budget=budget, + ) + + opt.solve() + + # Should be case 2 - maximise gain under budget + assert opt.strategy_used.value == "case_2_solve_max_gain_under_budget" + assert opt.solution_cost == 7787.068 + assert opt.solution_gain == 28.8 + + +class TestCheckNeedsVentilation: + + def test_measure_types_includes_ventilation_no_existing_ventilation(self): + property_measure_types = {'mechanical_ventilation', 'cavity_wall_insulation', 'suspended_floor_insulation', + 'secondary_heating', 'loft_insulation', 'heating', 'low_energy_lighting'} + + measures_needing_ventilation = ['internal_wall_insulation', 'external_wall_insulation', + 
'cavity_wall_insulation'] + + has_ventilation = False + + ventilation_included = True + # CWI needs ventilation, the property has none and ventilation is included -> needed + result = optimiser_functions.check_needs_ventilation( + property_measure_types, measures_needing_ventilation, has_ventilation, + ventilation_included + ) + + assert result is True + + def test_measure_types_includes_ventilation_existing_ventilation(self): + property_measure_types = {'mechanical_ventilation', 'cavity_wall_insulation', 'suspended_floor_insulation', + 'secondary_heating', 'loft_insulation', 'heating', 'low_energy_lighting'} + + measures_needing_ventilation = ['internal_wall_insulation', 'external_wall_insulation', + 'cavity_wall_insulation'] + + has_ventilation = True + + ventilation_included = True + # The property already has ventilation -> nothing further is needed + result = optimiser_functions.check_needs_ventilation( + property_measure_types, measures_needing_ventilation, has_ventilation, + ventilation_included + ) + + assert result is False + + def test_measure_types_excludes_ventilation_no_existing_ventilation(self): + property_measure_types_without_ventilation = { + 'cavity_wall_insulation', 'suspended_floor_insulation', + 'secondary_heating', 'loft_insulation', 'heating', + 'low_energy_lighting' + } + + measures_needing_ventilation = ['internal_wall_insulation', 'external_wall_insulation', + 'cavity_wall_insulation'] + + has_ventilation = False + # No ventilation measure is available for this property, so it cannot be among the included measures + ventilation_included = False + + result = optimiser_functions.check_needs_ventilation( + property_measure_types_without_ventilation, measures_needing_ventilation, has_ventilation, + ventilation_included + ) + + assert result is False + + +class TestOptimiseWithScenarios: + + def test_zero_gain(self, property_instance): + input_measures = [ + [ + {'already_installed': False, 'id': '0_phase=0', + 'type': 'internal_wall_insulation+mechanical_ventilation', + 'gain': np.float64(2.0), 'cost': 16901.01977922431} + ], + [ + {'already_installed': False, 'id': '1_phase=1', 'type': 'loft_insulation', 'gain': 0, 'cost': 1197.0}, + ], + [ + {'already_installed': False, 'id': '5_phase=3', 'type': 
'suspended_floor_insulation', 'gain': 1, + 'cost': 5343.75}], + [ + {'already_installed': False, 'id': '6_phase=4', 'type': 'time_temperature_zone_control', + 'gain': np.float64(0.9000000000000057), 'cost': 1009.5600000000001}, + {'already_installed': False, 'id': '7_phase=4', 'type': 'air_source_heat_pump', 'gain': np.float64(6.9), + 'cost': 18979.9}], + [ + {'already_installed': False, 'id': '8_phase=5', 'type': 'solar_pv', 'gain': np.float64(9.0), + 'cost': 5420.0, "has_battery": False}, + {'already_installed': False, 'id': '9_phase=5', 'type': 'solar_pv', 'gain': np.float64(9.0), + 'cost': 6210.0, "has_battery": False}, + ] + ] + + solutions = optimise_with_scenarios( + p=property_instance, + input_measures=input_measures, + budget=None, + target_gain=0, + enforce_heat_pump_insulation=True, + enforce_fabric_first=False, + already_installed_sap=0, # To be passed to output + ) + + assert solutions.empty + + def test_ashp_needing_cwi_first(self, property_instance): + input_measures = [ + [ + {'id': '0_phase=0', 'cost': 1653.5495595376553, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'already_installed': False}, + {'id': '1_phase=0', 'cost': 1535.3279855335845, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'already_installed': False}, + {'id': '2_phase=0', 'cost': 1801.326527042744, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'already_installed': False}, + {'id': '3_phase=0', 'cost': 1505.7725920325668, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'already_installed': False} + ], + [ + {'id': '4_phase=1', 'cost': 766.5, 'gain': 0, 'type': 'loft_insulation', 'already_installed': False}, + {'id': '5_phase=1', 'cost': 657.0, 'gain': 0, 'type': 'loft_insulation', 'already_installed': False}, + {'id': '6_phase=1', 'cost': 547.5, 'gain': 0, 'type': 'loft_insulation', 'already_installed': False} + ], + [ + {'id': '8_phase=3', 'cost': 7.0, 'gain': 0, 'type': 
'low_energy_lighting', 'already_installed': False} + ], + [ + {'id': '9_phase=4', 'cost': 1009.5600000000001, 'gain': np.float64(0.3), + 'type': 'time_temperature_zone_control', 'already_installed': False}, + {'id': '10_phase=4', 'cost': 18979.9, 'gain': np.float64(7.5), 'type': 'air_source_heat_pump', + 'already_installed': False} + ], + [ + {'id': '11_phase=5', 'cost': 150.0, 'gain': np.float64(3.3), 'type': 'secondary_heating', + 'already_installed': False} + ], + [ + {'id': '12_phase=6', 'cost': 5420.0, 'gain': np.float64(15.4), 'type': 'solar_pv', + 'already_installed': False, "has_battery": False}, + {'id': '13_phase=6', 'cost': 6210.0, 'gain': np.float64(15.4), 'type': 'solar_pv', + 'already_installed': False, "has_battery": False} + ] + ] + + solutions = optimise_with_scenarios( + p=property_instance, + input_measures=input_measures, + budget=None, + target_gain=7.5, + enforce_heat_pump_insulation=True, + enforce_fabric_first=False, + already_installed_sap=0, # To be passed to output + ) + + # heat pump solutions + heat_pump_solutions = solutions[solutions["scenario"] == "heat_pump_with_insulation"] + assert len(heat_pump_solutions) == 12 + + for x in heat_pump_solutions["items"].values: + res = [y["type"] for y in x] + # All results should include loft & CWI + assert "loft_insulation" in res + assert "cavity_wall_insulation+mechanical_ventilation" in res + + def test_fabric_first(self, property_instance): + input_measures = [ + [{'id': '0_phase=0', 'cost': 1653.5495595376553, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'innovation_uplift': 0, + 'cost_minus_uplift': 1653.5495595376553, 'raw_cost': 1093.5495595376553, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}, + {'id': '1_phase=0', 'cost': 1535.3279855335845, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'innovation_uplift': 0, + 
'cost_minus_uplift': 1535.3279855335845, 'raw_cost': 975.3279855335845, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '2_phase=0', 'cost': 1801.326527042744, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'innovation_uplift': 0, + 'cost_minus_uplift': 1801.326527042744, 'raw_cost': 1241.326527042744, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}, + {'id': '3_phase=0', 'cost': 1505.7725920325668, 'gain': 1, + 'type': 'cavity_wall_insulation+mechanical_ventilation', 'innovation_uplift': 0, + 'cost_minus_uplift': 1505.7725920325668, 'raw_cost': 945.7725920325668, + 'partial_project_funding': 0, 'partial_project_score': 0, 'uplift_project_score': 0, + 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '4_phase=1', 'cost': 766.5, 'gain': 1, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 766.5, 'raw_cost': 766.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '5_phase=1', 'cost': 657.0, 'gain': 1, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 657.0, 'raw_cost': 657.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '6_phase=1', 'cost': 547.5, 'gain': 1, 'type': 'loft_insulation', 'innovation_uplift': 0, + 'cost_minus_uplift': 547.5, 'raw_cost': 547.5, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '8_phase=3', 'cost': 7.0, 'gain': 1, 'type': 'low_energy_lighting', 'innovation_uplift': 0, + 
'cost_minus_uplift': 7.0, 'raw_cost': 7.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}], + [{'id': '9_phase=4', 'cost': 1009.5600000000001, 'gain': np.float64(0.3), + 'type': 'time_temperature_zone_control', 'innovation_uplift': 0, 'cost_minus_uplift': 1009.5600000000001, + 'raw_cost': 1009.5600000000001, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 0}, + {'id': '10_phase=4', 'cost': 18979.9, 'gain': np.float64(7.5), 'type': 'air_source_heat_pump', + 'innovation_uplift': 0, 'cost_minus_uplift': 18979.9, 'raw_cost': 18979.9, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '11_phase=5', 'cost': 150.0, 'gain': np.float64(3.3), 'type': 'secondary_heating', + 'innovation_uplift': 0, 'cost_minus_uplift': 150.0, 'raw_cost': 150.0, 'partial_project_funding': 0, + 'partial_project_score': 0, 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, + 'array_size': 0}], + [{'id': '12_phase=6', 'cost': 5420.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5420.0, 'raw_cost': 5420.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.6}, + {'id': '13_phase=6', 'cost': 6210.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6210.0, 'raw_cost': 6210.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6}, + {'id': '14_phase=6', 'cost': 6820.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 
6820.0, 'raw_cost': 6820.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.6}, + {'id': '15_phase=6', 'cost': 7202.0, 'gain': np.float64(15.9), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7202.0, 'raw_cost': 7202.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.915}, + {'id': '16_phase=6', 'cost': 6495.0, 'gain': np.float64(15.9), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6495.0, 'raw_cost': 6495.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.92}, + {'id': '17_phase=6', 'cost': 7285.0, 'gain': np.float64(15.9), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7285.0, 'raw_cost': 7285.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92}, + {'id': '18_phase=6', 'cost': 7895.0, 'gain': np.float64(15.9), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7895.0, 'raw_cost': 7895.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.92}, + {'id': '19_phase=6', 'cost': 5520.0, 'gain': np.float64(16.7), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5520.0, 'raw_cost': 5520.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 4.0}, + {'id': '20_phase=6', 'cost': 6310.0, 'gain': np.float64(16.7), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6310.0, 'raw_cost': 6310.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 
'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0}, + {'id': '21_phase=6', 'cost': 6920.0, 'gain': np.float64(16.7), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6920.0, 'raw_cost': 6920.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 4.0}, + {'id': '22_phase=6', 'cost': 5320.0, 'gain': np.float64(13.6), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5320.0, 'raw_cost': 5320.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.2}, + {'id': '23_phase=6', 'cost': 6110.0, 'gain': np.float64(13.6), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6110.0, 'raw_cost': 6110.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2}, + {'id': '24_phase=6', 'cost': 6720.0, 'gain': np.float64(13.6), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6720.0, 'raw_cost': 6720.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.2}, + {'id': '25_phase=6', 'cost': 6932.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6932.0, 'raw_cost': 6932.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + {'id': '26_phase=6', 'cost': 6295.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6295.0, 'raw_cost': 6295.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.48}, + 
{'id': '27_phase=6', 'cost': 7085.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7085.0, 'raw_cost': 7085.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48}, + {'id': '28_phase=6', 'cost': 7695.0, 'gain': np.float64(15.4), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 7695.0, 'raw_cost': 7695.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': True, 'array_size': 3.48}, + {'id': '29_phase=6', 'cost': 5220.0, 'gain': np.float64(12.2), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5220.0, 'raw_cost': 5220.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.8}, + {'id': '30_phase=6', 'cost': 6662.0, 'gain': np.float64(12.8), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6662.0, 'raw_cost': 6662.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.045}, + {'id': '31_phase=6', 'cost': 6095.0, 'gain': np.float64(12.8), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6095.0, 'raw_cost': 6095.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 3.05}, + {'id': '32_phase=6', 'cost': 5160.0, 'gain': np.float64(10.1), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5160.0, 'raw_cost': 5160.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.4}, + {'id': '33_phase=6', 'cost': 6392.0, 'gain': np.float64(10.1), 'type': 'solar_pv', 
'innovation_uplift': 0, + 'cost_minus_uplift': 6392.0, 'raw_cost': 6392.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '34_phase=6', 'cost': 5910.0, 'gain': np.float64(10.1), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5910.0, 'raw_cost': 5910.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.61}, + {'id': '35_phase=6', 'cost': 5100.0, 'gain': np.float64(8.0), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5100.0, 'raw_cost': 5100.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.0}, + {'id': '36_phase=6', 'cost': 6098.0, 'gain': np.float64(9.1), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 6098.0, 'raw_cost': 6098.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.175}, + {'id': '37_phase=6', 'cost': 5725.0, 'gain': np.float64(9.1), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5725.0, 'raw_cost': 5725.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 2.18}, + {'id': '38_phase=6', 'cost': 5040.0, 'gain': np.float64(7.0), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5040.0, 'raw_cost': 5040.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.6}, + {'id': '39_phase=6', 'cost': 5828.0, 'gain': np.float64(7.0), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5828.0, 'raw_cost': 5828.0, 
'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}, + {'id': '40_phase=6', 'cost': 5540.0, 'gain': np.float64(7.0), 'type': 'solar_pv', 'innovation_uplift': 0, + 'cost_minus_uplift': 5540.0, 'raw_cost': 5540.0, 'partial_project_funding': 0, 'partial_project_score': 0, + 'uplift_project_score': 0, 'already_installed': False, 'has_battery': False, 'array_size': 1.74}] + ] + + solutions = optimise_with_scenarios( + p=property_instance, + input_measures=input_measures, + budget=None, + target_gain=7.5, + enforce_heat_pump_insulation=True, + enforce_fabric_first=True, + already_installed_sap=0, # To be passed to output + ) + + assert solutions.shape[0] == 1 + items = solutions["items"].values[0] + types = [x["type"] for x in items] + + assert types == ['cavity_wall_insulation+mechanical_ventilation', 'loft_insulation', 'solar_pv'] diff --git a/recommendations/tests/test_optimisers.py b/recommendations/tests/test_optimisers.py index 0c794119..63280907 100644 --- a/recommendations/tests/test_optimisers.py +++ b/recommendations/tests/test_optimisers.py @@ -1,74 +1,24 @@ import pytest -from recommendations.optimiser.funding_optimiser import build_heat_pump_paths -from recommendations.optimiser.funding_optimiser import run_optimizer - - -class DummyProp: - """Minimal property stub exposing just what your code reads.""" - - def __init__(self): - self.data = { - "current-energy-rating": "E", # or "D" for the special Social+D path - "current-energy-efficiency": 55, # numeric SAP points used in eligibility calc - "mainheat-energy-eff": "Very Good", - } - self.has_ventilation = False - self.floor_area = 70.0 - self.main_heating_controls = {"clean_description": "time and temperature zone control"} - self.walls = {'original_description': 'Solid brick, as built, no insulation (assumed)', - 'thermal_transmittance': None, - 'thermal_transmittance_unit': None, 'is_cavity_wall': 
False, 'is_filled_cavity': False, - 'is_solid_brick': True, - 'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, - 'is_as_built': True, - 'is_cob': False, 'is_assumed': True, 'is_sandstone_or_limestone': False, - 'insulation_thickness': 'none', - 'external_insulation': False, 'internal_insulation': False} - - self.main_heating = { - 'original_description': 'Boiler and radiators, mains gas', - 'clean_description': 'Boiler and radiators, mains gas', - 'has_radiators': True, 'has_fan_coil_units': False, 'has_pipes_in_screed_above_insulation': False, - 'has_pipes_in_insulated_timber_floor': False, 'has_pipes_in_concrete_slab': False, 'has_boiler': True, - 'has_air_source_heat_pump': False, 'has_room_heaters': False, 'has_electric_storage_heaters': False, - 'has_warm_air': False, 'has_electric_underfloor_heating': False, 'has_electric_ceiling_heating': False, - 'has_community_scheme': False, 'has_ground_source_heat_pump': False, 'has_no_system_present': False, - 'has_portable_electric_heaters': False, 'has_water_source_heat_pump': False, 'has_electric_heat_pump': - False, - 'has_micro-cogeneration': False, 'has_solar_assisted_heat_pump': False, 'has_exhaust_source_heat_pump': - False, - 'has_community_heat_pump': False, 'has_hot-water-only': False, 'has_electric': False, 'has_mains_gas': - True, - 'has_wood_logs': False, 'has_coal': False, 'has_oil': False, 'has_wood_pellets': False, - 'has_anthracite': False, - 'has_dual_fuel_mineral_and_wood': False, 'has_smokeless_fuel': False, 'has_lpg': False, 'has_b30k': False, - 'has_mineral_and_wood': False, 'has_dual_fuel_appliance': False, 'has_assumed': False, - 'has_electricaire': False, - 'has_assumed_for_most_rooms': False, 'has_underfloor_heating': False - } - - self.main_fuel = { - 'original_description': 'mains gas (not community)', 'clean_description': 'Mains gas not community', - 'fuel_type': 'mains gas', 'tariff_type': None, 'is_community': False, - 
'no_individual_heating_or_community_network': False, 'complex_fuel_type': None - } - - -@pytest.fixture -def p(): - return DummyProp() +from recommendations.optimiser.funding_optimiser import ( + build_heat_pump_paths, + run_optimizer, +) def test_build_heat_pump_paths(): eg1 = build_heat_pump_paths([], ["loft_insulation"]) - assert eg1 == [{'AND': ['loft_insulation', 'air_source_heat_pump']}] - eg2 = build_heat_pump_paths(["internal_wall_insulation", "external_wall_insulation"], ["loft_insulation"]) + eg2 = build_heat_pump_paths( + ["internal_wall_insulation", "external_wall_insulation"], + ["loft_insulation"], + ) - assert eg2 == [{'AND': ['internal_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, - {'AND': ['external_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}] + assert eg2 == [ + {'AND': ['internal_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, + {'AND': ['external_wall_insulation', 'loft_insulation', 'air_source_heat_pump']}, + ] def test_run_optimizer_empty_input(): @@ -78,134 +28,154 @@ def test_run_optimizer_empty_input(): assert gain == 0.0 -def test_uses_gain_optimiser_when_budget_provided(monkeypatch): - captured_args = {} +def test_budget_and_target_are_passed_correctly(monkeypatch): + captured = {} - class FakeGainOptimiser: - def __init__(self, measures, max_cost, max_gain, allow_slack): - captured_args["measures"] = measures - captured_args["max_cost"] = max_cost - captured_args["max_gain"] = max_gain - captured_args["allow_slack"] = allow_slack - self.solution = [{"cost": 100}] + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["components"] = components + captured["budget"] = budget + captured["target_gain"] = target_gain + captured["allow_slack"] = allow_slack + + self.solution = [{"cost": 100, "gain": 5}] + self.solution_cost = 100 self.solution_gain = 5 - def setup(self): - pass - def 
solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.GainOptimiser", - FakeGainOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 100, "gain": 5}]] - solution, cost, gain = run_optimizer( - measures, + [[{"cost": 100, "gain": 5}]], budget=500, sub_target_gain=10, - allow_slack=True + allow_slack=True, ) - assert captured_args["max_cost"] == 500 - assert captured_args["max_gain"] == 10 - assert captured_args["allow_slack"] is True + assert captured["budget"] == 500 + assert captured["target_gain"] == 10 + assert captured["allow_slack"] is True + assert cost == 100 assert gain == 5 + assert solution == [{"cost": 100, "gain": 5}] -def test_sub_target_gain_zero_sets_max_gain_zero(monkeypatch): - captured_args = {} +def test_sub_target_gain_zero_is_passed_as_zero(monkeypatch): + captured = {} - class FakeGainOptimiser: - def __init__(self, measures, max_cost, max_gain, allow_slack): - captured_args["max_gain"] = max_gain + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["target_gain"] = target_gain self.solution = [] - self.solution_gain = 0 - - def setup(self): - pass + self.solution_cost = 0.0 + self.solution_gain = 0.0 def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.GainOptimiser", - FakeGainOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 100, "gain": 5}]] - run_optimizer( - measures, + [[{"cost": 100, "gain": 5}]], budget=500, - sub_target_gain=0 + sub_target_gain=0, ) - assert captured_args["max_gain"] == 0 + assert captured["target_gain"] == 0 -def test_sub_target_gain_none_sets_max_gain_infinity(monkeypatch): - captured_args = {} +def test_sub_target_gain_none_becomes_infinity(monkeypatch): + captured = {} - class 
FakeGainOptimiser: - def __init__(self, measures, max_cost, max_gain, allow_slack): - captured_args["max_gain"] = max_gain + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["target_gain"] = target_gain self.solution = [] - self.solution_gain = 0 - - def setup(self): - pass + self.solution_cost = 0.0 + self.solution_gain = 0.0 def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.GainOptimiser", - FakeGainOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 100, "gain": 5}]] - run_optimizer( - measures, + [[{"cost": 100, "gain": 5}]], budget=500, - sub_target_gain=None + sub_target_gain=None, ) - assert captured_args["max_gain"] == float("inf") + assert captured["target_gain"] == None -def test_uses_cost_optimiser_when_no_budget(monkeypatch): - captured_args = {} +def test_target_only_case(monkeypatch): + captured = {} - class FakeCostOptimiser: - def __init__(self, measures, min_gain): - captured_args["min_gain"] = min_gain - self.solution = [{"cost": 50}] + class FakeStrategicOptimiser: + def __init__( + self, + components, + budget=None, + target_gain=None, + allow_slack=False, + verbose=False, + ): + captured["budget"] = budget + captured["target_gain"] = target_gain + + self.solution = [{"cost": 50, "gain": 10}] + self.solution_cost = 50 self.solution_gain = 10 - def setup(self): - pass - def solve(self): pass monkeypatch.setattr( - "recommendations.optimiser.funding_optimiser.CostOptimiser", - FakeCostOptimiser + "recommendations.optimiser.funding_optimiser.StrategicOptimiser", + FakeStrategicOptimiser, ) - measures = [[{"cost": 50, "gain": 10}]] - solution, cost, gain = run_optimizer( - measures, - sub_target_gain=10 + [[{"cost": 50, "gain": 10}]], + sub_target_gain=10, ) - assert captured_args["min_gain"] == 10 + assert captured["budget"] 
is None + assert captured["target_gain"] == 10 + assert cost == 50 assert gain == 10 + assert solution == [{"cost": 50, "gain": 10}] diff --git a/recommendations/tests/test_recommendations.py b/recommendations/tests/test_recommendations.py index e3bcbb2f..2218cd16 100644 --- a/recommendations/tests/test_recommendations.py +++ b/recommendations/tests/test_recommendations.py @@ -373,7 +373,7 @@ def test_filter_phase_adjustment(input_data, expected): "sap_impact, limit, expected", [ (1.0, -4, True), # positive SAP not allowed - (0.0, -4, True), # zero not allowed + (0.0, -4, False), # zero is allowed (-1.0, -4, False), # valid range (-3.9, -4, False), # valid range (-4.0, -4, False), # exact lower bound allowed @@ -401,7 +401,7 @@ def test_adjust_ventilation_sap(sap_impact, limit, expected): ) == expected -def test_get_previous_phase_values_starting_phase(property_instance): +def test_get_previous_phase_values_phase_0_starting_phase_0(property_instance): result = Recommendations._get_previous_phase_values( rec_phase=0, starting_phase=0, @@ -411,6 +411,7 @@ def test_get_previous_phase_values_starting_phase(property_instance): assert result == { "sap": 65.0, + "sap_prediction": 65.0, "carbon": 2.4, "heat_demand": 284.0, } @@ -441,8 +442,8 @@ def test_get_previous_phase_values_single_rep(property_instance): def test_get_previous_phase_values_median(property_instance): impact_summary = [ - {"phase": 1, "representative": True, "sap": 70, "carbon": 2.0, "heat_demand": 250}, - {"phase": 1, "representative": True, "sap": 74, "carbon": 1.6, "heat_demand": 230}, + {"phase": 1, "representative": True, "sap": 70, "carbon": 2.0, "heat_demand": 250, "sap_prediction": 70}, + {"phase": 1, "representative": True, "sap": 74, "carbon": 1.6, "heat_demand": 230, "sap_prediction": 74}, ] result = Recommendations._get_previous_phase_values( @@ -1476,7 +1477,9 @@ def test_lighting_and_loft_adjustment_combined(property_instance, heat_demand_pr assert adjustments2 == [ {'recommendation_id': 
'0_phase=0', 'phase': 0, 'sap_adjustment': np.float64(1.7)}, - {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)} + {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)}, + {'recommendation_id': '5_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0)}, + {'recommendation_id': '6_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0000000000000027)} ] @@ -1499,7 +1502,8 @@ def test_mechanical_ventilation_sap_floor(property_instance): previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=0 ) ) @@ -1538,7 +1542,8 @@ def test_mechanical_ventilation_no_floor_adjustment(property_instance): previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=0 ) ) @@ -1570,7 +1575,8 @@ def test_mechanical_ventilation_exactly_one_no_adjustment(property_instance): previous_phase_values=previous_phase_values, current_phase_values=current_phase_values, adjustments=adjustments, - property_instance=property_instance + property_instance=property_instance, + model_predicted_sap=0 ) ) @@ -1578,3 +1584,182 @@ def test_mechanical_ventilation_exactly_one_no_adjustment(property_instance): assert updated_adjustments == [] assert updated_current["sap"] == 1.0 assert updated_impact["sap"] == -1.0 + + +def test_mechanical_ventilation_sap_zero_no_adjustment(property_instance): + # Test when SAP = 0 + rec = { + "type": "mechanical_ventilation", + "recommendation_id": "mv_test", + "phase": 1, + } + + previous_phase_values = {'phase': 0, 'representative': True, 'recommendation_id': '0_phase=0', + 'measure_type': 'flat_roof_insulation', 'sap': 68.0, 'carbon': np.float64(0.5), + 'heat_demand': np.float64(300.1), 'sap_prediction': 
np.float64(71.7)} + current_phase_values = {'sap': 68.0, 'carbon': np.float64(0.5), 'heat_demand': np.float64(307.0)} + property_phase_impact = {'sap': 0, 'carbon': 0, 'heat_demand': np.float64(-6.899999999999977)} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec=rec, + property_phase_impact=property_phase_impact, + previous_phase_values=previous_phase_values, + current_phase_values=current_phase_values, + adjustments=adjustments, + property_instance=property_instance, + model_predicted_sap=0 + ) + ) + + # SAP is already at 0 → no adjustment expected + assert updated_adjustments == [] + assert updated_current["sap"] == 68.0 + assert updated_impact["sap"] == 0 + + +def test_mv_valid_negative_no_adjustment(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 70.0} + current = {"sap": 67.0} + impact = {"sap": -3.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + assert updated_current["sap"] == 67.0 + assert updated_impact["sap"] == -3.0 + + +def test_mv_zero_impact_allowed(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 68.0, "sap_prediction": 71.7} + current = {"sap": 68.0} + impact = {"sap": 0.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + assert updated_current["sap"] == 68.0 + assert updated_impact["sap"] == 0.0 + + +def test_mv_positive_impact_corrected(property_instance): + rec = 
{"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 60.0} + current = {"sap": 61.0} + impact = {"sap": 1.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert len(updated_adjustments) == 1 + assert updated_current["sap"] == previous["sap"] + updated_impact["sap"] + assert updated_impact["sap"] <= 0 + + +def test_mv_below_lower_bound_corrected(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 70.0} + current = {"sap": 64.0} + impact = {"sap": -6.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert len(updated_adjustments) == 1 + assert updated_impact["sap"] >= -4 + + +def test_mv_floor_triggered(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 2.0} + current = {"sap": 0.5} + impact = {"sap": -1.5, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_current["sap"] == 1.0 + assert updated_adjustments[0]["sap_adjustment"] > 0 + + +def test_mv_exactly_one_no_floor(property_instance): + rec = {"type": "mechanical_ventilation", "recommendation_id": "mv", "phase": 1} + + previous = {"sap": 2.0} + current = {"sap": 1.0} + impact = {"sap": -1.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + 
Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + assert updated_current["sap"] == 1.0 + + +def test_lighting_no_cap(property_instance): + rec = {"type": "low_energy_lighting", "recommendation_id": "led", "phase": 1, + "co2_equivalent_savings": 0} + + previous = {"sap": 60.0, "carbon": 2.0} + current = {"sap": 61.0, "carbon": 2.0} + impact = {"sap": 1.0, "carbon": 0, "heat_demand": 0} + adjustments = [] + + updated_impact, updated_current, updated_adjustments = ( + Recommendations._apply_measure_specific_rules( + rec, impact, previous, current, adjustments, property_instance, 0 + ) + ) + + assert updated_adjustments == [] + + +def test_filter_phase_adjustments(): + example_adjustments = [ + {'recommendation_id': '0_phase=0', 'phase': 0, 'sap_adjustment': np.float64(1.7)}, + {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)}, + {'recommendation_id': '5_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0)}, + {'recommendation_id': '6_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0000000000000027)} + ] + + res = Recommendations._filter_phase_adjustment(example_adjustments) + + assert res == [ + {'recommendation_id': '0_phase=0', 'phase': 0, 'sap_adjustment': np.float64(1.7)}, + {'recommendation_id': '4_phase=2', 'phase': 2, 'sap_adjustment': np.float64(4.0)}, + {'recommendation_id': '6_phase=3', 'phase': 3, 'sap_adjustment': np.float64(1.0000000000000027)} + ] diff --git a/serverless.yml b/serverless.yml index f3def028..3dde5511 100644 --- a/serverless.yml +++ b/serverless.yml @@ -30,6 +30,8 @@ provider: GOOGLE_SOLAR_API_KEY: ${env:GOOGLE_SOLAR_API_KEY} ENGINE_SQS_URL: Ref: EngineQueue + # hardcode the categorisation queue for now as it's created in terraform + CATEGORISATION_SQS_URL: "https://sqs.eu-west-2.amazonaws.com/337213553626/categorisation-queue-dev" plugins: - serverless-python-requirements @@ -106,6 +108,7 
@@ resources: - sqs:SendMessage Resource: - Fn::GetAtt: [ EngineQueue, Arn ] + - "arn:aws:sqs:eu-west-2:337213553626:categorisation-queue-dev" - Effect: Allow Action: - s3:GetObject diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index b62e51d7..b6a33ae1 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -13,7 +13,7 @@ from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session from backend.app.db.models.recommendations import ( Recommendation, - Plan, + PlanModel, PlanRecommendations, RecommendationMaterials, ) @@ -28,14 +28,16 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 568 +PORTFOLIO_ID = 581 SCENARIOS = [ - 1059, + 1124 ] scenario_names = { - 1059: "EPC C - 10k budget", + 1124: "EPC C - Solar Focused", } +project_name = "WCHG EPC D rated properties" + def get_data(portfolio_id, scenario_ids): session = sessionmaker(bind=db_engine)() @@ -73,12 +75,12 @@ def get_data(portfolio_id, scenario_ids): # -------------------- latest_plans_subq = ( session.query( - Plan.scenario_id, - Plan.property_id, - func.max(Plan.created_at).label("latest_created_at"), + PlanModel.scenario_id, + PlanModel.property_id, + func.max(PlanModel.created_at).label("latest_created_at"), ) - .filter(Plan.scenario_id.in_(scenario_ids)) - .group_by(Plan.scenario_id, Plan.property_id) + .filter(PlanModel.scenario_id.in_(scenario_ids)) + .group_by(PlanModel.scenario_id, PlanModel.property_id) .subquery() ) @@ -87,12 +89,12 @@ def get_data(portfolio_id, scenario_ids): # ).all() plans_query = ( - session.query(Plan) + session.query(PlanModel) .join( latest_plans_subq, - (Plan.scenario_id == latest_plans_subq.c.scenario_id) - & (Plan.property_id == latest_plans_subq.c.property_id) - & (Plan.created_at == latest_plans_subq.c.latest_created_at), + (PlanModel.scenario_id == latest_plans_subq.c.scenario_id) + & 
(PlanModel.property_id == latest_plans_subq.c.property_id) + & (PlanModel.created_at == latest_plans_subq.c.latest_created_at), ) .all() ) @@ -108,7 +110,7 @@ def get_data(portfolio_id, scenario_ids): # ) plans_data = [ - {col.name: getattr(plan, col.name) for col in Plan.__table__.columns} + {col.name: getattr(plan, col.name) for col in PlanModel.__table__.columns} for plan in plans_query ] @@ -118,12 +120,14 @@ def get_data(portfolio_id, scenario_ids): # Recommendations (NO materials yet) # -------------------- recommendations_query = ( - session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id) + session.query( + Recommendation, PlanModel.scenario_id, PlanRecommendations.plan_id + ) .join( PlanRecommendations, Recommendation.id == PlanRecommendations.recommendation_id, ) - .join(Plan, Plan.id == PlanRecommendations.plan_id) + .join(PlanModel, PlanModel.id == PlanRecommendations.plan_id) .filter( PlanRecommendations.plan_id.in_(plan_ids), Recommendation.default.is_(True), @@ -284,6 +288,8 @@ for scenario_id in SCENARIOS: "current_sap_points", "total_floor_area", "number_of_rooms", + "lodgement_date", + "is_expired", "id", ] ] @@ -301,7 +307,58 @@ for scenario_id in SCENARIOS: ) df["uprn"] = df["uprn"].astype(str) + relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id] + df2 = df.merge( + relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], + how="left", + on="property_id", + suffixes=("", "_plan"), + ) + print(df2["predicted_post_works_epc"].value_counts()) + print(df2["post_epc_rating"].value_counts()) + + z = df2[ + (df2["predicted_post_works_epc"] != "D") + & (df2["post_epc_rating"].astype(str) == "Epc.D") + ] + + df2["predicted_post_works_epc"].value_counts() + df2["post_epc_rating"].astype(str).value_counts() + + df2[df2["total_retrofit_cost"] > 0].shape + + getting_works = df[df["total_retrofit_cost"] > 0] + getting_works["predicted_post_works_epc"].value_counts() + + 32565 / getting_works.shape[0] + + 
df[df["predicted_post_works_sap"] == ""] + + # Expected columns list + expected_columns = [ + "suspended_floor_insulation", + "solid_floor_insulation", + "external_wall_insulation", + "internal_wall_insulation", + "cavity_wall_insulation", + "loft_insulation", + "flat_roof_insulation", + "room_roof_insulation", + "secondary_glazing", + "double_glazing", + "solar_pv", + "high_heat_retention_storage_heaters", + "air_source_heat_pump", + "boiler_upgrade", + "roomstat_programmer_trvs", + "time_temperature_zone_control", + ] + # Add missing columns with default values + for col in expected_columns: + if col not in df.columns: + df[col] = "" + # Create excel to store to - filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx" + filename = f"{scenario_names[scenario_id]} - {project_name}.xlsx" with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) diff --git a/test.requirements.txt b/test.requirements.txt index d31371a6..d8b8b777 100644 --- a/test.requirements.txt +++ b/test.requirements.txt @@ -2,4 +2,6 @@ pytest mock pytest-cov pytest-mock -dotenv \ No newline at end of file +dotenv +psycopg[binary] +pytest-postgresql \ No newline at end of file diff --git a/utils/logger.py b/utils/logger.py index d643f36a..45370d3d 100644 --- a/utils/logger.py +++ b/utils/logger.py @@ -1,7 +1,13 @@ import logging +from os import PathLike +from typing import Optional, Union -def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False): +def setup_logger( + log_file: Optional[Union[str, PathLike[str]]] = None, + level: int = logging.INFO, + overwrite_handler: bool = False, +) -> logging.Logger: # Create a logger and set the logging level logger = logging.getLogger() logger.setLevel(level) diff --git a/utils/s3.py b/utils/s3.py index 2e67d4f0..b3a96dba 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -3,12 +3,62 @@ import boto3 import csv import pandas as pd from io import BytesIO, StringIO +from urllib.parse import 
unquote from utils.logger import setup_logger from botocore.exceptions import NoCredentialsError, PartialCredentialsError logger = setup_logger() +def parse_s3_uri(s3_uri: str) -> tuple[str, str]: + """ + Parse S3 URI to extract bucket and key. + + Supports two formats: + 1. S3 URI format: s3://bucket/key + 2. AWS console URL format with query parameters + """ + logger.info("Parsing S3 URI") + + try: + # Check if it's an S3 URI format + if s3_uri.startswith("s3://"): + parts = s3_uri[5:].split("/", 1) + if len(parts) < 2: + raise ValueError("S3 URI must include both bucket and key") + bucket = parts[0] + key = parts[1] + logger.info(f"Extracted bucket: {bucket}, key: {key}") + return bucket, key + + # Otherwise, treat as AWS console URL + logger.info("Parsing as AWS console URL") + + # Split base URL and query string + if "?" not in s3_uri: + raise ValueError("No query string found") + + base, query = s3_uri.split("?", 1) + + # Extract bucket from base URL + if "/s3/object/" not in base: + raise ValueError("No '/s3/object/' found in URL path") + + path_parts = base.split("/s3/object/") + bucket = path_parts[1] + logger.info(f"Extracted bucket: {bucket}") + + # Extract prefix from query parameters + params = dict(item.split("=") for item in query.split("&") if "=" in item) + key = unquote(params.get("prefix", "")) + logger.info(f"Extracted key: {key}") + + return bucket, key + except Exception as e: + logger.error(f"Error parsing S3 URI: {type(e).__name__}: {e}") + raise ValueError(f"Could not parse S3 URI") from e + + def read_from_s3(bucket_name, s3_file_name): """ Read an object from s3. 
Decoding of the data is left for outside of this function @@ -17,11 +67,11 @@ def read_from_s3(bucket_name, s3_file_name): :param s3_file_name: The file name to use for the saved data in S3 """ # Initialize a session using Amazon S3 - s3 = boto3.resource('s3') + s3 = boto3.resource("s3") # Get the MessagePack data from S3 obj = s3.Object(bucket_name, s3_file_name) - data = obj.get()['Body'].read() + data = obj.get()["Body"].read() return data @@ -36,7 +86,7 @@ def save_data_to_s3(data, bucket_name, s3_file_name): """ # Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") except NoCredentialsError: print("Credentials not available.") return @@ -46,12 +96,12 @@ def save_data_to_s3(data, bucket_name, s3_file_name): try: s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data) - print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}') + print(f"Successfully uploaded data to {bucket_name}/{s3_file_name}") except Exception as e: - print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}') + print(f"Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}") -def read_io_from_s3(bucket_name, file_key): +def read_io_from_s3(bucket_name: str, file_key: str) -> BytesIO: """ Read a file from S3 into a BytesIO object. 
This can be used by other methods to parse the response @@ -61,13 +111,13 @@ def read_io_from_s3(bucket_name, file_key): :param file_key: The file name of the shapefile in S3 :return: Io file to be parsed by another method """ - client = boto3.client('s3') + client = boto3.client("s3") # Get the Parquet file from S3 response = client.get_object(Bucket=bucket_name, Key=file_key) # Read the file into an io object - buffer = BytesIO(response['Body'].read()) + buffer = BytesIO(response["Body"].read()) return buffer @@ -86,7 +136,7 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key): df.to_parquet(parquet_buffer) # Create the boto3 client - client = boto3.client('s3') + client = boto3.client("s3") # Upload the Parquet file to S3 client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue()) @@ -102,15 +152,14 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key): """ if bucket_name is None: - raise ValueError("Bucket name is None when trying to read dataframe from parquet") + raise ValueError( + "Bucket name is None when trying to read dataframe from parquet" + ) if not file_key.endswith(".parquet"): raise ValueError("This file doesn't look like a parquet file") - parquet_buffer = read_io_from_s3( - bucket_name=bucket_name, - file_key=file_key - ) + parquet_buffer = read_io_from_s3(bucket_name=bucket_name, file_key=file_key) df = pd.read_parquet(parquet_buffer) @@ -130,7 +179,7 @@ def save_csv_to_s3(dataframe, bucket_name, file_name): bool: True if the file was successfully saved, False otherwise. 
""" # Initialize S3 client - s3 = boto3.client('s3') + s3 = boto3.client("s3") # Create an in-memory text stream csv_buffer = StringIO() @@ -159,7 +208,7 @@ def save_pickle_to_s3(data, bucket_name, s3_file_name): try: serialized_data = pickle.dumps(data) except Exception as e: - print(f'Failed to serialize data: {str(e)}') + print(f"Failed to serialize data: {str(e)}") return # Use save_data_to_s3 function to upload the serialized data to S3 @@ -175,9 +224,9 @@ def read_pickle_from_s3(bucket_name, s3_file_name): :return: The data read from the pickle file """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") s3_response = s3.get_object(Bucket=bucket_name, Key=s3_file_name) - serialized_data = s3_response['Body'].read() + serialized_data = s3_response["Body"].read() except NoCredentialsError: logger.errpr("Credentials not available.") return None @@ -185,20 +234,24 @@ def read_pickle_from_s3(bucket_name, s3_file_name): logger.errpr("Incomplete credentials provided.") return None except Exception as e: - logger.error(f'Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}') + logger.error( + f"Failed to download data from {bucket_name}/{s3_file_name}: {str(e)}" + ) return None # Deserialize data from pickle format try: data = pickle.loads(serialized_data) except Exception as e: - logger.error(f'Failed to deserialize data: {str(e)}') + logger.error(f"Failed to deserialize data: {str(e)}") return None return data -def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None): +def read_excel_from_s3( + bucket_name, file_key, header_row, drop_all_na=True, sheet_name=None +): """ Read an Excel file from an S3 bucket and return it as a pandas DataFrame. 
@@ -222,7 +275,7 @@ def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True, shee # Drop columns where all values are NaN if drop_all_na: - df.dropna(axis=1, how='all', inplace=True) + df.dropna(axis=1, how="all", inplace=True) # Reset index if the first column is just an index or entirely NaN df.reset_index(drop=True, inplace=True) @@ -254,7 +307,7 @@ def save_excel_to_s3(df, bucket_name, file_key): # Initialize a session using boto3 session = boto3.session.Session() - s3 = session.resource('s3') + s3 = session.resource("s3") # Upload the Excel file from the buffer to S3 bucket = s3.Bucket(bucket_name) @@ -264,17 +317,19 @@ def save_excel_to_s3(df, bucket_name, file_key): def read_csv_from_s3(bucket_name, filepath): - logger.info(f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'") - s3 = boto3.client('s3') + logger.info( + f"Reading CSV file from S3 bucket '{bucket_name}' with key '{filepath}'" + ) + s3 = boto3.client("s3") # Get the object from s3 s3_object = s3.get_object(Bucket=bucket_name, Key=filepath) # Read the CSV body from the s3 object - body = s3_object['Body'].read() + body = s3_object["Body"].read() # Use StringIO to create a file-like object from the string - csv_data = StringIO(body.decode('utf-8')) + csv_data = StringIO(body.decode("utf-8")) # Use csv library to read it into a list of dictionaries reader = csv.DictReader(csv_data) @@ -292,14 +347,16 @@ def list_files_in_s3_folder(bucket_name, folder_name): :return: A list of file keys in the specified S3 folder. """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) - if 'Contents' not in response: - logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + if "Contents" not in response: + logger.info( + f"No files found in folder {folder_name} in bucket {bucket_name}." 
+ ) return [] - file_keys = [content['Key'] for content in response['Contents']] + file_keys = [content["Key"] for content in response["Contents"]] return file_keys except NoCredentialsError: @@ -309,7 +366,9 @@ def list_files_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list files in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] @@ -335,22 +394,30 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): """ # For this function, folder_name should end with a forward slash - if not folder_name.endswith('/'): - folder_name += '/' + if not folder_name.endswith("/"): + folder_name += "/" try: - s3 = boto3.client('s3') - response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name, Delimiter='/') + s3 = boto3.client("s3") + response = s3.list_objects_v2( + Bucket=bucket_name, Prefix=folder_name, Delimiter="/" + ) items = [] # Add files to the list - if 'Contents' in response: - items.extend([content['Key'] for content in response['Contents'] if content['Key'] != folder_name]) + if "Contents" in response: + items.extend( + [ + content["Key"] + for content in response["Contents"] + if content["Key"] != folder_name + ] + ) # Add immediate subfolders to the list - if 'CommonPrefixes' in response: - items.extend([prefix['Prefix'] for prefix in response['CommonPrefixes']]) + if "CommonPrefixes" in response: + items.extend([prefix["Prefix"] for prefix in response["CommonPrefixes"]]) return items @@ -361,7 +428,9 @@ def list_files_and_subfolders_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list files and subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list files and 
subfolders in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return [] @@ -374,15 +443,21 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): :return: A list of XML file keys in the specified S3 folder. """ try: - s3 = boto3.client('s3') + s3 = boto3.client("s3") response = s3.list_objects_v2(Bucket=bucket_name, Prefix=folder_name) - if 'Contents' not in response: - logger.info(f"No files found in folder {folder_name} in bucket {bucket_name}.") + if "Contents" not in response: + logger.info( + f"No files found in folder {folder_name} in bucket {bucket_name}." + ) return [] # Filter XML files - xml_files = [content['Key'] for content in response['Contents'] if content['Key'].endswith('.xml')] + xml_files = [ + content["Key"] + for content in response["Contents"] + if content["Key"].endswith(".xml") + ] return xml_files except NoCredentialsError: @@ -392,5 +467,7 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): logger.error("Incomplete credentials provided.") return [] except Exception as e: - logger.error(f'Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}') + logger.error( + f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}" + ) return []