diff --git a/.devcontainer/asset_list/docker-compose.yml b/.devcontainer/asset_list/docker-compose.yml index 67b27444..06e4124d 100644 --- a/.devcontainer/asset_list/docker-compose.yml +++ b/.devcontainer/asset_list/docker-compose.yml @@ -4,11 +4,11 @@ services: model-sal: user: "${UID}:${GID}" build: - context: .. - dockerfile: .devcontainer/Dockerfile + context: ../.. + dockerfile: .devcontainer/asset_list/Dockerfile command: sleep infinity volumes: - - ..:/workspaces/model + - ../../:/workspaces/model networks: - model-net diff --git a/.devcontainer/asset_list/requirements.txt b/.devcontainer/asset_list/requirements.txt index cfab95ec..0640f2c9 100644 --- a/.devcontainer/asset_list/requirements.txt +++ b/.devcontainer/asset_list/requirements.txt @@ -21,3 +21,4 @@ pydantic>=1.10.7,<2 sqlmodel # Formatting black==26.1.0 +python-dotenv diff --git a/.github/workflows/_build_image.yml b/.github/workflows/_build_image.yml new file mode 100644 index 00000000..6b6c4994 --- /dev/null +++ b/.github/workflows/_build_image.yml @@ -0,0 +1,78 @@ +name: Build Docker image + +on: + workflow_call: + inputs: + ecr_repo: + required: true + type: string + dockerfile_path: + required: true + type: string + build_context: + required: false + default: "." 
+ type: string + + outputs: + image_digest: + description: "Pushed image digest" + value: ${{ jobs.build.outputs.image_digest }} + ecr_repo_url: + description: "ECR repository URL" + value: ${{ jobs.build.outputs.ecr_repo_url }} + + secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + AWS_REGION: + required: true + +jobs: + build: + runs-on: ubuntu-latest + + outputs: + image_digest: ${{ steps.digest.outputs.image_digest }} + ecr_repo_url: ${{ steps.repo.outputs.ecr_repo_url }} + + steps: + - uses: actions/checkout@v4 + + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION }} + + - uses: aws-actions/amazon-ecr-login@v2 + + - name: Resolve ECR repo URL + id: repo + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + + ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}" + + echo "Resolved ECR repo URL (local var):" + echo "$ECR_REPO_URL" + + echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT" + + - name: Build & push image + run: | + IMAGE_URI="${{ steps.repo.outputs.ecr_repo_url }}:${GITHUB_SHA}" + docker build -f ${{ inputs.dockerfile_path }} -t $IMAGE_URI ${{ inputs.build_context }} + docker push $IMAGE_URI + + - name: Resolve image digest + id: digest + run: | + DIGEST=$(aws ecr describe-images \ + --repository-name ${{ inputs.ecr_repo }} \ + --image-ids imageTag=${GITHUB_SHA} \ + --query 'imageDetails[0].imageDigest' \ + --output text) + echo "image_digest=$DIGEST" >> "$GITHUB_OUTPUT" diff --git a/.github/workflows/_deploy_lambda.yml b/.github/workflows/_deploy_lambda.yml new file mode 100644 index 00000000..bff106c5 --- /dev/null +++ b/.github/workflows/_deploy_lambda.yml @@ -0,0 +1,91 @@ +name: Deploy Lambda (Terraform) + +on: + workflow_call: + inputs: + lambda_name: + required: true + type: 
string + + lambda_path: + required: true + type: string + + stage: + required: true + type: string + + ecr_repo: + required: true + type: string + + image_digest: + required: true + type: string + + secrets: + AWS_ACCESS_KEY_ID: + required: true + AWS_SECRET_ACCESS_KEY: + required: true + AWS_REGION: + required: true + +jobs: + deploy: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Debug inputs + run: | + echo "lambda_name=${{ inputs.lambda_name }}" + echo "lambda_path=${{ inputs.lambda_path }}" + echo "stage=${{ inputs.stage }}" + echo "ecr_repo=${{ inputs.ecr_repo }}" + echo "image_digest=${{ inputs.image_digest }}" + + + - uses: aws-actions/configure-aws-credentials@v4 + with: + aws-access-key-id: ${{ secrets.AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.AWS_SECRET_ACCESS_KEY }} + aws-region: ${{ secrets.AWS_REGION }} + + - uses: hashicorp/setup-terraform@v3 + + - uses: aws-actions/amazon-ecr-login@v2 + + - name: Resolve ECR repo URL + id: repo + env: + AWS_REGION: ${{ secrets.AWS_REGION }} + run: | + AWS_ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text) + ECR_REPO_URL="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/${{ inputs.ecr_repo }}" + echo "ecr_repo_url=$ECR_REPO_URL" >> "$GITHUB_OUTPUT" + + - name: Terraform Init + working-directory: ${{ inputs.lambda_path }} + run: terraform init -reconfigure + + - name: Terraform Workspace + working-directory: ${{ inputs.lambda_path }} + run: | + terraform workspace select ${{ inputs.stage }} \ + || terraform workspace new ${{ inputs.stage }} + + - name: Terraform Plan + working-directory: ${{ inputs.lambda_path }} + run: | + terraform plan \ + -var="stage=${{ inputs.stage }}" \ + -var="lambda_name=${{ inputs.lambda_name }}" \ + -var="ecr_repo_url=${{ steps.repo.outputs.ecr_repo_url }}" \ + -var="image_digest=${{ inputs.image_digest }}" \ + -out=lambdaplan + + - name: Terraform Apply + working-directory: ${{ inputs.lambda_path }} + run: 
terraform apply -auto-approve lambdaplan diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index a7aef225..41a551c4 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -1,80 +1,98 @@ -name: Deploy terraform stack +name: Deploy infrastructure on: push: branches: - - dev - - prod + - "**" jobs: - deploy: + determine_stage: runs-on: ubuntu-latest + outputs: + stage: ${{ steps.set-stage.outputs.stage }} + steps: - - name: Checkout - uses: actions/checkout@v2 - - - name: Setup AWS credentials file + - name: Determine stage from branch + id: set-stage + shell: bash run: | - mkdir -p ~/.aws - echo "[DevAdmin]" > ~/.aws/credentials - echo "aws_access_key_id = ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials - echo "aws_secret_access_key = ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials - echo "[ProdAdmin]" >> ~/.aws/credentials - echo "aws_access_key_id = ${{ secrets.PROD_AWS_ACCESS_KEY_ID }}" >> ~/.aws/credentials - echo "aws_secret_access_key = ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }}" >> ~/.aws/credentials + BRANCH="${GITHUB_REF_NAME}" - - name: Setup AWS config file - run: | - echo "[profile DevAdmin]" > ~/.aws/config - echo "region = eu-west-2" >> ~/.aws/config - echo "[profile ProdAdmin]" >> ~/.aws/config - echo "region = eu-west-2" >> ~/.aws/config + if [[ "$BRANCH" == "prod" ]]; then + echo "stage=prod" >> "$GITHUB_OUTPUT" - - name: Setup Terraform - uses: hashicorp/setup-terraform@v1 - with: - terraform_version: 1.5.2 + elif [[ "$BRANCH" == "dev" ]]; then + echo "stage=dev" >> "$GITHUB_OUTPUT" - - name: Configure AWS credentials (DevAdmin) - uses: aws-actions/configure-aws-credentials@v1 + else + echo "stage=dev" >> "$GITHUB_OUTPUT" + fi + + # ============================================================ + # 1️⃣ Shared Terraform (infra) + # ============================================================ + shared_terraform: + needs: determine_stage + 
runs-on: ubuntu-latest + env: + STAGE: ${{ needs.determine_stage.outputs.stage }} + + steps: + - uses: actions/checkout@v4 + + - uses: aws-actions/configure-aws-credentials@v4 with: aws-access-key-id: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} aws-secret-access-key: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} - aws-region: eu-west-2 - env: - AWS_PROFILE: "DevAdmin" + aws-region: ${{ secrets.DEV_AWS_REGION }} + + - uses: hashicorp/setup-terraform@v3 - name: Terraform Init - run: cd infrastructure/terraform && terraform init + working-directory: infrastructure/terraform/shared + run: terraform init -reconfigure - name: Terraform Workspace - run: | - BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///") - cd infrastructure/terraform - terraform workspace select ${BRANCH_NAME} || terraform workspace new ${BRANCH_NAME} + working-directory: infrastructure/terraform/shared + run: terraform workspace select ${STAGE} || terraform workspace new ${STAGE} - name: Terraform Plan - run: | - BRANCH_NAME=$(echo "${{ github.ref }}" | sed -e "s/^refs\/heads\///") - cd infrastructure/terraform && terraform plan -var-file=${BRANCH_NAME}.tfvars + working-directory: infrastructure/terraform/shared + run: terraform plan -var-file=${STAGE}.tfvars -out=tfplan - - name: Deploy to Dev - if: github.ref == 'refs/heads/dev' - run: cd infrastructure/terraform && terraform apply -var-file=dev.tfvars -auto-approve - env: - name: dev + - name: Terraform Apply + if: env.STAGE == 'prod' + working-directory: infrastructure/terraform/shared + run: terraform apply -auto-approve tfplan - - name: Configure AWS credentials (ProdAdmin) - uses: aws-actions/configure-aws-credentials@v1 - with: - aws-access-key-id: ${{ secrets.PROD_AWS_ACCESS_KEY_ID }} - aws-secret-access-key: ${{ secrets.PROD_AWS_SECRET_ACCESS_KEY }} - aws-region: eu-west-2 - env: - AWS_PROFILE: "ProdAdmin" + # ============================================================ + # 2️⃣ Build Address 2 UPRN image and Push + # 
============================================================ + address2uprn_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/address2UPRN/Dockerfile + build_context: backend/address2UPRN + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} - - name: Deploy to Prod - if: github.ref == 'refs/heads/prod' - run: cd infrastructure/terraform && terraform apply -var-file=prod.tfvars -auto-approve - env: - name: prod + # ============================================================ + # 3️⃣ Deploy Address 2 UPRN Lambda + # ============================================================ + address2uprn_lambda: + needs: [address2uprn_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: address2uprn + lambda_path: infrastructure/terraform/lambda/address2UPRN + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.address2uprn_image.outputs.image_digest }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} diff --git a/asset_list/app.py b/asset_list/app.py index 9907a609..b46254f9 100644 --- a/asset_list/app.py +++ b/asset_list/app.py @@ -12,23 +12,34 @@ from asset_list.utils import get_data from dotenv import load_dotenv from backend.SearchEpc import SearchEpc + load_dotenv(dotenv_path="backend/.env") -EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN", "a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzA=") +EPC_AUTH_TOKEN = os.getenv( + "EPC_AUTH_TOKEN", +) -def extract_address1(asset_list, full_address_col, postcode_col, 
method="first_two_words"): +def extract_address1( + asset_list, full_address_col, postcode_col, method="first_two_words" +): if method == "first_two_words": - asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") + asset_list["address1_extracted"] = ( + asset_list[full_address_col].str.split(" ").str[:2].str.join(" ") + ) return asset_list if method == "first_word": - asset_list["address1_extracted"] = asset_list[full_address_col].str.split(" ").str[0] + asset_list["address1_extracted"] = ( + asset_list[full_address_col].str.split(" ").str[0] + ) return asset_list if method == "house_number_extraction": asset_list["address1_extracted"] = asset_list.apply( - lambda x: SearchEpc.get_house_number(address=x[full_address_col], postcode=x[postcode_col]), - axis=1 + lambda x: SearchEpc.get_house_number( + address=x[full_address_col], postcode=x[postcode_col] + ), + axis=1, ) return asset_list @@ -57,15 +68,11 @@ def app(): EPC recommendations Property UPRN """ -<<<<<<< HEAD - data_folder = ("/workspaces/model/asset_list") - data_filename = "assets.xlsx" -======= data_folder = "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Hackney" data_filename = "Domna SHF Wave 3 (3).xlsx" sheet_name = "Domna Wave 3" - postcode_column = 'Postcode' + postcode_column = "Postcode" address1_column = "Address 1" address1_method = None fulladdress_column = None @@ -96,15 +103,16 @@ def app(): landlord_block_reference = None # Peabody data for cleaning - data_folder = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - "Project/data_validation") + data_folder = ( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " + "Project/data_validation" + ) data_filename = "to_standardise_uprns.xlsx" ->>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c sheet_name = "Sheet1" - postcode_column = 'Postcode' + postcode_column = "Postcode" address1_column = None - address1_method = 
'house_number_extraction' - fulladdress_column = 'Address' + address1_method = "house_number_extraction" + fulladdress_column = "Address" address_cols_to_concat = None missing_postcodes_method = None landlord_year_built = None @@ -155,49 +163,62 @@ def app(): landlord_existing_pv=landlord_existing_pv, landlord_sap=landlord_sap, landlord_block_reference=landlord_block_reference, - phase=phase + phase=phase, ) asset_list.init_standardise() # We produce the new maps, which can be saved for future useage new_property_type_map = { - k: v for k, v in ( - asset_list.variable_mappings[asset_list.landlord_property_type] if - asset_list.landlord_property_type else {} + k: v + for k, v in ( + asset_list.variable_mappings[asset_list.landlord_property_type] + if asset_list.landlord_property_type + else {} ).items() if k not in PROPERTY_MAPPING } new_built_form_map = { - k: v for k, v in ( - asset_list.variable_mappings[asset_list.landlord_built_form] if - asset_list.landlord_built_form else {} + k: v + for k, v in ( + asset_list.variable_mappings[asset_list.landlord_built_form] + if asset_list.landlord_built_form + else {} ).items() if k not in BUILT_FORM_MAPPINGS } new_wall_map = { - k: v for k, v in ( - asset_list.variable_mappings[asset_list.landlord_wall_construction] if - asset_list.landlord_wall_construction else {} + k: v + for k, v in ( + asset_list.variable_mappings[asset_list.landlord_wall_construction] + if asset_list.landlord_wall_construction + else {} ).items() if k not in WALL_CONSTRUCTION_MAPPINGS } new_heating_map = { - k: v for k, v in ( - asset_list.variable_mappings[asset_list.landlord_heating_system] if - asset_list.landlord_heating_system else {} + k: v + for k, v in ( + asset_list.variable_mappings[asset_list.landlord_heating_system] + if asset_list.landlord_heating_system + else {} ).items() if k not in HEATING_MAPPINGS } new_existing_pv_map = { - k: v for k, v in ( - asset_list.variable_mappings[asset_list.landlord_existing_pv] if 
asset_list.landlord_existing_pv else {} + k: v + for k, v in ( + asset_list.variable_mappings[asset_list.landlord_existing_pv] + if asset_list.landlord_existing_pv + else {} ).items() if k not in EXISTING_PV_MAPPINGS } new_roof_construction_map = { - k: v for k, v in ( - asset_list.variable_mappings[asset_list.landlord_roof_construction] if - asset_list.landlord_roof_construction else {} + k: v + for k, v in ( + asset_list.variable_mappings[asset_list.landlord_roof_construction] + if asset_list.landlord_roof_construction + else {} ).items() if k not in ROOF_CONSTRUCTION_MAPPINGS } @@ -211,7 +232,7 @@ def app(): outcomes_address=outcomes_address, outcomes_postcode=outcomes_postcode, outcomes_houseno=outcomes_houseno, - outcomes_id=outcomes_id + outcomes_id=outcomes_id, ) asset_list.flag_survey_master( @@ -245,14 +266,16 @@ def app(): skip = max(chunk_indexes) if any(x in folder_contents for x in downloaded_files): - skip = max([i for i in chunk_indexes if filename.format(i=i) in folder_contents]) + skip = max( + [i for i in chunk_indexes if filename.format(i=i) in folder_contents] + ) for i in range(0, len(asset_list.standardised_asset_list), chunk_size): print(f"Processing chunk {i} to {i + chunk_size}") if skip is not None and not force_retrieve_data: if i <= skip: continue - chunk = asset_list.standardised_asset_list[i:i + chunk_size] + chunk = asset_list.standardised_asset_list[i : i + chunk_size] epc_data_chunk, errors_chunk, no_epc_chunk = get_data( df=chunk, row_id_name=asset_list.DOMNA_PROPERTY_ID, @@ -264,7 +287,7 @@ def app(): built_form_column=AssetList.STANDARD_BUILT_FORM, manual_uprn_map=manual_uprn_map, epc_api_only=epc_api_only, - epc_auth_token=EPC_AUTH_TOKEN + epc_auth_token=EPC_AUTH_TOKEN, ) # We now retrieve any failed properties @@ -287,7 +310,9 @@ def app(): # Append the failed data to the main data # Store the chunk locally as a csv - pd.DataFrame(epc_data_chunk).to_csv(os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False) + 
pd.DataFrame(epc_data_chunk).to_csv( + os.path.join(data_folder, f"Chunks/Chunk {i}.csv"), index=False + ) # Store the errors and no-data locally with open(os.path.join(data_folder, f"Chunks/Chunk {i} errors.json"), "w") as f: json.dump(errors_chunk, f) @@ -318,7 +343,9 @@ def app(): unique_recommendations = set() for _, row in recommendations_df.iterrows(): - unique_recommendations.update([rec["improvement-summary-text"] for rec in row["recommendations"]]) + unique_recommendations.update( + [rec["improvement-summary-text"] for rec in row["recommendations"]] + ) columns = [asset_list.DOMNA_PROPERTY_ID] + list(unique_recommendations) transformed_data = [] @@ -338,20 +365,24 @@ def app(): transformed_df = pd.DataFrame(transformed_data) for col in [ "Floor insulation (solid floor)", - "Floor insulation", "Floor insulation (suspended floor)" + "Floor insulation", + "Floor insulation (suspended floor)", ]: if col not in transformed_df.columns: transformed_df[col] = False transformed_df = transformed_df[ [ - asset_list.DOMNA_PROPERTY_ID, "Floor insulation (solid floor)", - "Floor insulation", "Floor insulation (suspended floor)" + asset_list.DOMNA_PROPERTY_ID, + "Floor insulation (solid floor)", + "Floor insulation", + "Floor insulation (suspended floor)", ] ] transformed_df["epc_has_floor_recommendation"] = ( - transformed_df["Floor insulation (solid floor)"] | transformed_df["Floor insulation"] | - transformed_df["Floor insulation (suspended floor)"] + transformed_df["Floor insulation (solid floor)"] + | transformed_df["Floor insulation"] + | transformed_df["Floor insulation (suspended floor)"] ) # Get the find my epc data @@ -364,21 +395,20 @@ def app(): find_my_epc_data.append( { asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID], - **x["find_my_epc_data"] + **x["find_my_epc_data"], } ) else: find_my_epc_data.append( - { - asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID] - } + {asset_list.DOMNA_PROPERTY_ID: x[asset_list.DOMNA_PROPERTY_ID]} ) 
find_my_epc_data = pd.DataFrame(find_my_epc_data) find_my_epc_data = find_my_epc_data.merge( transformed_df[[asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"]], - how="left", on=asset_list.DOMNA_PROPERTY_ID + how="left", + on=asset_list.DOMNA_PROPERTY_ID, ) # We check if we get the solar pv column: @@ -388,24 +418,26 @@ def app(): # Retrieve just the data we need epc_df = epc_df[ [asset_list.DOMNA_PROPERTY_ID] + list(asset_list.EPC_API_DATA_NAMES.keys()) - ].rename( - columns=asset_list.EPC_API_DATA_NAMES - ) + ].rename(columns=asset_list.EPC_API_DATA_NAMES) # Look for columns not in the find my EPC data, which will have happened if we didn't # retrieve it in the first place - missed_find_epc_cols = [c for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) if c not in find_my_epc_data.columns] + missed_find_epc_cols = [ + c + for c in list(asset_list.FIND_EPC_DATA_NAMES.keys()) + if c not in find_my_epc_data.columns + ] if missed_find_epc_cols: for c in missed_find_epc_cols: find_my_epc_data[c] = None epc_df = epc_df.merge( find_my_epc_data[ - [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + list(asset_list.FIND_EPC_DATA_NAMES.keys()) - ] - .rename(columns=asset_list.FIND_EPC_DATA_NAMES), + [asset_list.DOMNA_PROPERTY_ID, "epc_has_floor_recommendation"] + + list(asset_list.FIND_EPC_DATA_NAMES.keys()) + ].rename(columns=asset_list.FIND_EPC_DATA_NAMES), how="left", - on=asset_list.DOMNA_PROPERTY_ID + on=asset_list.DOMNA_PROPERTY_ID, ) asset_list.merge_data(epc_df) @@ -422,7 +454,10 @@ def app(): asset_list.get_work_figures() # Store as an excel - filename = os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + " - Standardised.xlsx" + filename = ( + os.path.join(data_folder, ".".join(data_filename.split(".")[:-1])) + + " - Standardised.xlsx" + ) # Store the data in two tabs. 
One for the asset list with the EPC data and the second with the flat data # Determine inspections priority @@ -446,26 +481,42 @@ def app(): # ) with pd.ExcelWriter(filename) as writer: - asset_list.standardised_asset_list.to_excel(writer, sheet_name="Standardised Asset List", index=False) + asset_list.standardised_asset_list.to_excel( + writer, sheet_name="Standardised Asset List", index=False + ) if asset_list.block_analysis_df is not None: - asset_list.block_analysis_df.to_excel(writer, sheet_name="Block Analysis", index=False) + asset_list.block_analysis_df.to_excel( + writer, sheet_name="Block Analysis", index=False + ) # If we have outcomes, we add a tab with the outcomes if not asset_list.outcomes_for_output.empty: - asset_list.outcomes_for_output.to_excel(writer, sheet_name="Outcomes", index=False) + asset_list.outcomes_for_output.to_excel( + writer, sheet_name="Outcomes", index=False + ) if not asset_list.unmatched_submissions.empty: - asset_list.unmatched_submissions.to_excel(writer, sheet_name="Unmatched Submissions", index=False) + asset_list.unmatched_submissions.to_excel( + writer, sheet_name="Unmatched Submissions", index=False + ) if not asset_list.outcomes_no_match.empty: - asset_list.outcomes_no_match.to_excel(writer, sheet_name="Unmatched Outcomes", index=False) + asset_list.outcomes_no_match.to_excel( + writer, sheet_name="Unmatched Outcomes", index=False + ) if not asset_list.ecosurv_no_match.empty: - asset_list.ecosurv_no_match.to_excel(writer, sheet_name="Unmatched Ecosurv", index=False) + asset_list.ecosurv_no_match.to_excel( + writer, sheet_name="Unmatched Ecosurv", index=False + ) if not asset_list.geographical_areas.empty: - asset_list.geographical_areas.to_excel(writer, sheet_name="Geographical Areas", index=False) + asset_list.geographical_areas.to_excel( + writer, sheet_name="Geographical Areas", index=False + ) # Store dupes if asset_list.duplicated_addresses is not None: if not asset_list.duplicated_addresses.empty: - 
asset_list.duplicated_addresses.to_excel(writer, sheet_name="Duplicate Properties", index=False) + asset_list.duplicated_addresses.to_excel( + writer, sheet_name="Duplicate Properties", index=False + ) diff --git a/backend/address2UPRN/Dockerfile b/backend/address2UPRN/Dockerfile new file mode 100644 index 00000000..ac6af2a5 --- /dev/null +++ b/backend/address2UPRN/Dockerfile @@ -0,0 +1,7 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Copy function code +COPY main.py . + +# Set the handler +CMD ["main.handler"] diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 58b25d74..9d27a5ce 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -14,6 +14,9 @@ EPC_AUTH_TOKEN = os.getenv( "EPC_AUTH_TOKEN", ) +if EPC_AUTH_TOKEN is None: + raise RuntimeError("EPC_AUTH_TOKEN not defined in env") + import re from difflib import SequenceMatcher from typing import Set @@ -38,6 +41,34 @@ def levenshtein(a: str, b: str) -> float: def tokenise(s: str) -> Set[str]: return set(s.split()) + def extract_building_number(s: str) -> str | None: + """ + Extract the main building number (NOT flat/unit). 
+ Assumes formats like: + - '42 moreton road' + - 'flat 3 42 moreton road' + """ + tokens = s.split() + + # remove flat/unit context + cleaned = [] + skip_next = False + for t in tokens: + if t in ("flat", "apt", "apartment", "unit"): + skip_next = True + continue + if skip_next: + skip_next = False + continue + cleaned.append(t) + + # first remaining number is building number + for t in cleaned: + if re.fullmatch(r"\d+[a-z]?", t): + return t + + return None + a_norm = normalise_address(a) b_norm = normalise_address(b) @@ -52,6 +83,13 @@ def levenshtein(a: str, b: str) -> float: if nums_a and nums_b and nums_a.isdisjoint(nums_b): return 0.0 + # 🔒 HARD GUARD: building number must match + bld_a = extract_building_number(a_norm) + bld_b = extract_building_number(b_norm) + + if bld_a and bld_b and bld_a != bld_b: + return 0.0 + # --- order-sensitive flat/building guard --- seq_a = extract_number_sequence(a_norm) seq_b = extract_number_sequence(b_norm) @@ -418,6 +456,10 @@ def run_all_test(): get_uprn("46 Oswald Street", "E5 0BT"), False ) # this one return "flat 1, in 1 semley gate" get_uprn_candidates(get_epc_data_with_postcode("e5 0bt"), "48 Oswald Street") + get_uprn_candidates( + get_epc_data_with_postcode("Cr2 7dl"), + "FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY", + ) if __name__ == "__main__": @@ -511,6 +553,11 @@ if __name__ == "__main__": ) +def handler(event, context): + print("hello world") + return {"statusCode": 200, "body": "hello world"} + + # TO do function dispatcher, # get_uprn_candidates(get_epc_data_with_postcode("E9 5NH"),"Flat 1, 5 Semley Gate" and Flat 5, 1 Semley Gate) diff --git a/backend/address2UPRN/tests/test_data.csv b/backend/address2UPRN/tests/test_data.csv index f3d9b64c..ee23813b 100644 --- a/backend/address2UPRN/tests/test_data.csv +++ b/backend/address2UPRN/tests/test_data.csv @@ -115,11 +115,16 @@ FLAT 43 Goodstone Court,HA1 4FL,10070269095 8 Genteel House Samara Drive,UB1 1FJ,12189842 9 Genteel House Samara Drive,UB1 1FJ,12189843 
10 Genteel House Samara Drive,UB1 1FJ,12189844 -1 ASH TREE HOUSE,SE5 0TE,10009803979 -3 ASH TREE HOUSE,SE5 0TE,10009803981 -5 ASH TREE HOUSE,SE5 0TE,10009803983 -8 ASH TREE HOUSE,SE5 0TE,10009803986 -12 ASH TREE HOUSE,SE5 0TE,10009803990 +1 ASH TREE HOUSE,SE5 0TE,None +"Flat 1 Ash Tree House, 2, Thompson Avenue",SE5 0TE,10009803979 +3 ASH TREE HOUSE,SE5 0TE,None +Flat 3 ASH TREE HOUSE,SE5 0TE,10009803981 +5 ASH TREE HOUSE,SE5 0TE,None +Flat 5 ASH TREE HOUSE,SE5 0TE,10009803983 +Flat 8 ASH TREE HOUSE,SE5 0TE,10009803986 +8 ASH TREE HOUSE,SE5 0TE,None +Flat 12 ASH TREE HOUSE,SE5 0TE,10009803990 +12 ASH TREE HOUSE,SE5 0TE,None FLAT 1 599 HARROW ROAD,W10 4RA,217113930 FLAT 2 599 HARROW ROAD,W10 4RA,217113931 FLAT 3 599 HARROW ROAD,W10 4RA,None @@ -164,4 +169,198 @@ FLAT 8 599 HARROW ROAD,W10 4RA,None 24b Honley Road,SE6 2HZ,None FLAT B 158 LEAHURST ROAD,SE13 5NL,100021976974 2 COLLEGE HOUSE,CM7 1JS,100091449870 -3 COLLEGE HOUSE,CM7 1JS,100091449871 \ No newline at end of file +3 COLLEGE HOUSE,CM7 1JS,100091449871 +1 Anita Street,M4 5DU,None +2 Anita Street,M4 5DU,77123061 +5 Anita Street,M4 5DU,77123081 +6 Anita Street,M4 5DU,77123082 +8 Anita Street,M4 5DU,None +9 Anita Street,M4 5DU,None +10 Anita Street,M4 5DU,77123051 +12 Anita Street,M4 5DU,77123053 +19 Anita Street,M4 5DU,None +22 Anita Street,M4 5DU,None +26 Anita Street,M4 5DU,77123068 +28 Anita Street,M4 5DU,None +30 Anita Street,M4 5DU,None +32 Anita Street,M4 5DU,None +33 Anita Street,M4 5DU,77123076 +34 Anita Street,M4 5DU,None +35 Anita Street,M4 5DU,77123078 +36 Anita Street,M4 5DU,77123079 +23 George Leigh Street,M4 5DR,77123171 +25 George Leigh Street,M4 5DR,None +35 George Leigh Street,M4 5DR,77123177 +39 George Leigh Street,M4 5DR,77123179 +41 George Leigh Street,M4 5DR,None +43 George Leigh Street,M4 5DR,None +49 George Leigh Street,M4 5DR,None +51 George Leigh Street,M4 5DR,77123185 +55 George Leigh Street,M4 5DR,None +57 George Leigh Street,M4 5DR,None +"1a, Victoria Square",M4 5DX,77211153 +2a 
Victoria Square ,M4 5DX,None +"4a, Victoria Square",M4 5DX,77211155 +5a Victoria Square,M4 5DX,77211156 + 6a Victoria Square,M4 5DX,77211157 +7a Victoria Square,M4 5DX,77211158 +8a Victoria Square,M4 5DX,77211159 +9a Victoria Square,M4 5DX,77211160 +10a Victoria Square,M4 5DX,77211161 +11a Victoria Square,M4 5DX,77211162 +12a Victoria Square,M4 5DX,77211163 +13a Victoria Square,M4 5DX,77211164 +14a Victoria Square,M4 5DX,77211165 +15a Victoria Square,M4 5DX,77211166 +16a Victoria Square,M4 5DX,77211167 +17a Victoria Square,M4 5DX,77211168 +18a Victoria Square,M4 5DX,77211169 +19a Victoria Square,M4 5DX,77211170 +20a Victoria Square,M4 5DX,77211171 +21a Victoria Square,M4 5DY,77211172 +22a Victoria Square,M4 5DY,None +23a Victoria Square,M4 5DY,77211174 +24a Victoria Square,M4 5DY,77211175 +25a Victoria Square,M4 5DY,77211176 +26a Victoria Square,M4 5DY,77211177 +27a Victoria Square,M4 5DY,77211178 +28a Victoria Square,M4 5DY,None +29a Victoria Square,M4 5DY,77211180 +30a Victoria Square,M4 5DY,77211181 +31a Victoria Square,M4 5DY,77211182 +32a Victoria Square,M4 5DY,77211183 +33a Victoria Square,M4 5DY,77211184 +34a Victoria Square,M4 5DY,77211185 +35a Victoria Square,M4 5DY,None +36a Victoria Square,M4 5DY,77211187 +37a Victoria Square,M4 5DY,77211188 +38a Victoria Square,M4 5DY,77211189 +39a Victoria Square,M4 5DY,77211190 +40a Victoria Square,M4 5DY,None +41a Victoria Square,M4 5DY,77211192 +42a Victoria Square,M4 5DY,77211193 +43a Victoria Square,M4 5DY,77211194 +44a Victoria Square,M4 5DY,77211195 +45a Victoria Square,M4 5DY,77211196 +46a Victoria Square,M4 5DY,77211197 +47a Victoria Square,M4 5DY,77211198 +48a Victoria Square,M4 5DY,77211199 +49a Victoria Square,M4 5DY,77211200 +50a Victoria Square,M4 5DY,77211201 +51a Victoria Square,M4 5DY,77211202 +52a Victoria Square,M4 5DY,77211203 +53a Victoria Square,M4 5DY,77211204 +54a Victoria Square,M4 5DY,77211205 +55a Victoria Square,M4 5DY,77211206 +56a Victoria Square,M4 5DZ,77211207 +57a Victoria Square,M4 
5DZ,None +58a Victoria Square,M4 5DZ,77211209 +59a Victoria Square,M4 5DZ,77211210 +60a Victoria Square,M4 5DZ,77211211 +61a Victoria Square,M4 5DZ,77211212 +62a Victoria Square,M4 5DZ,77211213 +63a Victoria Square,M4 5DZ,None +64a Victoria Square,M4 5DZ,77211215 +65a Victoria Square,M4 5DZ,77211216 +66a Victoria Square,M4 5DZ,None +67a Victoria Square,M4 5DZ,None +68a Victoria Square,M4 5DZ,77211219 +69a Victoria Square,M4 5DZ,77211220 +70a Victoria Square,M4 5DZ,77211221 +71a Victoria Square,M4 5DZ,77211222 +72a Victoria Square,M4 5DZ,77211223 +73a Victoria Square,M4 5DZ,77211224 +74a Victoria Square,M4 5DZ,None +75a Victoria Square,M4 5DZ,77211226 +76a Victoria Square,M4 5DZ,77211227 +77a Victoria Square,M4 5DZ,None +78a Victoria Square,M4 5DZ,77211229 +79a Victoria Square,M4 5DZ,77211230 +80a Victoria Square,M4 5DZ,77211231 +81a Victoria Square,M4 5DZ,77211232 +82 Victoria Square,M4 5DZ,None +83a Victoria Square,M4 5DZ,77211234 +84a Victoria Square,M4 5DZ,None +85a Victoria Square,M4 5DZ,77211236 +86a Victoria Square,M4 5DZ,77211237 +87a Victoria Square,M4 5DZ,77211238 +88a Victoria Square,M4 5DZ,None +89a Victoria Square,M4 5DZ,77211240 +90a Victoria Square,M4 5DZ,77211241 +91a Victoria Square,M4 5DZ,77211242 +92a Victoria Square,M4 5DZ,77211243 +93a Victoria Square,M4 5EA,77211244 +94a Victoria Square,M4 5EA,None +95a Victoria Square,M4 5EA,77211246 +96a Victoria Square,M4 5EA,77211247 +97a Victoria Square,M4 5EA,77211248 +98a Victoria Square,M4 5EA,77211249 +99a Victoria Square,M4 5EA,77211250 +100a Victoria Square,M4 5EA,77211251 +101a Victoria Square,M4 5EA,None +102a Victoria Square,M4 5EA,None +103a Victoria Square,M4 5EA,77211254 +104a Victoria Square,M4 5EA,77211255 +105a Victoria Square,M4 5EA,None +106a Victoria Square,M4 5EA,77211257 +107a Victoria Square,M4 5EA,77211258 +108a Victoria Square,M4 5EA,77211259 +109a Victoria Square,M4 5EA,77211260 +110a Victoria Square,M4 5EA,77211261 +111a Victoria Square,M4 5EA,77211262 +112a Victoria Square,M4 
5EA,None +113a Victoria Square,M4 5EA,77211264 +114a Victoria Square,M4 5EA,77211265 +115a Victoria Square,M4 5EA,77211266 +116a Victoria Square,M4 5EA,77211267 +117a Victoria Square,M4 5EA,None +118a Victoria Square,M4 5EA,None +119a Victoria Square,M4 5EA,77211270 +120a Victoria Square,M4 5EA,77211271 +121a Victoria Square,M4 5EA,77211272 +122a Victoria Square,M4 5EA,77211273 +123a Victoria Square,M4 5EA,77211274 +124a Victoria Square,M4 5EA,None +125a Victoria Square,M4 5EA,77211276 +126a Victoria Square,M4 5EA,77211277 +127a Victoria Square,M4 5EA,77211278 +128a Victoria Square,M4 5EA,77211279 +129a Victoria Square,M4 5EA,77211280 +130a Victoria Square,M4 5FA,77211281 +131a Victoria Square,M4 5FA,77211282 +132a Victoria Square,M4 5FA,77211283 +133a Victoria Square,M4 5FA,None +134a Victoria Square,M4 5FA,77211285 +135a Victoria Square,M4 5FA,77211286 +136a Victoria Square,M4 5FA,77211287 +137a Victoria Square,M4 5FA,77211288 +138a Victoria Square,M4 5FA,77211289 +139a Victoria Square,M4 5FA,77211290 +140a Victoria Square,M4 5FA,77211291 +141a Victoria Square,M4 5FA,77211292 +142a Victoria Square,M4 5FA,77211293 +143a Victoria Square,M4 5FA,77211294 +144a Victoria Square,M4 5FA,77211295 +145a Victoria Square,M4 5FA,None +146a Victoria Square,M4 5FA,77211297 +147a Victoria Square,M4 5FA,77211298 +148a Victoria Square,M4 5FA,77211299 +149a Victoria Square,M4 5FA,77211300 +150a Victoria Square,M4 5FA,77211301 +151a Victoria Square,M4 5FA,None +152a Victoria Square,M4 5FA,77211303 +153a Victoria Square,M4 5FA,None +154a Victoria Square,M4 5FA,77211305 +155a Victoria Square,M4 5FA,None +156a Victoria Square,M4 5FA,77211307 +157a Victoria Square,M4 5FA,77211308 +158a Victoria Square,M4 5FA,77211309 +159a Victoria Square,M4 5FA,None +160a Victoria Square,M4 5FA,77211311 +161a Victoria Square,M4 5FA,None +162a Victoria Square,M4 5FA,None +163a Victoria Square,M4 5FA,77211314 +164a Victoria Square,M4 5FA,77211315 +165a Victoria Square,M4 5FA,77211316 +166a Victoria 
Square,M4 5FA,None +"FLAT 3; 42 MORETON ROAD, SOUTH CROYDON, SURREY",CR2 7DL,None \ No newline at end of file diff --git a/infrastructure/terraform/lambda/_template/README.md b/infrastructure/terraform/lambda/_template/README.md new file mode 100644 index 00000000..a7282fc9 --- /dev/null +++ b/infrastructure/terraform/lambda/_template/README.md @@ -0,0 +1,51 @@ +## Checklist for adding a new Lambda + +### 1. Create the Lambda scaffold +- Copy the template: + + cp -r lambda/_template lambda/YOUR_LAMBDA_NAME + +--- + +### 2. Add infrastructure prerequisites (shared stack) +- Add a new ECR repository in: + + infrastructure/terraform/shared/main.tf + +- Apply the shared stack + - This requires commenting out 'if env.stage == "prod"' in .github/workflows/deploy_terraform.yml + +- Verify the ECR repository exists in AWS + +--- + +### 3. Add Docker build configuration +- Create a `Dockerfile` for the Lambda +- Verify the Dockerfile path and build context +- Add a new image build job in `deploy_terraform.yml` using `_build_image.yml` + +--- + +### 4. Wire the Lambda deploy job (CI) +- Add a deploy job using `_deploy_lambda.yml` +- Ensure the deploy job depends on the image build job + +--- + +### 5. Deploy +- Push changes to GitHub +- CI will: + 1. Build and push the Docker image + 2. Deploy the Lambda + 3. Verify everything deployed. Good things to check: + - ECR with image + - SQS + - Trigger SQS + - CloudWatch logs +--- +### 6. Delete + 1.
Delete README if you used cp -r + +--- + +## Please feel free to update this document to make it easier for the next person \ No newline at end of file diff --git a/infrastructure/terraform/lambda/_template/main.tf b/infrastructure/terraform/lambda/_template/main.tf new file mode 100644 index 00000000..3010aa8a --- /dev/null +++ b/infrastructure/terraform/lambda/_template/main.tf @@ -0,0 +1,14 @@ +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = REPLACE ME #"address2uprn" for example + stage = var.stage + + image_uri = local.image_uri + + + environment = { + STAGE = var.stage + LOG_LEVEL = "info" + } +} diff --git a/infrastructure/terraform/lambda/_template/provider.tf b/infrastructure/terraform/lambda/_template/provider.tf new file mode 100644 index 00000000..37c412ce --- /dev/null +++ b/infrastructure/terraform/lambda/_template/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = REPLACE_ME + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/_template/variables.tf b/infrastructure/terraform/lambda/_template/variables.tf new file mode 100644 index 00000000..e4bab243 --- /dev/null +++ b/infrastructure/terraform/lambda/_template/variables.tf @@ -0,0 +1,27 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. 
dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/lambda/address2UPRN/main.tf b/infrastructure/terraform/lambda/address2UPRN/main.tf new file mode 100644 index 00000000..46b193f2 --- /dev/null +++ b/infrastructure/terraform/lambda/address2UPRN/main.tf @@ -0,0 +1,14 @@ +module "address2uprn" { + source = "../modules/lambda_with_sqs" + + name = "address2uprn" + stage = var.stage + + image_uri = local.image_uri + + + environment = { + STAGE = var.stage + LOG_LEVEL = "info" + } +} diff --git a/infrastructure/terraform/lambda/address2UPRN/provider.tf b/infrastructure/terraform/lambda/address2UPRN/provider.tf new file mode 100644 index 00000000..ad873717 --- /dev/null +++ b/infrastructure/terraform/lambda/address2UPRN/provider.tf @@ -0,0 +1,17 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "address2uprn-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} + diff --git a/infrastructure/terraform/lambda/address2UPRN/variables.tf b/infrastructure/terraform/lambda/address2UPRN/variables.tf new file mode 100644 index 00000000..e4bab243 --- /dev/null +++ b/infrastructure/terraform/lambda/address2UPRN/variables.tf @@ -0,0 +1,27 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. 
dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf new file mode 100644 index 00000000..3816c206 --- /dev/null +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/main.tf @@ -0,0 +1,44 @@ +############################################ +# IAM role +############################################ +module "role" { + source = "../../../modules/lambda_execution_role" + name = "${var.name}-lambda-${var.stage}" +} + +############################################ +# SQS queue + DLQ +############################################ +module "queue" { + source = "../../../modules/sqs_queue" + name = "${var.name}-queue-${var.stage}" +} + +############################################ +# Lambda +############################################ +module "lambda" { + source = "../../../modules/lambda_service" + + name = "${var.name}-${var.stage}" + role_arn = module.role.role_arn + image_uri = var.image_uri + + timeout = var.timeout + memory_size = var.memory_size + + environment = var.environment +} + +############################################ +# SQS → Lambda trigger +############################################ +module "sqs_trigger" { + source = "../../../modules/lambda_sqs_trigger" + + lambda_arn = module.lambda.lambda_arn + lambda_role_name = module.role.role_name + queue_arn = module.queue.queue_arn + + batch_size = var.batch_size +} diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf new file mode 100644 index 00000000..afc9246d --- 
/dev/null +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/outputs.tf @@ -0,0 +1,11 @@ +output "lambda_arn" { + value = module.lambda.lambda_arn +} + +output "queue_arn" { + value = module.queue.queue_arn +} + +output "queue_url" { + value = module.queue.queue_url +} diff --git a/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf new file mode 100644 index 00000000..b20ab2a8 --- /dev/null +++ b/infrastructure/terraform/lambda/modules/lambda_with_sqs/variables.tf @@ -0,0 +1,36 @@ +variable "name" { + type = string +} + +variable "stage" { + type = string +} + +variable "image_uri" { + type = string +} + +variable "region" { + type = string + default = "eu-west-2" +} + +variable "timeout" { + type = number + default = 60 +} + +variable "memory_size" { + type = number + default = 1024 +} + +variable "environment" { + type = map(string) + default = {} +} + +variable "batch_size" { + type = number + default = 10 +} diff --git a/infrastructure/terraform/modules/container_registry/main.tf b/infrastructure/terraform/modules/container_registry/main.tf new file mode 100644 index 00000000..f5ba8d5e --- /dev/null +++ b/infrastructure/terraform/modules/container_registry/main.tf @@ -0,0 +1,30 @@ +resource "aws_ecr_repository" "this" { + name = "${var.name}-${var.stage}" + + image_tag_mutability = "MUTABLE" + + image_scanning_configuration { + scan_on_push = true + } +} + +resource "aws_ecr_lifecycle_policy" "this" { + repository = aws_ecr_repository.this.name + + policy = jsonencode({ + rules = [ + { + rulePriority = 1 + description = "Expire old images" + selection = { + tagStatus = "any" + countType = "imageCountMoreThan" + countNumber = var.retain_count + } + action = { + type = "expire" + } + } + ] + }) +} diff --git a/infrastructure/terraform/modules/container_registry/outputs.tf b/infrastructure/terraform/modules/container_registry/outputs.tf new file mode 100644 index 
00000000..47a4bc64 --- /dev/null +++ b/infrastructure/terraform/modules/container_registry/outputs.tf @@ -0,0 +1,11 @@ +output "repository_name" { + value = aws_ecr_repository.this.name +} + +output "repository_url" { + value = aws_ecr_repository.this.repository_url +} + +output "repository_arn" { + value = aws_ecr_repository.this.arn +} diff --git a/infrastructure/terraform/modules/container_registry/variables.tf b/infrastructure/terraform/modules/container_registry/variables.tf new file mode 100644 index 00000000..11821b31 --- /dev/null +++ b/infrastructure/terraform/modules/container_registry/variables.tf @@ -0,0 +1,15 @@ +variable "name" { + description = "Base name of the repository (without stage)" + type = string +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} + +variable "retain_count" { + description = "Number of images to retain" + type = number + default = 10 +} diff --git a/infrastructure/terraform/modules/ecr/main.tf b/infrastructure/terraform/modules/ecr/main.tf index 468ef3d2..d93d1340 100644 --- a/infrastructure/terraform/modules/ecr/main.tf +++ b/infrastructure/terraform/modules/ecr/main.tf @@ -1,3 +1,6 @@ +# This ecr works for things deployed by serverless. 
+# TODO: unify ecr and container_registry to one + resource "aws_ecr_repository" "my_repository" { name = "${var.ecr_name}" image_tag_mutability = "MUTABLE" diff --git a/infrastructure/terraform/modules/ecr/outputs.tf b/infrastructure/terraform/modules/ecr/outputs.tf index 53839718..7f045412 100644 --- a/infrastructure/terraform/modules/ecr/outputs.tf +++ b/infrastructure/terraform/modules/ecr/outputs.tf @@ -1,4 +1,10 @@ output "ecr_repository_name" { description = "Name of the EPR repo in AWS" value = aws_ecr_repository.my_repository.name +} + + +output "ecr_repository_url" { + description = "Full ECR repository URL" + value = aws_ecr_repository.my_repository.repository_url } \ No newline at end of file diff --git a/infrastructure/terraform/modules/lambda_execution_role/main.tf b/infrastructure/terraform/modules/lambda_execution_role/main.tf new file mode 100644 index 00000000..fa657afd --- /dev/null +++ b/infrastructure/terraform/modules/lambda_execution_role/main.tf @@ -0,0 +1,37 @@ +data "aws_iam_policy_document" "assume" { + statement { + effect = "Allow" + principals { + type = "Service" + identifiers = ["lambda.amazonaws.com"] + } + actions = ["sts:AssumeRole"] + } +} + +resource "aws_iam_role" "this" { + name = var.name + assume_role_policy = data.aws_iam_policy_document.assume.json +} + +resource "aws_iam_role_policy_attachment" "basic_logs" { + role = aws_iam_role.this.name + policy_arn = "arn:aws:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole" +} + +resource "aws_iam_role_policy" "ecr_pull" { + role = aws_iam_role.this.name + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "ecr:GetAuthorizationToken", + "ecr:BatchGetImage", + "ecr:GetDownloadUrlForLayer" + ] + Resource = "*" + }] + }) +} diff --git a/infrastructure/terraform/modules/lambda_execution_role/outputs.tf b/infrastructure/terraform/modules/lambda_execution_role/outputs.tf new file mode 100644 index 00000000..1baca34d --- /dev/null 
+++ b/infrastructure/terraform/modules/lambda_execution_role/outputs.tf @@ -0,0 +1,7 @@ +output "role_arn" { + value = aws_iam_role.this.arn +} + +output "role_name" { + value = aws_iam_role.this.name +} diff --git a/infrastructure/terraform/modules/lambda_execution_role/variables.tf b/infrastructure/terraform/modules/lambda_execution_role/variables.tf new file mode 100644 index 00000000..f9f512ff --- /dev/null +++ b/infrastructure/terraform/modules/lambda_execution_role/variables.tf @@ -0,0 +1,4 @@ +variable "name" { + description = "IAM role name for the Lambda execution role" + type = string +} diff --git a/infrastructure/terraform/modules/lambda_service/main.tf b/infrastructure/terraform/modules/lambda_service/main.tf new file mode 100644 index 00000000..8a159db1 --- /dev/null +++ b/infrastructure/terraform/modules/lambda_service/main.tf @@ -0,0 +1,15 @@ +resource "aws_lambda_function" "this" { + function_name = var.name + role = var.role_arn + + package_type = "Image" + image_uri = var.image_uri + + timeout = var.timeout + memory_size = var.memory_size + publish = true + + environment { + variables = var.environment + } +} diff --git a/infrastructure/terraform/modules/lambda_service/outputs.tf b/infrastructure/terraform/modules/lambda_service/outputs.tf new file mode 100644 index 00000000..dd05cccf --- /dev/null +++ b/infrastructure/terraform/modules/lambda_service/outputs.tf @@ -0,0 +1,3 @@ +output "lambda_arn" { + value = aws_lambda_function.this.arn +} diff --git a/infrastructure/terraform/modules/lambda_service/variables.tf b/infrastructure/terraform/modules/lambda_service/variables.tf new file mode 100644 index 00000000..43def6ad --- /dev/null +++ b/infrastructure/terraform/modules/lambda_service/variables.tf @@ -0,0 +1,18 @@ +variable "name" { type = string } +variable "role_arn" { type = string } +variable "image_uri" { type = string } + +variable "timeout" { + type = number + default = 30 +} + +variable "memory_size" { + type = number + default = 512 
+} + +variable "environment" { + type = map(string) + default = {} +} diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf new file mode 100644 index 00000000..5919e10f --- /dev/null +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/main.tf @@ -0,0 +1,23 @@ +resource "aws_lambda_event_source_mapping" "this" { + event_source_arn = var.queue_arn + function_name = var.lambda_arn + batch_size = var.batch_size + enabled = true +} + +resource "aws_iam_role_policy" "allow_sqs" { + role = var.lambda_role_name + + policy = jsonencode({ + Version = "2012-10-17" + Statement = [{ + Effect = "Allow" + Action = [ + "sqs:ReceiveMessage", + "sqs:DeleteMessage", + "sqs:GetQueueAttributes" + ] + Resource = var.queue_arn + }] + }) +} diff --git a/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf new file mode 100644 index 00000000..0e50cd54 --- /dev/null +++ b/infrastructure/terraform/modules/lambda_sqs_trigger/variables.tf @@ -0,0 +1,8 @@ +variable "lambda_arn" { type = string } +variable "lambda_role_name" { type = string } +variable "queue_arn" { type = string } + +variable "batch_size" { + type = number + default = 10 +} diff --git a/infrastructure/terraform/modules/sqs_queue/main.tf b/infrastructure/terraform/modules/sqs_queue/main.tf new file mode 100644 index 00000000..580e67bd --- /dev/null +++ b/infrastructure/terraform/modules/sqs_queue/main.tf @@ -0,0 +1,14 @@ +resource "aws_sqs_queue" "dlq" { + name = "${var.name}-dlq" +} + +resource "aws_sqs_queue" "this" { + name = var.name + + visibility_timeout_seconds = 120 + + redrive_policy = jsonencode({ + deadLetterTargetArn = aws_sqs_queue.dlq.arn + maxReceiveCount = var.max_receive_count + }) +} diff --git a/infrastructure/terraform/modules/sqs_queue/outputs.tf b/infrastructure/terraform/modules/sqs_queue/outputs.tf new file mode 100644 index 
00000000..46fafe90 --- /dev/null +++ b/infrastructure/terraform/modules/sqs_queue/outputs.tf @@ -0,0 +1,7 @@ +output "queue_arn" { + value = aws_sqs_queue.this.arn +} + +output "queue_url" { + value = aws_sqs_queue.this.url +} diff --git a/infrastructure/terraform/modules/sqs_queue/variables.tf b/infrastructure/terraform/modules/sqs_queue/variables.tf new file mode 100644 index 00000000..943a7a16 --- /dev/null +++ b/infrastructure/terraform/modules/sqs_queue/variables.tf @@ -0,0 +1,6 @@ +variable "name" { type = string } + +variable "max_receive_count" { + type = number + default = 5 +} diff --git a/infrastructure/terraform/modules/tf_state_bucket/main.tf b/infrastructure/terraform/modules/tf_state_bucket/main.tf new file mode 100644 index 00000000..86c0cc21 --- /dev/null +++ b/infrastructure/terraform/modules/tf_state_bucket/main.tf @@ -0,0 +1,30 @@ +resource "aws_s3_bucket" "this" { + bucket = var.bucket_name +} + +resource "aws_s3_bucket_versioning" "this" { + bucket = aws_s3_bucket.this.id + + versioning_configuration { + status = "Enabled" + } +} + +resource "aws_s3_bucket_server_side_encryption_configuration" "this" { + bucket = aws_s3_bucket.this.id + + rule { + apply_server_side_encryption_by_default { + sse_algorithm = "AES256" + } + } +} + +resource "aws_s3_bucket_public_access_block" "this" { + bucket = aws_s3_bucket.this.id + + block_public_acls = true + block_public_policy = true + ignore_public_acls = true + restrict_public_buckets = true +} diff --git a/infrastructure/terraform/modules/tf_state_bucket/outputs.tf b/infrastructure/terraform/modules/tf_state_bucket/outputs.tf new file mode 100644 index 00000000..e8ceffd1 --- /dev/null +++ b/infrastructure/terraform/modules/tf_state_bucket/outputs.tf @@ -0,0 +1,7 @@ +output "bucket_name" { + value = aws_s3_bucket.this.bucket +} + +output "bucket_arn" { + value = aws_s3_bucket.this.arn +} diff --git a/infrastructure/terraform/modules/tf_state_bucket/variables.tf 
b/infrastructure/terraform/modules/tf_state_bucket/variables.tf new file mode 100644 index 00000000..b3aae9bb --- /dev/null +++ b/infrastructure/terraform/modules/tf_state_bucket/variables.tf @@ -0,0 +1,3 @@ +variable "bucket_name" { + type = string +} diff --git a/infrastructure/terraform/dev.tfvars b/infrastructure/terraform/shared/dev.tfvars similarity index 95% rename from infrastructure/terraform/dev.tfvars rename to infrastructure/terraform/shared/dev.tfvars index 92b7e158..53ca6d9e 100644 --- a/infrastructure/terraform/dev.tfvars +++ b/infrastructure/terraform/shared/dev.tfvars @@ -1,5 +1,4 @@ stage = "dev" -profile = "DevAdmin" region = "eu-west-2" # Domain diff --git a/infrastructure/terraform/main.tf b/infrastructure/terraform/shared/main.tf similarity index 81% rename from infrastructure/terraform/main.tf rename to infrastructure/terraform/shared/main.tf index 5a67b793..3ba78ef3 100644 --- a/infrastructure/terraform/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -8,7 +8,6 @@ terraform { backend "s3" { bucket = "assessment-model-terraform-state" region = "eu-west-2" - profile = "DevAdmin" key = "terraform.tfstate" } @@ -16,7 +15,6 @@ terraform { } provider "aws" { - profile = var.profile region = var.region } @@ -91,101 +89,101 @@ resource "aws_db_instance" "default" { # Set up the bucket that recieve the csv uploads of epc to be retrofit module "s3_presignable_bucket" { - source = "./modules/s3_presignable_bucket" + source = "../modules/s3_presignable_bucket" bucketname = "retrofit-plan-inputs-${var.stage}" environment = var.stage allowed_origins = var.allowed_origins } module "s3_due_considerations_bucket" { - source = "./modules/s3_presignable_bucket" + source = "../modules/s3_presignable_bucket" bucketname = "retrofit-due-considerations-${var.stage}" environment = var.stage allowed_origins = var.allowed_origins } module "s3_eco_spreadseet_bucket" { - source = "./modules/s3_presignable_bucket" + source = "../modules/s3_presignable_bucket" 
bucketname = "retrofit-eco-spreadsheet-${var.stage}" environment = var.stage allowed_origins = var.allowed_origins } module "s3" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-datalake-${var.stage}" allowed_origins = var.allowed_origins } module "model_directory" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-model-directory-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_sap_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-sap-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_sap_data" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-data-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_carbon_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-carbon-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_heat_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-heat-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_lighting_cost_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-lighting-cost-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_heating_cost_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-heating-cost-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_hot_water_cost_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-hot-water-cost-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_heating_kwh_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-heating-kwh-predictions-${var.stage}" allowed_origins = var.allowed_origins } module 
"retrofit_hotwater_kwh_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-hotwater-kwh-predictions-${var.stage}" allowed_origins = var.allowed_origins } module "retrofit_sap_baseline_predictions" { - source = "./modules/s3" + source = "../modules/s3" bucketname = "retrofit-sap-baseline-predictions-${var.stage}" allowed_origins = var.allowed_origins } // We make this bucket presignable, because we want to generate download links for the frontend module "retrofit_energy_assessments" { - source = "./modules/s3_presignable_bucket" + source = "../modules/s3_presignable_bucket" bucketname = "retrofit-energy-assessments-${var.stage}" allowed_origins = var.allowed_origins environment = var.stage @@ -193,7 +191,7 @@ module "retrofit_energy_assessments" { # Set up the route53 record for the API module "route53" { - source = "./modules/route53" + source = "../modules/route53" domain_name = var.domain_name api_url_prefix = var.api_url_prefix providers = { @@ -201,75 +199,76 @@ module "route53" { } } + # Create an ECR repository for storage of the lambda's docker images module "ecr" { ecr_name = "fastapi-repository-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "lambda_sap_prediction_ecr" { ecr_name = "lambda-sap-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "due_considerations_ecr" { ecr_name = "due-considerations-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "eco_spreadsheet_ecr" { ecr_name = "eco-spreadsheet-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "lambda_carbon_prediction_ecr" { ecr_name = "lambda-carbon-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "lambda_heat_prediction_ecr" { ecr_name = "lambda-heat-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } # ECR repos for lighting cost, heating cost and hot water cost 
models module "lambda_lighting_cost_prediction_ecr" { ecr_name = "lighting-cost-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "lambda_heating_cost_prediction_ecr" { ecr_name = "heating-cost-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "lambda_hot_water_cost_prediction_ecr" { ecr_name = "hot-water-cost-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } # For heating and hot water kwh models module "lambda_heating_kwh_prediction_ecr" { ecr_name = "heating-kwh-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } module "lambda_hotwater_kwh_prediction_ecr" { ecr_name = "hotwater-kwh-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } # Baselining models module "sap_baseline_ecr" { ecr_name = "sap-baseline-prediction-${var.stage}" - source = "./modules/ecr" + source = "../modules/ecr" } ############################################## # CDN - Cloudfront ############################################## module "cloudfront_distribution" { - source = "./modules/cloudfront" + source = "../modules/cloudfront" bucket_name = module.s3.bucket_name bucket_id = module.s3.bucket_id bucket_arn = module.s3.bucket_arn @@ -281,11 +280,35 @@ module "cloudfront_distribution" { # SES - Email sending ################################################ module "ses" { - source = "./modules/ses" + source = "../modules/ses" domain_name = "domna.homes" stage = var.stage } output "ses_dns_records" { value = module.ses.dns_records +} + +################################################ +# Address2UPRN – Lambda ECR +################################################ +module "address2uprn_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "address2uprn-terraform-state" + +} + +output "address2uprn_state_bucket_name" { + value = module.address2uprn_state_bucket.bucket_name +} + +module "address2uprn_registry" { + 
source = "../modules/container_registry" + name = "address2uprn" + stage = var.stage + +} + +output "address2uprn_repository_url" { + value = module.address2uprn_registry.repository_url } \ No newline at end of file diff --git a/infrastructure/terraform/secrets.tf b/infrastructure/terraform/shared/secrets.tf similarity index 100% rename from infrastructure/terraform/secrets.tf rename to infrastructure/terraform/shared/secrets.tf diff --git a/infrastructure/terraform/variables.tf b/infrastructure/terraform/shared/variables.tf similarity index 90% rename from infrastructure/terraform/variables.tf rename to infrastructure/terraform/shared/variables.tf index 76734340..e922e465 100644 --- a/infrastructure/terraform/variables.tf +++ b/infrastructure/terraform/shared/variables.tf @@ -3,11 +3,6 @@ variable stage { type = string } -variable "profile" { - description = "AWS profile to use" - type = string -} - variable "region" { description = "AWS region" type = string diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index f0fc5cd1..ae807654 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -7,20 +7,29 @@ import numpy as np from backend.app.utils import sap_to_epc from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine, db_read_session -from backend.app.db.models.recommendations import Recommendation, Plan, PlanRecommendations, RecommendationMaterials -from backend.app.db.models.portfolio import PropertyModel, PropertyDetailsEpcModel, PropertyDetailsSpatial +from backend.app.db.models.recommendations import ( + Recommendation, + Plan, + PlanRecommendations, + RecommendationMaterials, +) +from backend.app.db.models.portfolio import ( + PropertyModel, + PropertyDetailsEpcModel, + PropertyDetailsSpatial, +) from backend.app.db.functions.materials_functions import get_materials from collections import defaultdict from sqlalchemy import func # PORTFOLIO_ID = 206 
# SCENARIOS = [389] -PORTFOLIO_ID = 485 # Peabody +PORTFOLIO_ID = 502 # Peabody SCENARIOS = [ - 970, + 986, ] scenario_names = { - 970: "EPC C - No solid floor, EQI, IWI", + 986: "EPC C", } @@ -31,22 +40,26 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Properties # -------------------- - properties_query = session.query( - PropertyModel, - PropertyDetailsEpcModel - ).join( - PropertyDetailsEpcModel, - PropertyModel.id == PropertyDetailsEpcModel.property_id - ).filter( - PropertyModel.portfolio_id == portfolio_id - ).all() + properties_query = ( + session.query(PropertyModel, PropertyDetailsEpcModel) + .join( + PropertyDetailsEpcModel, + PropertyModel.id == PropertyDetailsEpcModel.property_id, + ) + .filter(PropertyModel.portfolio_id == portfolio_id) + .all() + ) properties_data = [ { - **{col.name: getattr(p.PropertyModel, col.name) - for col in PropertyModel.__table__.columns}, - **{col.name: getattr(p.PropertyDetailsEpcModel, col.name) - for col in PropertyDetailsEpcModel.__table__.columns}, + **{ + col.name: getattr(p.PropertyModel, col.name) + for col in PropertyModel.__table__.columns + }, + **{ + col.name: getattr(p.PropertyDetailsEpcModel, col.name) + for col in PropertyDetailsEpcModel.__table__.columns + }, } for p in properties_query ] @@ -58,13 +71,10 @@ def get_data(portfolio_id, scenario_ids): session.query( Plan.scenario_id, Plan.property_id, - func.max(Plan.created_at).label("latest_created_at") + func.max(Plan.created_at).label("latest_created_at"), ) .filter(Plan.scenario_id.in_(scenario_ids)) - .group_by( - Plan.scenario_id, - Plan.property_id - ) + .group_by(Plan.scenario_id, Plan.property_id) .subquery() ) @@ -76,9 +86,9 @@ def get_data(portfolio_id, scenario_ids): session.query(Plan) .join( latest_plans_subq, - (Plan.scenario_id == latest_plans_subq.c.scenario_id) & - (Plan.property_id == latest_plans_subq.c.property_id) & - (Plan.created_at == latest_plans_subq.c.latest_created_at) + (Plan.scenario_id == 
latest_plans_subq.c.scenario_id) + & (Plan.property_id == latest_plans_subq.c.property_id) + & (Plan.created_at == latest_plans_subq.c.latest_created_at), ) .all() ) @@ -103,28 +113,29 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendations (NO materials yet) # -------------------- - recommendations_query = session.query( - Recommendation, - Plan.scenario_id, - PlanRecommendations.plan_id - ).join( - PlanRecommendations, - Recommendation.id == PlanRecommendations.recommendation_id - ).join( - Plan, - Plan.id == PlanRecommendations.plan_id - ).filter( - PlanRecommendations.plan_id.in_(plan_ids), - Recommendation.default.is_(True), - Recommendation.already_installed.is_(False) - ).all() + recommendations_query = ( + session.query(Recommendation, Plan.scenario_id, PlanRecommendations.plan_id) + .join( + PlanRecommendations, + Recommendation.id == PlanRecommendations.recommendation_id, + ) + .join(Plan, Plan.id == PlanRecommendations.plan_id) + .filter( + PlanRecommendations.plan_id.in_(plan_ids), + Recommendation.default.is_(True), + Recommendation.already_installed.is_(False), + ) + .all() + ) recommendations_data = [ { - **{col.name: getattr(r.Recommendation, col.name) - for col in Recommendation.__table__.columns}, + **{ + col.name: getattr(r.Recommendation, col.name) + for col in Recommendation.__table__.columns + }, "scenario_id": r.scenario_id, - "materials": [] # placeholder + "materials": [], # placeholder } for r in recommendations_query ] @@ -134,23 +145,25 @@ def get_data(portfolio_id, scenario_ids): # -------------------- # Recommendation materials (SEPARATE QUERY) # -------------------- - materials_query = session.query( - RecommendationMaterials - ).filter( - RecommendationMaterials.recommendation_id.in_(recommendation_ids) - ).all() + materials_query = ( + session.query(RecommendationMaterials) + .filter(RecommendationMaterials.recommendation_id.in_(recommendation_ids)) + .all() + ) # Group materials by recommendation_id 
materials_by_recommendation = defaultdict(list) for m in materials_query: - materials_by_recommendation[m.recommendation_id].append({ - "material_id": m.material_id, - "depth": m.depth, - "quantity": m.quantity, - "quantity_unit": m.quantity_unit, - "estimated_cost": m.estimated_cost, - }) + materials_by_recommendation[m.recommendation_id].append( + { + "material_id": m.material_id, + "depth": m.depth, + "quantity": m.quantity, + "quantity_unit": m.quantity_unit, + "estimated_cost": m.estimated_cost, + } + ) # Attach materials safely (no filtering side effects) for r in recommendations_data: @@ -161,7 +174,9 @@ def get_data(portfolio_id, scenario_ids): return properties_data, plans_data, recommendations_data -properties_data, plans_data, recommendations_data = get_data(portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS) +properties_data, plans_data, recommendations_data = get_data( + portfolio_id=PORTFOLIO_ID, scenario_ids=SCENARIOS +) properties_df = pd.DataFrame(properties_data) plans_df = pd.DataFrame(plans_data) @@ -172,10 +187,8 @@ with db_read_session() as session: materials = pd.DataFrame(materials) -material_lookup = ( - materials - .set_index("id")[["type", "includes_battery"]] - .to_dict("index") +material_lookup = materials.set_index("id")[["type", "includes_battery"]].to_dict( + "index" ) @@ -189,14 +202,14 @@ def has_solar_with_battery(materials_list): return False -recommendations_df["has_solar_with_battery"] = ( - recommendations_df["materials"].apply(has_solar_with_battery) +recommendations_df["has_solar_with_battery"] = recommendations_df["materials"].apply( + has_solar_with_battery ) recommendations_df["measure_type"] = np.where( recommendations_df["has_solar_with_battery"] == True, recommendations_df["measure_type"] + "_with_battery", - recommendations_df["measure_type"] + recommendations_df["measure_type"], ) # Adjust material type to indicate if there is a battery included @@ -211,50 +224,67 @@ from utils.s3 import read_csv_from_s3, 
read_excel_from_s3 for scenario_id in SCENARIOS: # Get recs for this scenario - recommended_measures_df = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ - ["property_id", "measure_type", "estimated_cost", "default"] + recommended_measures_df = recommendations_df[ + recommendations_df["scenario_id"] == scenario_id + ][["property_id", "measure_type", "estimated_cost", "default"]] + recommended_measures_df = recommended_measures_df[ + recommended_measures_df["default"] ] - recommended_measures_df = recommended_measures_df[recommended_measures_df["default"]] recommended_measures_df = recommended_measures_df.drop(columns=["default"]) - post_install_sap = recommendations_df[recommendations_df["scenario_id"] == scenario_id][ - ["property_id", "default", "sap_points"]] + post_install_sap = recommendations_df[ + recommendations_df["scenario_id"] == scenario_id + ][["property_id", "default", "sap_points"]] post_install_sap = post_install_sap[post_install_sap["default"]] # Sum up the sap points by property id - post_install_sap = post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index() + post_install_sap = ( + post_install_sap.groupby(["property_id"])[["sap_points"]].sum().reset_index() + ) # Find dupes by property id and measure type - dupes = recommended_measures_df.duplicated(subset=["property_id", "measure_type"], keep=False) + dupes = recommended_measures_df.duplicated( + subset=["property_id", "measure_type"], keep=False + ) dupe_df = recommended_measures_df[dupes] if dupe_df.shape: # Drop dupes - happened due to a funny bug recommended_measures_df = recommended_measures_df.drop_duplicates( - subset=["property_id", "measure_type"], keep='first' + subset=["property_id", "measure_type"], keep="first" ) recommendations_measures_pivot = recommended_measures_df.pivot( - index='property_id', - columns='measure_type', - values='estimated_cost' + index="property_id", columns="measure_type", values="estimated_cost" ) 
recommendations_measures_pivot = recommendations_measures_pivot.reset_index() # Total cost is the row sum, excluding the property_id column - recommendations_measures_pivot["total_retrofit_cost"] = recommendations_measures_pivot.drop( - columns=["property_id"] - ).sum(axis=1) + recommendations_measures_pivot["total_retrofit_cost"] = ( + recommendations_measures_pivot.drop(columns=["property_id"]).sum(axis=1) + ) - df = properties_df[ - [ - "landlord_property_id", "property_id", "uprn", "address", "postcode", "property_type", "walls", "roof", - "heating", "windows", "current_epc_rating", "current_sap_points", "total_floor_area", "number_of_rooms", - "id" + df = ( + properties_df[ + [ + "landlord_property_id", + "property_id", + "uprn", + "address", + "postcode", + "property_type", + "walls", + "roof", + "heating", + "windows", + "current_epc_rating", + "current_sap_points", + "total_floor_area", + "number_of_rooms", + "id", + ] ] - ].merge( - recommendations_measures_pivot, how="left", on="property_id" - ).merge( - post_install_sap, how="left", on="property_id" + .merge(recommendations_measures_pivot, how="left", on="property_id") + .merge(post_install_sap, how="left", on="property_id") ) # df = df.drop(columns=["property_id"]) @@ -262,21 +292,25 @@ for scenario_id in SCENARIOS: df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"] df["predicted_post_works_sap"] = df["predicted_post_works_sap"] - df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(lambda x: sap_to_epc(x)) + df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply( + lambda x: sap_to_epc(x) + ) df["uprn"] = df["uprn"].astype(str) relevant_plans = plans_df[plans_df["scenario_id"] == scenario_id] df2 = df.merge( - relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], how="left", on="property_id", - suffixes=("", "_plan") + relevant_plans[["property_id", "post_sap_points", "post_epc_rating"]], + how="left", + on="property_id", + 
suffixes=("", "_plan"), ) print(df2["predicted_post_works_epc"].value_counts()) print(df2["post_epc_rating"].value_counts()) z = df2[ - (df2["predicted_post_works_epc"] != "D") & - (df2["post_epc_rating"].astype(str) == "Epc.D") - ] + (df2["predicted_post_works_epc"] != "D") + & (df2["post_epc_rating"].astype(str) == "Epc.D") + ] df2["predicted_post_works_epc"].value_counts() df2["post_epc_rating"].astype(str).value_counts() @@ -291,189 +325,6 @@ for scenario_id in SCENARIOS: df[df["predicted_post_works_sap"] == ""] # Create excel to store to -<<<<<<< HEAD - filename = (f"{scenario_names[scenario_id]} - 20250113 final.xlsx") + filename = f"{scenario_names[scenario_id]} - 20250113 final.xlsx" with pd.ExcelWriter(filename) as writer: df.to_excel(writer, sheet_name="properties", index=False) -======= - filename = ("/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting " - f"Project/Final SAL/scenarios/{scenario_names[scenario_id]} - 20250114 final.xlsx") - with pd.ExcelWriter(filename) as writer: - df.to_excel(writer, sheet_name="properties", index=False) - - -# asset_list = pd.DataFrame(asset_list) -# asset_list = asset_list.rename( -# columns={ -# "postcode": "domna_postcode" -# } -# ) -# if "domna_full_address": -# # For Peabody -# asset_list["domna_full_address"] = asset_list["domna_address_1"] -# -# asset_list = asset_list[["domna_full_address", "domna_postcode", "epc_os_uprn", ]].copy() -# asset_list = asset_list.rename(columns={"epc_os_uprn": "uprn"}) -# asset_list["uprn"] = asset_list["uprn"].astype("Int64").astype(str) -# asset_list = asset_list.merge( -# df.drop(columns=["address", "postcode", "property_type", "total_floor_area"]), -# how="left", -# on="uprn" -# ) - - -# Get conservation area data from property details spatial. 
based on the UPRNs -def get_conservation_area_data(uprns): - session = sessionmaker(bind=db_engine)() - session.begin() - - # Query to get conservation area data - spatial_query = session.query( - PropertyDetailsSpatial - ).filter( - PropertyDetailsSpatial.uprn.in_(uprns) # Filter by UPRNs - ).all() - - # Transform spatial data to include all fields dynamically - spatial_data = [ - {col.name: getattr(spatial, col.name) for col in PropertyDetailsSpatial.__table__.columns} - for spatial in spatial_query - ] - - session.close() - return pd.DataFrame(spatial_data) - - -uprns = asset_list[ - ~pd.isna(asset_list["uprn"]) & (asset_list["uprn"] != "") - ]["uprn"].astype(int).unique().tolist() -conservation_area_data = get_conservation_area_data(uprns) -conservation_area_data["uprn"] = conservation_area_data["uprn"].astype(str) -asset_list = asset_list.merge( - conservation_area_data[["uprn", "conservation_status", "is_listed_building", "is_heritage_building"]], - how="left", - on="uprn" -) - -# For exporting -df.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/EPC C -without floors proposed measures - " - "with ID.xlsx", - index=False -) -# asset_list.to_excel( -# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Lincs Rural/epc_measures.xlsx", -# index=False -# ) - -condition_costs = pd.read_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Condition costs.xlsx", - sheet_name="Prices - Khalim", - header=35 -) -# Remove unnamed columns and reset index -condition_costs = condition_costs.loc[:, ~condition_costs.columns.str.contains('^Unnamed')] -condition_costs = condition_costs.reset_index(drop=True) - - -# We now estimate condition cost -def simulate_condition(asset_list, condition_costs): - """ - This function is for testing, and will simulate condition cost from 1-10 for each property to see what the - costing array looks like. 
- :param df: - :return: - """ - - condition_df = [] - for _, row in asset_list.iterrows(): - - n_bathrooms = row["bathrooms"] - - conditions = {} - for condition in reversed(range(1, 11)): - condition_cost = condition_costs[ - condition_costs["Condition"] == condition - ].drop(columns=["Condition"]).iloc[0] - - # Each cost is scaled by floor area - condition_cost = condition_cost * row["total_floor_area"] - condition_cost["Bathroom"] = condition_cost["Bathroom"] * n_bathrooms - - total_condition_cost = condition_cost.sum() - conditions["Condition " + str(condition)] = (total_condition_cost) - - condition_df.append( - { - "uprn": row["uprn"], - **conditions - } - ) - - condition_df = pd.DataFrame(condition_df) - - asset_list = asset_list.merge( - condition_df, - how="left", - on="uprn" - ) - - return asset_list - - -# asset_list = simulate_condition(asset_list, condition_costs) - -# We calculate the condition cost based on the condition -for _, row in asset_list.iterrows(): - - condition = row["condition_score"] - if condition in [None, ""]: - continue - condition = int(float(condition)) - - condition_cost = condition_costs[ - condition_costs["Condition"] == condition - ].drop(columns=["Condition"]).iloc[0] - - # Each cost is scaled by floor area - condition_cost = condition_cost * float(row["total_floor_area"]) - n_bathrooms = row["n_bathrooms"] - condition_cost["Bathroom"] = condition_cost["Bathroom"] * float(n_bathrooms) - - total_condition_cost = condition_cost.sum() - asset_list.loc[asset_list["uprn"] == row["uprn"], "domna_condition_cost"] = total_condition_cost - -# Store output -asset_list.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/20250624_portfolio_retrofit_packages.xlsx", - index=False -) - -condition_cost_comparison = asset_list[ - ["condition_score", "decoration_sum_min ", "decoration_sum_max", "domna_condition_cost"] -] - -# Testing -plans_df.head() - -example = pd.read_excel( - 
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Peabody/Nov 2025 Consulting Project/Final " - "SAL/scenarios/EPC C - no solid floor, no EWI or IWI, ashp 3.0 - 20250114 final.xlsx" -) - -plans_df2 = plans_df.merge( - properties_df[["property_id", "landlord_property_id"]], - left_on="property_id", - right_on="property_id", - how="left" -) - -plans_df2 = plans_df2[plans_df2["scenario_id"] == 909] - -dupes = plans_df2[plans_df2["property_id"].duplicated()] - -# merge on plans -example = example.merge( - plans_df, how="left", -) ->>>>>>> 3874da6177cbcc37f7a488bec0a06e387906653c