diff --git a/.devcontainer/asset_list/Dockerfile b/.devcontainer/asset_list/Dockerfile index 512ab109..72a5de53 100644 --- a/.devcontainer/asset_list/Dockerfile +++ b/.devcontainer/asset_list/Dockerfile @@ -27,8 +27,9 @@ RUN useradd -m -s /usr/bin/bash ${USER} \ # # 4) Python deps - if you want to run assest list ENV PIP_NO_CACHE_DIR=1 PIP_DISABLE_PIP_VERSION_CHECK=1 -ADD asset_list/requirements.txt requirements.txt -RUN pip install -r requirements.txt +ADD .devcontainer/asset_list/requirements.txt requirements2.txt +ADD asset_list/requirements.txt requirements1.txt +RUN cat requirements1.txt requirements2.txt >> requirements.txt RUN pip install -r requirements.txt # 5) Workdir diff --git a/.devcontainer/asset_list/requirements.txt b/.devcontainer/asset_list/requirements.txt index 0640f2c9..fe536a81 100644 --- a/.devcontainer/asset_list/requirements.txt +++ b/.devcontainer/asset_list/requirements.txt @@ -15,10 +15,9 @@ uvicorn[standard] pytest==9.0.2 pytest-cov==7.0.0 ipykernel>=6.25,<7 -pydantic-settings<2 pyyaml>=6.0.1 -pydantic>=1.10.7,<2 sqlmodel # Formatting black==26.1.0 dotenv +pydantic-settings \ No newline at end of file diff --git a/.github/workflows/deploy_terraform.yml b/.github/workflows/deploy_terraform.yml index 41a551c4..567f8d5a 100644 --- a/.github/workflows/deploy_terraform.yml +++ b/.github/workflows/deploy_terraform.yml @@ -73,8 +73,8 @@ jobs: uses: ./.github/workflows/_build_image.yml with: ecr_repo: address2uprn-${{ needs.determine_stage.outputs.stage }} - dockerfile_path: backend/address2UPRN/Dockerfile - build_context: backend/address2UPRN + dockerfile_path: backend/address2UPRN/handler/Dockerfile + build_context: . secrets: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} @@ -96,3 +96,38 @@ jobs: AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + + # ============================================================ + # 2️⃣ Build Postcode Splitter image and Push + # ============================================================ + postcodeSplitter_image: + needs: [determine_stage, shared_terraform] + uses: ./.github/workflows/_build_image.yml + with: + ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} + dockerfile_path: backend/postcode_splitter/handler/Dockerfile + build_context: . + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + # ============================================================ + # 3️⃣ Deploy Postcode Splitter Lambda + # ============================================================ + postcodeSplitter_lambda: + needs: [postcodeSplitter_image, determine_stage] + uses: ./.github/workflows/_deploy_lambda.yml + with: + lambda_name: postcodeSplitter + lambda_path: infrastructure/terraform/lambda/postcodeSplitter + stage: ${{ needs.determine_stage.outputs.stage }} + ecr_repo: postcode_splitter-${{ needs.determine_stage.outputs.stage }} + image_digest: ${{ needs.postcodeSplitter_image.outputs.image_digest }} + secrets: + AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }} + AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }} + AWS_REGION: ${{ secrets.DEV_AWS_REGION }} + + diff --git a/backend/.env.local b/backend/.env.local index a05c93a3..22e1db35 100644 --- a/backend/.env.local +++ b/backend/.env.local @@ -19,4 +19,22 @@ PLAN_TRIGGER_BUCKET="test" DATA_BUCKET="test" EPC_AUTH_TOKEN="test" ENGINE_SQS_URL="test" +ENERGY_ASSESSMENTS_BUCKET="test" +API_KEY="test" +SECRET_KEY="test" +ENVIRONMENT="test" +DATA_BUCKET="test" +PLAN_TRIGGER_BUCKET="test" +ENGINE_SQS_URL="test" +GOOGLE_SOLAR_API_KEY="test" +DB_HOST="test" +DB_PASSWORD="test" +DB_USERNAME="test" +DB_PORT="test" +DB_NAME="test" +SAP_PREDICTIONS_BUCKET="test" +CARBON_PREDICTIONS_BUCKET="test" +HEAT_PREDICTIONS_BUCKET="test" +HEATING_KWH_PREDICTIONS_BUCKET="test" +HOTWATER_KWH_PREDICTIONS_BUCKET="test" ENERGY_ASSESSMENTS_BUCKET="test" \ No newline at end of file diff --git a/backend/address2UPRN/Dockerfile b/backend/address2UPRN/Dockerfile deleted file mode 100644 index ac6af2a5..00000000 --- a/backend/address2UPRN/Dockerfile +++ /dev/null @@ -1,7 +0,0 @@ -FROM public.ecr.aws/lambda/python:3.10 - -# Copy function code -COPY main.py . - -# Set the handler -CMD ["main.handler"] diff --git a/backend/address2UPRN/handler/Dockerfile b/backend/address2UPRN/handler/Dockerfile new file mode 100644 index 00000000..3f7567d3 --- /dev/null +++ b/backend/address2UPRN/handler/Dockerfile @@ -0,0 +1,26 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# This is not going to be permenant - but until we solve for env variables in live prod +ENV EPC_AUTH_TOKEN=a2Nvbm5rb3dsZXNzYXJAZ21haWwuY29tOjY5MGJiMWM0NmIyOGI5ZDUxYzAxMzQzYzNiZGNlZGJjZDNmODQwMzAg + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# ----------------------------- +# Copy requirements FIRST (for Docker layer caching) +# ----------------------------- +COPY backend/address2UPRN/handler/requirements.txt . + +# Install dependencies into Lambda runtime +RUN pip install --no-cache-dir -r requirements.txt + +# ----------------------------- +# Copy application code +# ----------------------------- +COPY utils/ utils/ +COPY backend/address2UPRN/main.py . + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["main.handler"] diff --git a/backend/address2UPRN/handler/requirements.txt b/backend/address2UPRN/handler/requirements.txt new file mode 100644 index 00000000..bc753841 --- /dev/null +++ b/backend/address2UPRN/handler/requirements.txt @@ -0,0 +1,3 @@ +epc-api-python==1.0.2 +tqdm +pandas \ No newline at end of file diff --git a/backend/address2UPRN/main.py b/backend/address2UPRN/main.py index 9d27a5ce..ba386e0a 100644 --- a/backend/address2UPRN/main.py +++ b/backend/address2UPRN/main.py @@ -212,6 +212,8 @@ def get_epc_data_with_postcode(postcode, size=500, attempt=1, max_attempts=3): method="get", params={"postcode": postcode}, ) + if not search_resp or "rows" not in search_resp: + return pd.DataFrame() results_df = pd.DataFrame(search_resp["rows"], columns=search_resp["column-names"]) @@ -298,7 +300,7 @@ def get_uprn_candidates( ) -def get_uprn(user_inputed_address: str, postcode: str): +def get_uprn(user_inputed_address: str, postcode: str, return_address=False): """ Return uprn (str) Return False if failed to find a sensible matching epc @@ -337,6 +339,8 @@ def get_uprn(user_inputed_address: str, postcode: str): if found_uprn == "": return None + if return_address: + return found_uprn, address return found_uprn diff --git a/backend/address2UPRN/script.py b/backend/address2UPRN/script.py index bd8f8017..a71b5827 100644 --- a/backend/address2UPRN/script.py +++ b/backend/address2UPRN/script.py @@ -1,17 +1,24 @@ import pandas as pd +from tqdm import tqdm +from backend.address2UPRN.main import get_uprn + +# Enable tqdm for pandas +tqdm.pandas() + +df = pd.read_excel("address2.xlsx") -# use Address 1 -junte_df = pd.read_excel("hackney_uprn_failures.xlsx") +def extract_uprn(row): + print(row["User Input"], row["Postcode"]) + result = get_uprn(row["User Input"], row["Postcode"], return_address=True) + + if result is None: + return pd.Series([None, None]) + + uprn, found_address = result + return pd.Series([uprn, found_address]) -# use domna_address_1 -khalim_df = pd.read_excel("khalim_standard.xlsx") - - -combined_df = junte_df.merge(khalim_df, how="left", left_on="Address 1", right_on='domna_address_1') - -# Find the row in khalim_df that does not app - -result = combined_df[~pd.isnull(combined_df["epc_os_uprn"])] +df[["juntes uprn", "junte found address"]] = df.progress_apply(extract_uprn, axis=1) +df.to_excel("outputs2.xlsx", index=False) diff --git a/backend/postcode_splitter/hackney.xlsx b/backend/postcode_splitter/hackney.xlsx deleted file mode 100644 index 64892f3a..00000000 Binary files a/backend/postcode_splitter/hackney.xlsx and /dev/null differ diff --git a/backend/postcode_splitter/handler/Dockerfile b/backend/postcode_splitter/handler/Dockerfile new file mode 100644 index 00000000..7c1a7989 --- /dev/null +++ b/backend/postcode_splitter/handler/Dockerfile @@ -0,0 +1,9 @@ +FROM public.ecr.aws/lambda/python:3.10 + +# Set working directory (Lambda task root) +WORKDIR /var/task + +# ----------------------------- +# Lambda handler +# ----------------------------- +CMD ["main.handler"] diff --git a/backend/postcode_splitter/handler/requirements.txt b/backend/postcode_splitter/handler/requirements.txt new file mode 100644 index 00000000..e69de29b diff --git a/backend/postcode_splitter/main.py b/backend/postcode_splitter/main.py index d417c8f1..d55f618a 100644 --- a/backend/postcode_splitter/main.py +++ b/backend/postcode_splitter/main.py @@ -1,10 +1,12 @@ import pandas as pd import requests -from backend.address2UPRN.main import resolve_uprns_for_postcode_group, get_epc_data_with_postcode +from backend.address2UPRN.main import ( + resolve_uprns_for_postcode_group, + get_epc_data_with_postcode, +) from tqdm import tqdm - def sanitise_postcode(postcode: str) -> str | None: """ Normalise postcode for grouping. @@ -51,11 +53,7 @@ def main(): # --- validate AFTER grouping (save API calls) --- # Get unique, non-null postcodes - unique_postcodes = ( - df["postcode_clean"] - .dropna() - .unique() - ) + unique_postcodes = df["postcode_clean"].dropna().unique() # Validate each postcode once, TODOadd a progress bar postcode_validity = { @@ -66,7 +64,6 @@ def main(): # Map validity back onto dataframe df["postcode_valid"] = df["postcode_clean"].map(postcode_validity) - results = [] for postcode, group_df in tqdm( @@ -98,17 +95,33 @@ def main(): results.append(tmp) final_df = pd.concat(results, ignore_index=True) - a = final_df[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] # add levi score to viewing - b = final_df[final_df["best_match_lexiscore"]>0] # add levi score to viewing - b = b[[ - "best_match_lexiscore","Address 1", - "best_match_address", "Postcode", - "UPRN", "best_match_uprn" - ]] + a = final_df[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] # add levi score to viewing + b = final_df[final_df["best_match_lexiscore"] > 0] # add levi score to viewing + b = b[ + [ + "best_match_lexiscore", + "Address 1", + "best_match_address", + "Postcode", + "UPRN", + "best_match_uprn", + ] + ] + + +def handler(event, context): + print("hello Postcode splitter world") + return {"statusCode": 200, "body": "hello world"} + if __name__ == "__main__": main() diff --git a/infrastructure/terraform/lambda/postcodeSplitter/main.tf b/infrastructure/terraform/lambda/postcodeSplitter/main.tf new file mode 100644 index 00000000..ebbdbfdc --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/main.tf @@ -0,0 +1,14 @@ +module "lambda" { + source = "../modules/lambda_with_sqs" + + name = "postcode-splitter" + stage = var.stage + + image_uri = local.image_uri + + + environment = { + STAGE = var.stage + LOG_LEVEL = "info" + } +} diff --git a/infrastructure/terraform/lambda/postcodeSplitter/provider.tf b/infrastructure/terraform/lambda/postcodeSplitter/provider.tf new file mode 100644 index 00000000..dbe323f2 --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/provider.tf @@ -0,0 +1,16 @@ +terraform { + required_providers { + aws = { + source = "hashicorp/aws" + version = "~> 4.16" + } + } + + backend "s3" { + bucket = "postcode-splitter-terraform-state" + key = "terraform.tfstate" + region = "eu-west-2" + } + + required_version = ">= 1.2.0" +} \ No newline at end of file diff --git a/infrastructure/terraform/lambda/postcodeSplitter/variables.tf b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf new file mode 100644 index 00000000..9ce45fa5 --- /dev/null +++ b/infrastructure/terraform/lambda/postcodeSplitter/variables.tf @@ -0,0 +1,26 @@ +variable "lambda_name" { + type = string + description = "Logical name of the lambda (e.g. address2uprn)" +} + +variable "stage" { + description = "Deployment stage (e.g. dev, prod)" + type = string +} +variable "ecr_repo_url" { + type = string + description = "ECR repository URL (no tag, no digest)" +} + +variable "image_digest" { + type = string + description = "Image digest (sha256:...)" +} + +locals { + image_uri = "${var.ecr_repo_url}@${var.image_digest}" +} + +output "resolved_image_uri" { + value = local.image_uri +} diff --git a/infrastructure/terraform/shared/main.tf b/infrastructure/terraform/shared/main.tf index 02962a29..28bf4914 100644 --- a/infrastructure/terraform/shared/main.tf +++ b/infrastructure/terraform/shared/main.tf @@ -319,4 +319,20 @@ module "condition_etl_registry" { name = "condition-etl" stage = var.stage + + +################################################ +# Postcode Splitter – Lambda ECR +################################################ +module "postcode_splitter_state_bucket" { + source = "../modules/tf_state_bucket" + bucket_name = "postcode-splitter-terraform-state" + +} + +module "postcode_splitter_registry" { + source = "../modules/container_registry" + name = "postcode_splitter" + stage = var.stage + } \ No newline at end of file diff --git a/sfr/principal_pitch/2_export_data.py b/sfr/principal_pitch/2_export_data.py index ae807654..a65509d5 100644 --- a/sfr/principal_pitch/2_export_data.py +++ b/sfr/principal_pitch/2_export_data.py @@ -2,6 +2,10 @@ This script prepares the data for the financial model """ +from dotenv import load_dotenv + +load_dotenv(".env.local") + import pandas as pd import numpy as np from backend.app.utils import sap_to_epc @@ -24,12 +28,12 @@ from sqlalchemy import func # PORTFOLIO_ID = 206 # SCENARIOS = [389] -PORTFOLIO_ID = 502 # Peabody +PORTFOLIO_ID = 524 SCENARIOS = [ - 986, + 1009, ] scenario_names = { - 986: "EPC C", + 1009: "EPC C; Most Economic", }