mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-30 13:10:47 +00:00
Merge pull request #1006 from Hestia-Homes/main
Deployment fix for Pashub to Ara
This commit is contained in:
commit
dc0d7be24d
19 changed files with 556 additions and 48 deletions
50
.github/workflows/deploy_terraform.yml
vendored
50
.github/workflows/deploy_terraform.yml
vendored
|
|
@ -201,6 +201,46 @@ jobs:
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Build Bulk Address2UPRN Combiner image and Push
|
||||||
|
# ============================================================
|
||||||
|
bulk_address2uprn_combiner_image:
|
||||||
|
needs: [determine_stage, shared_terraform]
|
||||||
|
uses: ./.github/workflows/_build_image.yml
|
||||||
|
with:
|
||||||
|
ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
|
||||||
|
dockerfile_path: backend/bulk_address2uprn_combiner/handler/Dockerfile
|
||||||
|
build_context: .
|
||||||
|
build_args: |
|
||||||
|
DEV_DB_HOST=$DEV_DB_HOST
|
||||||
|
DEV_DB_PORT=$DEV_DB_PORT
|
||||||
|
DEV_DB_NAME=$DEV_DB_NAME
|
||||||
|
secrets:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
||||||
|
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
||||||
|
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
||||||
|
|
||||||
|
# ============================================================
|
||||||
|
# Deploy Bulk Address2UPRN Combiner Lambda
|
||||||
|
# ============================================================
|
||||||
|
bulk_address2uprn_combiner_lambda:
|
||||||
|
needs: [bulk_address2uprn_combiner_image, determine_stage, shared_terraform]
|
||||||
|
uses: ./.github/workflows/_deploy_lambda.yml
|
||||||
|
with:
|
||||||
|
lambda_name: bulk_address2uprn_combiner
|
||||||
|
lambda_path: infrastructure/terraform/lambda/bulk_address2uprn_combiner
|
||||||
|
stage: ${{ needs.determine_stage.outputs.stage }}
|
||||||
|
ecr_repo: bulk_address2uprn_combiner-${{ needs.determine_stage.outputs.stage }}
|
||||||
|
image_digest: ${{ needs.bulk_address2uprn_combiner_image.outputs.image_digest }}
|
||||||
|
terraform_apply: ${{ needs.determine_stage.outputs.terraform_apply }}
|
||||||
|
secrets:
|
||||||
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
# Condition ETL image and Push
|
# Condition ETL image and Push
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
@ -332,17 +372,10 @@ jobs:
|
||||||
ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
|
ecr_repo: pashub_to_ara-${{ needs.determine_stage.outputs.stage }}
|
||||||
dockerfile_path: backend/pashub_fetcher/handler/Dockerfile
|
dockerfile_path: backend/pashub_fetcher/handler/Dockerfile
|
||||||
build_context: .
|
build_context: .
|
||||||
build_args: |
|
|
||||||
DEV_DB_HOST=$DEV_DB_HOST
|
|
||||||
DEV_DB_PORT=$DEV_DB_PORT
|
|
||||||
DEV_DB_NAME=$DEV_DB_NAME
|
|
||||||
secrets:
|
secrets:
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
DEV_DB_HOST: ${{ secrets.DEV_DB_HOST }}
|
|
||||||
DEV_DB_PORT: ${{ secrets.DEV_DB_PORT }}
|
|
||||||
DEV_DB_NAME: ${{ secrets.DEV_DB_NAME }}
|
|
||||||
|
|
||||||
|
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
@ -362,6 +395,9 @@ jobs:
|
||||||
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
AWS_ACCESS_KEY_ID: ${{ secrets.DEV_AWS_ACCESS_KEY_ID }}
|
||||||
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
AWS_SECRET_ACCESS_KEY: ${{ secrets.DEV_AWS_SECRET_ACCESS_KEY }}
|
||||||
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
AWS_REGION: ${{ secrets.DEV_AWS_REGION }}
|
||||||
|
TF_VAR_db_host: ${{ secrets.DEV_DB_HOST }}
|
||||||
|
TF_VAR_db_name: ${{ secrets.DEV_DB_NAME }}
|
||||||
|
TF_VAR_db_port: ${{ secrets.DEV_DB_PORT }}
|
||||||
TF_VAR_sharepoint_client_id: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
TF_VAR_sharepoint_client_id: ${{ secrets.SHAREPOINT_CLIENT_ID }}
|
||||||
TF_VAR_sharepoint_client_secret: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
TF_VAR_sharepoint_client_secret: ${{ secrets.SHAREPOINT_CLIENT_SECRET }}
|
||||||
TF_VAR_sharepoint_tenant_id: ${{ secrets.SHAREPOINT_TENANT_ID }}
|
TF_VAR_sharepoint_tenant_id: ${{ secrets.SHAREPOINT_TENANT_ID }}
|
||||||
|
|
|
||||||
5
.gitignore
vendored
5
.gitignore
vendored
|
|
@ -278,6 +278,11 @@ cache/
|
||||||
|
|
||||||
*.png
|
*.png
|
||||||
*.pptx
|
*.pptx
|
||||||
|
*.csv
|
||||||
|
*.xlsx
|
||||||
|
*.pdf
|
||||||
|
**/Chunks/
|
||||||
|
*.ipynb
|
||||||
|
|
||||||
local_data*
|
local_data*
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -74,23 +74,23 @@ def app():
|
||||||
"""
|
"""
|
||||||
|
|
||||||
data_folder = "/workspaces/model/asset_list"
|
data_folder = "/workspaces/model/asset_list"
|
||||||
data_filename = "Waverley UPRN Match.xlsx"
|
data_filename = "foom (2).xlsx"
|
||||||
sheet_name = "in"
|
sheet_name = "in"
|
||||||
postcode_column = "postcode_clean"
|
postcode_column = "postcode_clean"
|
||||||
address1_column = "domna_found_address"
|
address1_column = "address2uprn_address"
|
||||||
address1_method = None
|
address1_method = None
|
||||||
fulladdress_column = "domna_found_address"
|
fulladdress_column = "address2uprn_address"
|
||||||
address_cols_to_concat = []
|
address_cols_to_concat = []
|
||||||
missing_postcodes_method = None
|
missing_postcodes_method = None
|
||||||
landlord_year_built = None
|
landlord_year_built = None
|
||||||
landlord_os_uprn = "domna_found_uprn"
|
landlord_os_uprn = "address2uprn_uprn"
|
||||||
landlord_property_type = "Property Type 1" # Good to include if landlord gave
|
landlord_property_type = None # Good to include if landlord gave
|
||||||
landlord_built_form = None # Good to include if landlord gave
|
landlord_built_form = None # Good to include if landlord gave
|
||||||
landlord_wall_construction = None
|
landlord_wall_construction = None
|
||||||
landlord_roof_construction = None
|
landlord_roof_construction = None
|
||||||
landlord_heating_system = None
|
landlord_heating_system = None
|
||||||
landlord_existing_pv = None
|
landlord_existing_pv = None
|
||||||
landlord_property_id = "WBC Ref"
|
landlord_property_id = "UPRN"
|
||||||
landlord_sap = None
|
landlord_sap = None
|
||||||
outcomes_filename = None
|
outcomes_filename = None
|
||||||
outcomes_sheetname = None
|
outcomes_sheetname = None
|
||||||
|
|
|
||||||
30
backend/app/db/models/bulk_address_uploads.py
Normal file
30
backend/app/db/models/bulk_address_uploads.py
Normal file
|
|
@ -0,0 +1,30 @@
|
||||||
|
from typing import Optional
|
||||||
|
from uuid import UUID, uuid4
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from sqlmodel import SQLModel, Field, select
|
||||||
|
|
||||||
|
from backend.app.db.connection import get_db_session
|
||||||
|
|
||||||
|
|
||||||
|
class BulkAddressUpload(SQLModel, table=True):
|
||||||
|
__tablename__ = "bulk_address_uploads"
|
||||||
|
|
||||||
|
id: UUID = Field(default_factory=uuid4, primary_key=True, index=True)
|
||||||
|
task_id: UUID = Field(foreign_key="tasks.id", index=True)
|
||||||
|
combined_csv_s3_uri: Optional[str] = Field(default=None)
|
||||||
|
updated_at: datetime = Field(default_factory=datetime.utcnow)
|
||||||
|
|
||||||
|
|
||||||
|
def set_combined_csv_s3_uri(task_id: UUID, s3_uri: str) -> None:
|
||||||
|
now = datetime.now(timezone.utc)
|
||||||
|
with get_db_session() as session:
|
||||||
|
row = session.exec(
|
||||||
|
select(BulkAddressUpload).where(BulkAddressUpload.task_id == task_id)
|
||||||
|
).first()
|
||||||
|
if not row:
|
||||||
|
raise ValueError(f"No bulk_address_uploads row for task_id {task_id}")
|
||||||
|
row.combined_csv_s3_uri = s3_uri
|
||||||
|
row.updated_at = now
|
||||||
|
session.add(row)
|
||||||
|
session.commit()
|
||||||
0
backend/bulk_address2uprn_combiner/__init__.py
Normal file
0
backend/bulk_address2uprn_combiner/__init__.py
Normal file
23
backend/bulk_address2uprn_combiner/handler/Dockerfile
Normal file
23
backend/bulk_address2uprn_combiner/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,23 @@
|
||||||
|
FROM public.ecr.aws/lambda/python:3.11
|
||||||
|
|
||||||
|
ARG DEV_DB_HOST
|
||||||
|
ARG DEV_DB_PORT
|
||||||
|
ARG DEV_DB_NAME
|
||||||
|
|
||||||
|
ENV DB_HOST=${DEV_DB_HOST}
|
||||||
|
ENV DB_PORT=${DEV_DB_PORT}
|
||||||
|
ENV DB_NAME=${DEV_DB_NAME}
|
||||||
|
|
||||||
|
WORKDIR /var/task
|
||||||
|
|
||||||
|
COPY backend/bulk_address2uprn_combiner/handler/requirements.txt .
|
||||||
|
|
||||||
|
RUN pip install --no-cache-dir -r requirements.txt
|
||||||
|
|
||||||
|
COPY utils/ utils/
|
||||||
|
COPY backend/ backend/
|
||||||
|
COPY datatypes/ datatypes/
|
||||||
|
|
||||||
|
COPY backend/bulk_address2uprn_combiner/main.py .
|
||||||
|
|
||||||
|
CMD ["main.handler"]
|
||||||
|
|
@ -0,0 +1,7 @@
|
||||||
|
pandas==2.2.2
|
||||||
|
numpy<2.0
|
||||||
|
boto3==1.35.44
|
||||||
|
sqlmodel
|
||||||
|
sqlalchemy==2.0.36
|
||||||
|
psycopg2-binary==2.9.10
|
||||||
|
pydantic-settings==2.6.0
|
||||||
74
backend/bulk_address2uprn_combiner/main.py
Normal file
74
backend/bulk_address2uprn_combiner/main.py
Normal file
|
|
@ -0,0 +1,74 @@
|
||||||
|
import os
|
||||||
|
import boto3
|
||||||
|
import pandas as pd
|
||||||
|
from io import BytesIO
|
||||||
|
from typing import Any
|
||||||
|
from uuid import UUID
|
||||||
|
from datetime import datetime, timezone
|
||||||
|
|
||||||
|
from utils.logger import setup_logger
|
||||||
|
from backend.utils.subtasks import subtask_handler
|
||||||
|
from backend.app.db.models.bulk_address_uploads import set_combined_csv_s3_uri
|
||||||
|
|
||||||
|
logger = setup_logger()
|
||||||
|
|
||||||
|
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME")
|
||||||
|
|
||||||
|
|
||||||
|
def list_csv_files(s3_client, bucket: str, task_id: str) -> list[str]:
|
||||||
|
paginator = s3_client.get_paginator("list_objects_v2")
|
||||||
|
prefix = f"ara_raw_outputs/{task_id}/"
|
||||||
|
keys = []
|
||||||
|
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
|
||||||
|
for obj in page.get("Contents", []):
|
||||||
|
if obj["Key"].endswith(".csv"):
|
||||||
|
keys.append(obj["Key"])
|
||||||
|
return keys
|
||||||
|
|
||||||
|
|
||||||
|
def download_csv(s3_client, bucket: str, key: str) -> pd.DataFrame:
|
||||||
|
obj = s3_client.get_object(Bucket=bucket, Key=key)
|
||||||
|
return pd.read_csv(BytesIO(obj["Body"].read()))
|
||||||
|
|
||||||
|
|
||||||
|
@subtask_handler()
|
||||||
|
def handler(body: dict[str, Any], context: Any) -> str:
|
||||||
|
task_id_str: str = body.get("task_id", "")
|
||||||
|
|
||||||
|
if not task_id_str:
|
||||||
|
raise RuntimeError("Missing task_id in message body")
|
||||||
|
|
||||||
|
bucket = S3_BUCKET_NAME
|
||||||
|
if not bucket:
|
||||||
|
raise RuntimeError("S3_BUCKET_NAME env var not set")
|
||||||
|
|
||||||
|
s3 = boto3.client("s3")
|
||||||
|
|
||||||
|
logger.info(f"Combining ara_raw_outputs for task {task_id_str}")
|
||||||
|
|
||||||
|
csv_keys = list_csv_files(s3, bucket, task_id_str)
|
||||||
|
if not csv_keys:
|
||||||
|
raise RuntimeError(f"No CSV files found under ara_raw_outputs/{task_id_str}/")
|
||||||
|
|
||||||
|
logger.info(f"Found {len(csv_keys)} CSV files")
|
||||||
|
|
||||||
|
dfs = [download_csv(s3, bucket, key) for key in csv_keys]
|
||||||
|
combined = pd.concat(dfs, ignore_index=True)
|
||||||
|
logger.info(f"Combined {len(combined)} rows from {len(dfs)} files")
|
||||||
|
|
||||||
|
timestamp = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H-%M-%S")
|
||||||
|
output_key = f"bulk_final_outputs/{task_id_str}/combined_{timestamp}.csv"
|
||||||
|
|
||||||
|
csv_buffer = BytesIO()
|
||||||
|
combined.to_csv(csv_buffer, index=False)
|
||||||
|
csv_buffer.seek(0)
|
||||||
|
s3.put_object(Bucket=bucket, Key=output_key, Body=csv_buffer.getvalue())
|
||||||
|
|
||||||
|
s3_uri = f"s3://{bucket}/{output_key}"
|
||||||
|
logger.info(f"Saved combined CSV to {s3_uri}")
|
||||||
|
print(f"OUTPUT_S3_URI: {s3_uri}")
|
||||||
|
|
||||||
|
set_combined_csv_s3_uri(UUID(task_id_str), s3_uri)
|
||||||
|
logger.info(f"Persisted combined_csv_s3_uri for task {task_id_str}")
|
||||||
|
|
||||||
|
return s3_uri
|
||||||
|
|
@ -26,15 +26,14 @@ def has_solar_with_battery(materials_list: Optional[List[Dict[str, Any]]]) -> bo
|
||||||
:return:
|
:return:
|
||||||
"""
|
"""
|
||||||
for m in materials_list or []:
|
for m in materials_list or []:
|
||||||
if (
|
if m.get("type") == "solar_pv" and m.get("includes_battery") is True:
|
||||||
m.get("type") == "solar_pv"
|
|
||||||
and m.get("includes_battery") is True
|
|
||||||
):
|
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
|
||||||
def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str, int], pd.DataFrame]:
|
def process_export(
|
||||||
|
payload: ExportRequest, session: Session
|
||||||
|
) -> Dict[Union[str, int], pd.DataFrame]:
|
||||||
export_files: Dict[Union[str, int], pd.DataFrame] = {}
|
export_files: Dict[Union[str, int], pd.DataFrame] = {}
|
||||||
|
|
||||||
db_methods = DbMethods(session)
|
db_methods = DbMethods(session)
|
||||||
|
|
@ -52,7 +51,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
|
||||||
logger.info("Retrieved %s plans for export", len(plans_df))
|
logger.info("Retrieved %s plans for export", len(plans_df))
|
||||||
|
|
||||||
if plans_df.empty:
|
if plans_df.empty:
|
||||||
logger.info("Empty plans dataframe - no plans to export. Returning empty export.")
|
logger.info(
|
||||||
|
"Empty plans dataframe - no plans to export. Returning empty export."
|
||||||
|
)
|
||||||
return export_files
|
return export_files
|
||||||
plan_ids: List[int] = plans_df["id"].tolist()
|
plan_ids: List[int] = plans_df["id"].tolist()
|
||||||
recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids)
|
recommendations_df: pd.DataFrame = db_methods.get_recommendations(plan_ids)
|
||||||
|
|
@ -61,13 +62,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
|
||||||
|
|
||||||
recommendations_df = db_methods.attach_materials(recommendations_df)
|
recommendations_df = db_methods.attach_materials(recommendations_df)
|
||||||
|
|
||||||
recommendations_df["has_solar_with_battery"] = (
|
recommendations_df["has_solar_with_battery"] = recommendations_df[
|
||||||
recommendations_df["materials"].apply(has_solar_with_battery)
|
"materials"
|
||||||
)
|
].apply(has_solar_with_battery)
|
||||||
|
|
||||||
_filter = (
|
_filter = (recommendations_df["measure_type"] == "solar_pv") & (
|
||||||
(recommendations_df["measure_type"] == "solar_pv")
|
recommendations_df["has_solar_with_battery"]
|
||||||
& (recommendations_df["has_solar_with_battery"])
|
|
||||||
)
|
)
|
||||||
|
|
||||||
recommendations_df.loc[_filter, "measure_type"] = (
|
recommendations_df.loc[_filter, "measure_type"] = (
|
||||||
|
|
@ -83,10 +83,13 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
|
||||||
else:
|
else:
|
||||||
scenario_recs = recommendations_df[
|
scenario_recs = recommendations_df[
|
||||||
recommendations_df["scenario_id"] == group_key
|
recommendations_df["scenario_id"] == group_key
|
||||||
]
|
]
|
||||||
|
|
||||||
if scenario_recs.empty:
|
if scenario_recs.empty:
|
||||||
logger.info("No recommendations found for group_key %s - skipping export for this group", group_key)
|
logger.info(
|
||||||
|
"No recommendations found for group_key %s - skipping export for this group",
|
||||||
|
group_key,
|
||||||
|
)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
measures_df: pd.DataFrame = scenario_recs[
|
measures_df: pd.DataFrame = scenario_recs[
|
||||||
|
|
@ -99,14 +102,12 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
|
||||||
values="estimated_cost",
|
values="estimated_cost",
|
||||||
).reset_index()
|
).reset_index()
|
||||||
|
|
||||||
pivot["total_retrofit_cost"] = (
|
pivot["total_retrofit_cost"] = pivot.drop(
|
||||||
pivot.drop(columns=["property_id", "plan_name"]).sum(axis=1)
|
columns=["property_id", "plan_name"]
|
||||||
)
|
).sum(axis=1)
|
||||||
|
|
||||||
post_sap: pd.DataFrame = (
|
post_sap: pd.DataFrame = (
|
||||||
scenario_recs.groupby("property_id")[["sap_points"]]
|
scenario_recs.groupby("property_id")[["sap_points"]].sum().reset_index()
|
||||||
.sum()
|
|
||||||
.reset_index()
|
|
||||||
)
|
)
|
||||||
|
|
||||||
df: pd.DataFrame = (
|
df: pd.DataFrame = (
|
||||||
|
|
@ -117,7 +118,9 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
|
||||||
|
|
||||||
df["sap_points"] = df["sap_points"].fillna(0)
|
df["sap_points"] = df["sap_points"].fillna(0)
|
||||||
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
|
df["predicted_post_works_sap"] = df["current_sap_points"] + df["sap_points"]
|
||||||
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(sap_to_epc)
|
df["predicted_post_works_epc"] = df["predicted_post_works_sap"].apply(
|
||||||
|
sap_to_epc
|
||||||
|
)
|
||||||
|
|
||||||
export_files[group_key] = df
|
export_files[group_key] = df
|
||||||
|
|
||||||
|
|
@ -128,22 +131,17 @@ def process_export(payload: ExportRequest, session: Session) -> Dict[Union[str,
|
||||||
# Lambda Handler
|
# Lambda Handler
|
||||||
# ============================================================
|
# ============================================================
|
||||||
|
|
||||||
def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Union[int, str]]:
|
|
||||||
|
def handler(
|
||||||
|
event: Mapping[str, Any], context: Optional[Any]
|
||||||
|
) -> Mapping[str, Union[int, str]]:
|
||||||
"""
|
"""
|
||||||
Example event:
|
Example event:
|
||||||
body_dict = {
|
body_dict = {
|
||||||
"task_id": "test",
|
"task_id": "test",
|
||||||
"subtask_id": "test",
|
"subtask_id": "test",
|
||||||
"portfolio_id": 655,
|
"portfolio_id": 682,
|
||||||
"scenario_ids": [],
|
"scenario_ids": [1210],
|
||||||
"default_plans_only": True,
|
|
||||||
}
|
|
||||||
|
|
||||||
body_dict = {
|
|
||||||
"task_id": "test",
|
|
||||||
"subtask_id": "test",
|
|
||||||
"portfolio_id": 655,
|
|
||||||
"scenario_ids": [1174],
|
|
||||||
"default_plans_only": False,
|
"default_plans_only": False,
|
||||||
}
|
}
|
||||||
:param event: Lambda event containing export request details
|
:param event: Lambda event containing export request details
|
||||||
|
|
@ -168,7 +166,12 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un
|
||||||
exported_files = process_export(payload, session)
|
exported_files = process_export(payload, session)
|
||||||
|
|
||||||
# TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url
|
# TODO: Need to handle the exported files - e.g. upload to s3 and email a presigned url
|
||||||
_ = exported_files
|
output_path = f"/tmp/export_{payload.portfolio_id}.xlsx"
|
||||||
|
with pd.ExcelWriter(output_path, engine="openpyxl") as writer:
|
||||||
|
for group_key, df in exported_files.items():
|
||||||
|
sheet_name = str(group_key)[:31] # Excel sheet names max 31 chars
|
||||||
|
df.to_excel(writer, sheet_name=sheet_name, index=False)
|
||||||
|
logger.info("Exported files written to %s", output_path)
|
||||||
return {
|
return {
|
||||||
"statusCode": 200,
|
"statusCode": 200,
|
||||||
"body": json.dumps({}),
|
"body": json.dumps({}),
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,8 @@ def download_csv(key):
|
||||||
|
|
||||||
|
|
||||||
def main(task_id, output):
|
def main(task_id, output):
|
||||||
|
task_id = "3fb9a9b7-ff49-4c11-b9e1-9d00da955a75"
|
||||||
|
|
||||||
print(f"Scanning task: {task_id}")
|
print(f"Scanning task: {task_id}")
|
||||||
|
|
||||||
csv_files = list_csv_files(task_id)
|
csv_files = list_csv_files(task_id)
|
||||||
|
|
|
||||||
110
devcontainer.sh
Normal file
110
devcontainer.sh
Normal file
|
|
@ -0,0 +1,110 @@
|
||||||
|
#!/usr/bin/env bash
|
||||||
|
#
|
||||||
|
# dc.sh — devcontainer helper for this repo
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# ./devcontainer.sh <config> <command>
|
||||||
|
#
|
||||||
|
# Configs: backend | asset_list
|
||||||
|
# Commands: up, shell, down, rebuild
|
||||||
|
#
|
||||||
|
# `shell` auto-runs `up` first if the container isn't already running,
|
||||||
|
# so it's safe to call cold.
|
||||||
|
#
|
||||||
|
# Examples:
|
||||||
|
# ./devcontainer.sh backend shell # up + exec bash
|
||||||
|
# ./devcontainer.sh asset_list up
|
||||||
|
# ./devcontainer.sh backend rebuild
|
||||||
|
# ./devcontainer.sh backend down
|
||||||
|
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
|
||||||
|
REPO_ROOT="${SCRIPT_DIR}"
|
||||||
|
|
||||||
|
VALID_CONFIGS=(backend asset_list)
|
||||||
|
VALID_COMMANDS=(up shell down rebuild)
|
||||||
|
|
||||||
|
# --- helpers ---------------------------------------------------------------
|
||||||
|
|
||||||
|
usage() {
|
||||||
|
sed -n '3,20p' "${BASH_SOURCE[0]}" | sed 's/^# \{0,1\}//'
|
||||||
|
exit "${1:-0}"
|
||||||
|
}
|
||||||
|
|
||||||
|
die() {
|
||||||
|
echo "error: $*" >&2
|
||||||
|
exit 1
|
||||||
|
}
|
||||||
|
|
||||||
|
in_list() {
|
||||||
|
# in_list <needle> <haystack...>
|
||||||
|
local needle="$1"
|
||||||
|
shift
|
||||||
|
local item
|
||||||
|
for item in "$@"; do
|
||||||
|
[[ "${item}" == "${needle}" ]] && return 0
|
||||||
|
done
|
||||||
|
return 1
|
||||||
|
}
|
||||||
|
|
||||||
|
container_id_for() {
|
||||||
|
# Find a running container for the given config path via devcontainer labels.
|
||||||
|
local config_path="$1"
|
||||||
|
docker ps -q \
|
||||||
|
--filter "label=devcontainer.local_folder=${REPO_ROOT}" \
|
||||||
|
--filter "label=devcontainer.config_file=${config_path}"
|
||||||
|
}
|
||||||
|
|
||||||
|
# --- argument parsing ------------------------------------------------------
|
||||||
|
|
||||||
|
[[ $# -eq 2 ]] || usage 1
|
||||||
|
|
||||||
|
CONFIG_NAME="$1"
|
||||||
|
COMMAND="$2"
|
||||||
|
|
||||||
|
in_list "${CONFIG_NAME}" "${VALID_CONFIGS[@]}" \
|
||||||
|
|| die "invalid config '${CONFIG_NAME}' (expected: ${VALID_CONFIGS[*]})"
|
||||||
|
in_list "${COMMAND}" "${VALID_COMMANDS[@]}" \
|
||||||
|
|| die "invalid command '${COMMAND}' (expected: ${VALID_COMMANDS[*]})"
|
||||||
|
|
||||||
|
CONFIG_PATH="${REPO_ROOT}/.devcontainer/${CONFIG_NAME}/devcontainer.json"
|
||||||
|
[[ -f "${CONFIG_PATH}" ]] || die "config not found: ${CONFIG_PATH}"
|
||||||
|
|
||||||
|
DC_ARGS=(--workspace-folder "${REPO_ROOT}" --config "${CONFIG_PATH}")
|
||||||
|
|
||||||
|
# --- dispatch --------------------------------------------------------------
|
||||||
|
|
||||||
|
case "${COMMAND}" in
|
||||||
|
up)
|
||||||
|
echo ">> bringing up '${CONFIG_NAME}'"
|
||||||
|
devcontainer up "${DC_ARGS[@]}"
|
||||||
|
;;
|
||||||
|
|
||||||
|
shell)
|
||||||
|
# Auto-up if not already running. `devcontainer up` is idempotent —
|
||||||
|
# it reuses an existing container, so this is cheap on warm starts.
|
||||||
|
if [[ -z "$(container_id_for "${CONFIG_PATH}")" ]]; then
|
||||||
|
echo ">> '${CONFIG_NAME}' not running, bringing it up first"
|
||||||
|
devcontainer up "${DC_ARGS[@]}"
|
||||||
|
fi
|
||||||
|
echo ">> attaching shell to '${CONFIG_NAME}'"
|
||||||
|
devcontainer exec "${DC_ARGS[@]}" bash 2>/dev/null \
|
||||||
|
|| devcontainer exec "${DC_ARGS[@]}" sh
|
||||||
|
;;
|
||||||
|
|
||||||
|
down)
|
||||||
|
cid="$(container_id_for "${CONFIG_PATH}")"
|
||||||
|
if [[ -z "${cid}" ]]; then
|
||||||
|
echo ">> '${CONFIG_NAME}' not running, nothing to stop"
|
||||||
|
exit 0
|
||||||
|
fi
|
||||||
|
echo ">> stopping '${CONFIG_NAME}'"
|
||||||
|
docker stop "${cid}"
|
||||||
|
;;
|
||||||
|
|
||||||
|
rebuild)
|
||||||
|
echo ">> rebuilding '${CONFIG_NAME}' from scratch"
|
||||||
|
devcontainer up "${DC_ARGS[@]}" --remove-existing-container --build-no-cache
|
||||||
|
;;
|
||||||
|
esac
|
||||||
|
|
@ -9,8 +9,8 @@ PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value
|
||||||
companies = list(
|
companies = list(
|
||||||
[
|
[
|
||||||
# Companies.THE_GUINESS_PARTNERSHIP,
|
# Companies.THE_GUINESS_PARTNERSHIP,
|
||||||
# Companies.SOUTHERN_HOUSING_GROUP,
|
Companies.SOUTHERN_HOUSING_GROUP,
|
||||||
Companies.CALICO_HOMES,
|
# Companies.CALICO_HOMES,
|
||||||
]
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,44 @@
|
||||||
|
data "terraform_remote_state" "shared" {
|
||||||
|
backend = "s3"
|
||||||
|
config = {
|
||||||
|
bucket = "assessment-model-terraform-state"
|
||||||
|
key = "env:/${var.stage}/terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||||
|
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||||
|
}
|
||||||
|
|
||||||
|
module "lambda" {
|
||||||
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
|
name = "bulk-address2uprn-combiner"
|
||||||
|
stage = var.stage
|
||||||
|
|
||||||
|
image_uri = local.image_uri
|
||||||
|
|
||||||
|
timeout = 900
|
||||||
|
memory_size = 2048
|
||||||
|
|
||||||
|
maximum_concurrency = var.maximum_concurrency
|
||||||
|
batch_size = var.batch_size
|
||||||
|
|
||||||
|
environment = {
|
||||||
|
STAGE = var.stage
|
||||||
|
LOG_LEVEL = "info"
|
||||||
|
S3_BUCKET_NAME = data.terraform_remote_state.shared.outputs.retrofit_sap_data_bucket_name
|
||||||
|
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||||
|
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
resource "aws_iam_role_policy_attachment" "bulk_address2uprn_combiner_s3" {
|
||||||
|
role = module.lambda.role_name
|
||||||
|
policy_arn = data.terraform_remote_state.shared.outputs.bulk_address2uprn_combiner_s3_arn
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
output "bulk_address2uprn_combiner_queue_url" {
|
||||||
|
value = module.lambda.queue_url
|
||||||
|
description = "URL of the bulk_address2uprn_combiner SQS queue"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "bulk_address2uprn_combiner_queue_arn" {
|
||||||
|
value = module.lambda.queue_arn
|
||||||
|
description = "ARN of the bulk_address2uprn_combiner SQS queue"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "bulk_address2uprn_combiner_lambda_arn" {
|
||||||
|
value = module.lambda.lambda_arn
|
||||||
|
description = "ARN of the bulk_address2uprn_combiner Lambda function"
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,16 @@
|
||||||
|
terraform {
|
||||||
|
required_providers {
|
||||||
|
aws = {
|
||||||
|
source = "hashicorp/aws"
|
||||||
|
version = ">= 5.0"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
backend "s3" {
|
||||||
|
bucket = "bulk-address2uprn-combiner-terraform-state"
|
||||||
|
key = "terraform.tfstate"
|
||||||
|
region = "eu-west-2"
|
||||||
|
}
|
||||||
|
|
||||||
|
required_version = ">= 1.2.0"
|
||||||
|
}
|
||||||
|
|
@ -0,0 +1,38 @@
|
||||||
|
variable "lambda_name" {
|
||||||
|
type = string
|
||||||
|
description = "Logical name of the lambda"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "stage" {
|
||||||
|
description = "Deployment stage (e.g. dev, prod)"
|
||||||
|
type = string
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "ecr_repo_url" {
|
||||||
|
type = string
|
||||||
|
description = "ECR repository URL (no tag, no digest)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "image_digest" {
|
||||||
|
type = string
|
||||||
|
description = "Image digest (sha256:...)"
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "maximum_concurrency" {
|
||||||
|
type = number
|
||||||
|
default = 2
|
||||||
|
description = "Maximum concurrent Lambda invocations from SQS (2-1000)."
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "batch_size" {
|
||||||
|
type = number
|
||||||
|
default = 1
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
image_uri = "${var.ecr_repo_url}@${var.image_digest}"
|
||||||
|
}
|
||||||
|
|
||||||
|
output "resolved_image_uri" {
|
||||||
|
value = local.image_uri
|
||||||
|
}
|
||||||
|
|
@ -7,6 +7,14 @@ data "terraform_remote_state" "shared" {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data "aws_secretsmanager_secret_version" "db_credentials" {
|
||||||
|
secret_id = "${var.stage}/assessment_model/db_credentials"
|
||||||
|
}
|
||||||
|
|
||||||
|
locals {
|
||||||
|
db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
|
||||||
|
}
|
||||||
|
|
||||||
module "lambda" {
|
module "lambda" {
|
||||||
source = "../../modules/lambda_with_sqs"
|
source = "../../modules/lambda_with_sqs"
|
||||||
|
|
||||||
|
|
@ -23,5 +31,20 @@ module "lambda" {
|
||||||
environment = {
|
environment = {
|
||||||
STAGE = var.stage
|
STAGE = var.stage
|
||||||
LOG_LEVEL = "info"
|
LOG_LEVEL = "info"
|
||||||
|
|
||||||
|
DB_USERNAME = local.db_credentials.db_assessment_model_username
|
||||||
|
DB_PASSWORD = local.db_credentials.db_assessment_model_password
|
||||||
|
DB_HOST = var.db_host
|
||||||
|
DB_NAME = var.db_name
|
||||||
|
DB_PORT = var.db_port
|
||||||
|
|
||||||
|
SHAREPOINT_CLIENT_ID = var.sharepoint_client_id
|
||||||
|
SHAREPOINT_CLIENT_SECRET = var.sharepoint_client_secret
|
||||||
|
DOMNA_SHAREPOINT_ID = var.domna_sharepoint_id
|
||||||
|
OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id
|
||||||
|
PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id
|
||||||
|
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id
|
||||||
|
PASHUB_EMAIL = var.pashub_email
|
||||||
|
PASHUB_PASSWORD = var.pashub_password
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,3 +35,58 @@ locals {
|
||||||
output "resolved_image_uri" {
|
output "resolved_image_uri" {
|
||||||
value = local.image_uri
|
value = local.image_uri
|
||||||
}
|
}
|
||||||
|
|
||||||
|
variable "db_host" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "db_name" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "db_port" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "sharepoint_client_id" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "sharepoint_client_secret" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "domna_sharepoint_id" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "osmosis_acd_sharepoint_id" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "private_pay_sharepoint_id" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "social_housing_wave_3_sharepoint_id" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "pashub_email" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
||||||
|
variable "pashub_password" {
|
||||||
|
type = string
|
||||||
|
sensitive = true
|
||||||
|
}
|
||||||
|
|
@ -477,6 +477,34 @@ output "postcode_splitter_s3_read_arn" {
|
||||||
value = module.postcode_splitter_s3_read.policy_arn
|
value = module.postcode_splitter_s3_read.policy_arn
|
||||||
}
|
}
|
||||||
|
|
||||||
|
################################################
|
||||||
|
# Bulk Address2UPRN Combiner – Lambda ECR
|
||||||
|
################################################
|
||||||
|
module "bulk_address2uprn_combiner_state_bucket" {
|
||||||
|
source = "../modules/tf_state_bucket"
|
||||||
|
bucket_name = "bulk-address2uprn-combiner-terraform-state"
|
||||||
|
}
|
||||||
|
|
||||||
|
module "bulk_address2uprn_combiner_registry" {
|
||||||
|
source = "../modules/container_registry"
|
||||||
|
name = "bulk_address2uprn_combiner"
|
||||||
|
stage = var.stage
|
||||||
|
}
|
||||||
|
|
||||||
|
module "bulk_address2uprn_combiner_s3" {
|
||||||
|
source = "../modules/s3_iam_policy"
|
||||||
|
|
||||||
|
policy_name = "BulkAddress2UprnCombinerS3"
|
||||||
|
policy_description = "Allow bulk_address2uprn_combiner Lambda to read ara_raw_outputs and write bulk_final_outputs"
|
||||||
|
bucket_arns = ["arn:aws:s3:::retrofit-data-${var.stage}"]
|
||||||
|
actions = ["s3:GetObject", "s3:ListBucket", "s3:PutObject"]
|
||||||
|
resource_paths = ["/ara_raw_outputs/*", "/bulk_final_outputs/*"]
|
||||||
|
}
|
||||||
|
|
||||||
|
output "bulk_address2uprn_combiner_s3_arn" {
|
||||||
|
value = module.bulk_address2uprn_combiner_s3.policy_arn
|
||||||
|
}
|
||||||
|
|
||||||
################################################
|
################################################
|
||||||
# Categorisation – Lambda ECR
|
# Categorisation – Lambda ECR
|
||||||
################################################
|
################################################
|
||||||
|
|
|
||||||
Loading…
Add table
Reference in a new issue