diff --git a/backend/app/config.py b/backend/app/config.py index 5139a489..80a2d46a 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -66,6 +66,18 @@ class Settings(BaseSettings): ORDNANCE_SURVEY_API_KEY: str = "changeme" HUBSPOT_API_KEY: Optional[str] = None + # Sharepoint + SHAREPOINT_CLIENT_ID: Optional[str] = None + SHAREPOINT_CLIENT_SECRET: Optional[str] = None + SHAREPOINT_TENANT_ID: Optional[str] = None + DOMNA_SHAREPOINT_ID: Optional[str] = None + OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None + PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None + SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None + + # Pas Hub + PASHUB_EMAIL: Optional[str] = None + PASHUB_PASSWORD: Optional[str] = None # Optional AWS creds (only required in local) AWS_ACCESS_KEY_ID: Optional[str] = None @@ -91,7 +103,6 @@ def get_prediction_buckets(): "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET, "heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET, "hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET, - # Score model - SAP re-baselining model "retrofit_sap_baseline_predictions": get_settings().SAP_BASELINE_PREDICTIONS_BUCKET, "retrofit_carbon_baseline_predictions": get_settings().CARBON_BASELINE_PREDICTIONS_BUCKET, diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py new file mode 100644 index 00000000..726ed0a3 --- /dev/null +++ b/backend/app/db/models/uploaded_file.py @@ -0,0 +1,44 @@ +import enum +from sqlalchemy import TIMESTAMP, BigInteger, Column, Text, Enum as SqlEnum + +from backend.app.db.base import Base + + +class FileTypeEnum(enum.Enum): + PHOTO_PACK = "photo_pack" + SITE_NOTE = "site_note" + RD_SAP_SITE_NOTE = "rd_sap_site_note" + PAS_2023_VENTILATION = "pas_2023_ventilation" + PAS_2023_CONDITION = "pas_2023_condition" + PAS_SIGNIFICANCE = "pas_significance" + PAR_PHOTO_PACK = "par_photo_pack" + PAS_2023_PROPERTY = "pas_2023_property" + PAS_2023_OCCUPANCY = "pas_2023_occupancy" + + +class FileSourceEnum(enum.Enum): + PAS_HUB = "pas hub" + SHAREPOINT = "sharepoint" + HUBSPOT = "hubspot" + + +class UploadedFile(Base): + __tablename__ = "uploaded_files" + + id = Column(BigInteger, primary_key=True, autoincrement=True) + + s3_file_bucket = Column(Text, nullable=False) + s3_file_key = Column(Text, nullable=False) + s3_upload_timestamp = Column(TIMESTAMP(timezone=True), nullable=False) + + landlord_property_id = Column(Text, nullable=True) + uprn = Column(BigInteger, nullable=True) + hubspot_listing_id = Column(BigInteger, nullable=True) + + file_type = Column( + SqlEnum(FileTypeEnum, name="file_type", create_type=False), nullable=True + ) + + file_source = Column( + SqlEnum(FileSourceEnum, name="file_source", create_type=False), nullable=True + ) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 27151437..db15b09a 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -174,6 +174,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): service="plan_engine", inputs=data, task_only=True, + source=SourceEnum.PORTFOLIO, + source_id=str(body.portfolio_id), ) subtask_interface = SubTaskInterface() @@ -222,6 +224,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest): service="plan_engine", inputs=data, task_only=False, + source=SourceEnum.PORTFOLIO, + source_id=str(body.portfolio_id), ) data["task_id"] = str(task_id) data["subtask_id"] = str(subtask_id) diff --git a/backend/export/property_scenarios/main.py b/backend/export/property_scenarios/main.py index d38db4c9..f3ea0100 100644 --- a/backend/export/property_scenarios/main.py +++ b/backend/export/property_scenarios/main.py @@ -134,10 +134,18 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un body_dict = { "task_id": "test", "subtask_id": "test", - "portfolio_id": 569, + "portfolio_id": 655, "scenario_ids": [], "default_plans_only": True, } + + body_dict = { + "task_id": "test", + "subtask_id": "test", + "portfolio_id": 655, + "scenario_ids": [1174], + "default_plans_only": False, + } :param event: Lambda event containing export request details :param context: Lambda context (not used in this handler but included for completeness) :return: HTTP response indicating success or failure of the export operation diff --git a/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx b/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx new file mode 100644 index 00000000..a6478e3b Binary files /dev/null and b/backend/pashub_fetcher/The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx differ diff --git a/backend/pashub_fetcher/core_files.py b/backend/pashub_fetcher/core_files.py index 82637f70..4da10661 100644 --- a/backend/pashub_fetcher/core_files.py +++ b/backend/pashub_fetcher/core_files.py @@ -1,4 +1,7 @@ from enum import Enum +from typing import Optional + +from backend.app.db.models.uploaded_file import FileTypeEnum class CoreFiles(Enum): @@ -11,3 +14,23 @@ class CoreFiles(Enum): PAR_PHOTOPACK = "PAR Photo Pack" PAS2023_PROPERTY = "PAS 2023 Property Assessment Report" PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report" + + +CORE_TO_FILETYPE_MAP = { + CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value, + CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value, + CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value, + CoreFiles.PAS2023_VENTILATION: FileTypeEnum.PAS_2023_VENTILATION.value, + CoreFiles.PAS2023_CONDITION: FileTypeEnum.PAS_2023_CONDITION.value, + CoreFiles.PAS_SIGNIFICANCE: FileTypeEnum.PAS_SIGNIFICANCE.value, + CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value, + CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value, + CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value, +} + + +def infer_file_type(filename: str) -> Optional[str]: + for core_file, file_type in CORE_TO_FILETYPE_MAP.items(): + if filename.startswith(core_file.value): + return file_type + return None diff --git a/backend/pashub_fetcher/handler/handler.py b/backend/pashub_fetcher/handler/handler.py index 38b79ab4..df7bb9dc 100644 --- a/backend/pashub_fetcher/handler/handler.py +++ b/backend/pashub_fetcher/handler/handler.py @@ -1,13 +1,22 @@ +from datetime import datetime, timezone import os import re -from typing import Any, Dict, List, Mapping +from typing import Any, Dict, List, Mapping, Optional from openpyxl import load_workbook +from backend.app.config import get_settings +from backend.app.db.connection import db_session +from backend.app.db.models.uploaded_file import ( + FileSourceEnum, + UploadedFile, +) +from backend.pashub_fetcher.core_files import infer_file_type from backend.pashub_fetcher.job import Job from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders from backend.pashub_fetcher.token_getter import get_token_from_local_storage from utils.logger import setup_logger +from utils.s3 import upload_file_to_s3 from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient from utils.sharepoint.domna_sites import DomnaSites @@ -17,7 +26,8 @@ logger = setup_logger() def extract_jobs(filepath: str) -> List[Job]: wb = load_workbook(filepath, data_only=True) - ws = wb["watford warm homes (wave 3) mai"] + # ws = wb["watford warm homes (wave 3) mai"] + ws = wb["filtered"] HEADER_ROW = 3 @@ -28,7 +38,8 @@ def extract_jobs(filepath: str) -> List[Job]: headers[value.strip()] = col name_col = headers["Name"] - link_col = headers["Pashub Link"] + # link_col = headers["Pashub Link"] + link_col = headers["PasHub Link"] jobs: List[Job] = [] @@ -39,88 +50,170 @@ def extract_jobs(filepath: str) -> List[Job]: if not name or not link: continue - link = str(link) - - match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", link) + match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link)) if not match: continue - job_id = match.group(1) - - jobs.append({"id": job_id, "address": str(name)}) + jobs.append( + { + "id": match.group(1), + "address": str(name), + } + ) return jobs +def get_pashub_client(email: str, password: str) -> PashubClient: + token = get_token_from_local_storage(email, password) + logger.info("Token extracted successfully") + return PashubClient(token=token) + + +def upload_job_to_sharepoint( + sharepoint_client: DomnaSharepointClient, + base_path: str, + job: Job, + job_files: List[str], +) -> None: + job_path = f"{base_path}/{job['address']}" + + # Create main job folder + sharepoint_client.makedir(job["address"], base_path) + + # Create subfolders + for folder in SharepointSubfolders: + sharepoint_client.makedir(folder.value, job_path) + + # Upload into assessment folder + assessment_path = f"{job_path}/{SharepointSubfolders.ASSESSMENT.value}" + + for file_path in job_files: + filename = file_path.split("/")[-1] + + sharepoint_client.upload_file( + file_path, + assessment_path, + filename, + ) + + +def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None: + bucket = "retrofit-energy-assessments-dev" + + base_path = f"documents/uprn/{uprn}" + + uploaded_files: List[UploadedFile] = [] + + for file_path in job_files: + filename = os.path.basename(file_path) + file_key = f"{base_path}/{filename}" + + upload_file_to_s3(file_path, bucket, file_key) + + # load row to db + uploaded_files.append( + UploadedFile( + s3_file_bucket=bucket, + s3_file_key=file_key, + s3_upload_timestamp=datetime.now(timezone.utc), + uprn=int(uprn), + file_source=FileSourceEnum.PAS_HUB.value, + file_type=infer_file_type(filename), + ) + ) + + with db_session() as session: + session.add_all(uploaded_files) + session.commit() + + pass + + +def process_job( + job: Job, + pashub_client: PashubClient, + sharepoint_client: DomnaSharepointClient, + base_path: str, +) -> List[str]: + job_id = job["id"] + + uprn: Optional[str] = pashub_client.get_uprn_by_job_id(job_id) + if uprn: + logger.info(f"Got UPRN {uprn} for job {job_id}") + else: + logger.info(f"No UPRN found for job {job_id}") + + job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id) + + if uprn: + logger.info("Uploading files to s3") + upload_job_to_s3_and_update_db(job_files, uprn) + + upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files) + + return job_files + + def handler(event: Mapping[str, Any], context: Any) -> None: + settings = get_settings() + BASE_DIR = os.path.dirname(os.path.dirname(__file__)) - filepath = os.path.join(BASE_DIR, "Watford_Warm_Homes_Wave_3_RA Downloads .xlsx") + # filepath = os.path.join(BASE_DIR, "Watford_Warm_Homes_Wave_3_RA Downloads .xlsx") + filepath = os.path.join( + BASE_DIR, + "The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx", + ) jobs: List[Job] = extract_jobs(filepath) - logger.info("Successfully loaded jobs from spreadsheet") - pas_hub_email = "random@test.com" - pas_hub_password = "my_fake_password" + pas_hub_email = settings.PASHUB_EMAIL + pas_hub_password = settings.PASHUB_PASSWORD - try: - token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password) - logger.info(f"Token extracted successfully") - except: - logger.error("Error getting auth token from Pas Hub") - raise + if (not pas_hub_email) or (not pas_hub_password): + raise ValueError("Pas Hub credentials not provided") + + pashub_client = get_pashub_client(pas_hub_email, pas_hub_password) - pashub_client = PashubClient(token=token) sharepoint_client = DomnaSharepointClient( sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3 ) + BASE_PATH = "/Osmosis-ACD Projects/Watford Warm Homes/Watford Property Folders (Shared with Client)" + saved_file_paths: List[str] = [] - BASE_PATH = "/Osmosis-ACD Projects/Watford Warm Homes/Watford Property Folders (Shared with Client)" # TODO: get from request body for job in jobs: try: - job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id( - job["id"] + files = process_job( + job, + pashub_client, + sharepoint_client, + BASE_PATH, ) - - # Upload files to sharepoint - job_path = f"{BASE_PATH}/{job['address']}" - - sharepoint_client.makedir(job["address"], BASE_PATH) - - for folder in SharepointSubfolders: - sharepoint_client.makedir(folder.value, job_path) - - assessment_path = f"{job_path}/{SharepointSubfolders.ASSESSMENT.value}" - - for file_path in job_files: - filename = file_path.split("/")[-1] - - sharepoint_client.upload_file( - file_path, - assessment_path, - filename, - ) - - saved_file_paths.extend(job_files) + saved_file_paths.extend(files) except UnauthorizedError: logger.warning("Token expired - refreshing") - token = get_token_from_local_storage(pas_hub_email, pas_hub_password) - - pashub_client = PashubClient(token=token) - - # retry once - saved_file_paths.extend( - pashub_client.get_core_evidence_files_by_job_id(job["id"]) + pashub_client = get_pashub_client( + pas_hub_email, + pas_hub_password, ) - print(f"saved {len(saved_file_paths)} files") + # retry once + files = process_job( + job, + pashub_client, + sharepoint_client, + BASE_PATH, + ) + saved_file_paths.extend(files) + + logger.info(f"Saved {len(saved_file_paths)} files") if __name__ == "__main__": event = {"Records": [{"body": "{}"}]} - handler(event, None) diff --git a/backend/pashub_fetcher/handler/test_handler.py b/backend/pashub_fetcher/handler/test_handler.py index 996835a2..07f651f4 100644 --- a/backend/pashub_fetcher/handler/test_handler.py +++ b/backend/pashub_fetcher/handler/test_handler.py @@ -1,7 +1,23 @@ from typing import Any, Mapping import json +from utils.logger import setup_logger + +logger = setup_logger() + def handler(event: Mapping[str, Any], context: Any) -> None: - print("Received event:") - print(json.dumps(event, indent=2)) + logger.info("Received event:") + + for record in event.get("Records", []): + body_str = record.get("body", "") + + try: + body_obj = json.loads(body_str) + except json.JSONDecodeError: + logger.error("Failed to parse body as JSON") + logger.info(body_str) + continue + + logger.info("Parsed message body:") + logger.info(json.dumps(body_obj, indent=2)) diff --git a/backend/pashub_fetcher/pashub_client.py b/backend/pashub_fetcher/pashub_client.py index efc21803..20b8590d 100644 --- a/backend/pashub_fetcher/pashub_client.py +++ b/backend/pashub_fetcher/pashub_client.py @@ -71,6 +71,21 @@ class PashubClient: return saved_files + def get_uprn_by_job_id(self, job_id: str) -> Optional[str]: + logger.info(f"Getting UPRN for job ID {job_id}") + url = f"{self.base}/jobs/{job_id}" + + r = self.session.get(url) + if r.status_code == 401: + raise UnauthorizedError("Token expired or invalid") + + r.raise_for_status() + + try: + return r.json()["uprn"] + except Exception: + return None + def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]: for core_file in CoreFiles: if file.file_name.startswith(core_file.value): diff --git a/backend/pashub_fetcher/pashub_to_ara_trigger_request.py b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py new file mode 100644 index 00000000..2e4f8380 --- /dev/null +++ b/backend/pashub_fetcher/pashub_to_ara_trigger_request.py @@ -0,0 +1,18 @@ +from typing import Optional +from pydantic import BaseModel + + +class PashubToAraTriggerRequest(BaseModel): + pashub_link: ( + str # e.g. https://pashub.net/jobs/12345-abcd-1234-abcd-12345abcde/details + ) + + address: Optional[str] = None + sharepoint_link: Optional[str] = None + uprn: Optional[str] = None + landlord_property_id: Optional[str] = None + deal_stage: Optional[str] = None + + @property + def pashub_job_id(self) -> str: + return self.pashub_link.split("/")[-2] diff --git a/backend/tests/test_rebaselining_pipeline.py b/backend/tests/test_rebaselining_pipeline.py index 9fbe1f35..a0c62f34 100644 --- a/backend/tests/test_rebaselining_pipeline.py +++ b/backend/tests/test_rebaselining_pipeline.py @@ -2,6 +2,10 @@ import os import pickle import pandas as pd import pytest +from datetime import datetime +from backend.ml_models.api import ModelApi +from backend.app.utils import sap_to_epc +from backend.app.config import get_prediction_buckets def load_sample_certificates(): @@ -60,12 +64,6 @@ def load_cleaning_data(): @pytest.mark.integration def test_rebaselining_pipeline_with_real_data(): - import pandas as pd - from datetime import datetime - from backend.ml_models.api import ModelApi - from backend.app.utils import sap_to_epc - from backend.app.config import get_prediction_buckets - df = load_sample_certificates() cleaning_data = load_cleaning_data() input_properties = [make_property_from_row(row, cleaning_data=cleaning_data) for _, row in df.iterrows()] diff --git a/recommendations/Costs.py b/recommendations/Costs.py index f2d43339..cb67a90f 100644 --- a/recommendations/Costs.py +++ b/recommendations/Costs.py @@ -209,7 +209,7 @@ class Costs: # Try and get the region after converting the keys to lower self.region = { k.lower(): v for k, v in county_to_region_map.items() - }.get(self.property.property.epc_record.local_authority_label.lower(), None) + }.get(self.property.epc_record.local_authority_label.lower(), None) if self.region is None: logger.warning("No region found for county %s, defaulting to South East England", diff --git a/utils/s3.py b/utils/s3.py index 6aa3f44e..242e0db5 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -472,3 +472,14 @@ def list_xmls_in_s3_folder(bucket_name, folder_name): f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}" ) return [] + + +def upload_file_to_s3(file_path: str, bucket_name: str, file_key: str) -> None: + try: + s3 = boto3.resource("s3") + bucket = s3.Bucket(bucket_name) + bucket.upload_file(file_path, file_key) + logger.info(f"Uploaded {file_path} to s3://{bucket_name}/{file_key}") + except Exception as e: + logger.error(f"Failed to upload {file_path} to S3: {e}") + raise