mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
resolve conflict form dan and me
This commit is contained in:
commit
b9df10eb23
13 changed files with 305 additions and 64 deletions
|
|
@ -66,6 +66,18 @@ class Settings(BaseSettings):
|
|||
ORDNANCE_SURVEY_API_KEY: str = "changeme"
|
||||
|
||||
HUBSPOT_API_KEY: Optional[str] = None
|
||||
# Sharepoint
|
||||
SHAREPOINT_CLIENT_ID: Optional[str] = None
|
||||
SHAREPOINT_CLIENT_SECRET: Optional[str] = None
|
||||
SHAREPOINT_TENANT_ID: Optional[str] = None
|
||||
DOMNA_SHAREPOINT_ID: Optional[str] = None
|
||||
OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None
|
||||
PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None
|
||||
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None
|
||||
|
||||
# Pas Hub
|
||||
PASHUB_EMAIL: Optional[str] = None
|
||||
PASHUB_PASSWORD: Optional[str] = None
|
||||
|
||||
# Optional AWS creds (only required in local)
|
||||
AWS_ACCESS_KEY_ID: Optional[str] = None
|
||||
|
|
@ -91,7 +103,6 @@ def get_prediction_buckets():
|
|||
"carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET,
|
||||
"heating_kwh_predictions": get_settings().HEATING_KWH_PREDICTIONS_BUCKET,
|
||||
"hotwater_kwh_predictions": get_settings().HOTWATER_KWH_PREDICTIONS_BUCKET,
|
||||
|
||||
# Score model - SAP re-baselining model
|
||||
"retrofit_sap_baseline_predictions": get_settings().SAP_BASELINE_PREDICTIONS_BUCKET,
|
||||
"retrofit_carbon_baseline_predictions": get_settings().CARBON_BASELINE_PREDICTIONS_BUCKET,
|
||||
|
|
|
|||
44
backend/app/db/models/uploaded_file.py
Normal file
44
backend/app/db/models/uploaded_file.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import enum
|
||||
from sqlalchemy import TIMESTAMP, BigInteger, Column, Text, Enum as SqlEnum
|
||||
|
||||
from backend.app.db.base import Base
|
||||
|
||||
|
||||
class FileTypeEnum(enum.Enum):
|
||||
PHOTO_PACK = "photo_pack"
|
||||
SITE_NOTE = "site_note"
|
||||
RD_SAP_SITE_NOTE = "rd_sap_site_note"
|
||||
PAS_2023_VENTILATION = "pas_2023_ventilation"
|
||||
PAS_2023_CONDITION = "pas_2023_condition"
|
||||
PAS_SIGNIFICANCE = "pas_significance"
|
||||
PAR_PHOTO_PACK = "par_photo_pack"
|
||||
PAS_2023_PROPERTY = "pas_2023_property"
|
||||
PAS_2023_OCCUPANCY = "pas_2023_occupancy"
|
||||
|
||||
|
||||
class FileSourceEnum(enum.Enum):
|
||||
PAS_HUB = "pas hub"
|
||||
SHAREPOINT = "sharepoint"
|
||||
HUBSPOT = "hubspot"
|
||||
|
||||
|
||||
class UploadedFile(Base):
|
||||
__tablename__ = "uploaded_files"
|
||||
|
||||
id = Column(BigInteger, primary_key=True, autoincrement=True)
|
||||
|
||||
s3_file_bucket = Column(Text, nullable=False)
|
||||
s3_file_key = Column(Text, nullable=False)
|
||||
s3_upload_timestamp = Column(TIMESTAMP(timezone=True), nullable=False)
|
||||
|
||||
landlord_property_id = Column(Text, nullable=True)
|
||||
uprn = Column(BigInteger, nullable=True)
|
||||
hubspot_listing_id = Column(BigInteger, nullable=True)
|
||||
|
||||
file_type = Column(
|
||||
SqlEnum(FileTypeEnum, name="file_type", create_type=False), nullable=True
|
||||
)
|
||||
|
||||
file_source = Column(
|
||||
SqlEnum(FileSourceEnum, name="file_source", create_type=False), nullable=True
|
||||
)
|
||||
|
|
@ -174,6 +174,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
|
|||
service="plan_engine",
|
||||
inputs=data,
|
||||
task_only=True,
|
||||
source=SourceEnum.PORTFOLIO,
|
||||
source_id=str(body.portfolio_id),
|
||||
)
|
||||
|
||||
subtask_interface = SubTaskInterface()
|
||||
|
|
@ -222,6 +224,8 @@ async def trigger_plan_entrypoint(body: PlanTriggerRequest):
|
|||
service="plan_engine",
|
||||
inputs=data,
|
||||
task_only=False,
|
||||
source=SourceEnum.PORTFOLIO,
|
||||
source_id=str(body.portfolio_id),
|
||||
)
|
||||
data["task_id"] = str(task_id)
|
||||
data["subtask_id"] = str(subtask_id)
|
||||
|
|
|
|||
|
|
@ -134,10 +134,18 @@ def handler(event: Mapping[str, Any], context: Optional[Any]) -> Mapping[str, Un
|
|||
body_dict = {
|
||||
"task_id": "test",
|
||||
"subtask_id": "test",
|
||||
"portfolio_id": 569,
|
||||
"portfolio_id": 655,
|
||||
"scenario_ids": [],
|
||||
"default_plans_only": True,
|
||||
}
|
||||
|
||||
body_dict = {
|
||||
"task_id": "test",
|
||||
"subtask_id": "test",
|
||||
"portfolio_id": 655,
|
||||
"scenario_ids": [1174],
|
||||
"default_plans_only": False,
|
||||
}
|
||||
:param event: Lambda event containing export request details
|
||||
:param context: Lambda context (not used in this handler but included for completeness)
|
||||
:return: HTTP response indicating success or failure of the export operation
|
||||
|
|
|
|||
Binary file not shown.
|
|
@ -1,4 +1,7 @@
|
|||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
from backend.app.db.models.uploaded_file import FileTypeEnum
|
||||
|
||||
|
||||
class CoreFiles(Enum):
|
||||
|
|
@ -11,3 +14,23 @@ class CoreFiles(Enum):
|
|||
PAR_PHOTOPACK = "PAR Photo Pack"
|
||||
PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
|
||||
PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
|
||||
|
||||
|
||||
CORE_TO_FILETYPE_MAP = {
|
||||
CoreFiles.PHOTOPACK: FileTypeEnum.PHOTO_PACK.value,
|
||||
CoreFiles.SITENOTE: FileTypeEnum.SITE_NOTE.value,
|
||||
CoreFiles.RDSAP_SITENOTE: FileTypeEnum.RD_SAP_SITE_NOTE.value,
|
||||
CoreFiles.PAS2023_VENTILATION: FileTypeEnum.PAS_2023_VENTILATION.value,
|
||||
CoreFiles.PAS2023_CONDITION: FileTypeEnum.PAS_2023_CONDITION.value,
|
||||
CoreFiles.PAS_SIGNIFICANCE: FileTypeEnum.PAS_SIGNIFICANCE.value,
|
||||
CoreFiles.PAR_PHOTOPACK: FileTypeEnum.PAR_PHOTO_PACK.value,
|
||||
CoreFiles.PAS2023_PROPERTY: FileTypeEnum.PAS_2023_PROPERTY.value,
|
||||
CoreFiles.PAS2023_OCCUPANCY: FileTypeEnum.PAS_2023_OCCUPANCY.value,
|
||||
}
|
||||
|
||||
|
||||
def infer_file_type(filename: str) -> Optional[str]:
|
||||
for core_file, file_type in CORE_TO_FILETYPE_MAP.items():
|
||||
if filename.startswith(core_file.value):
|
||||
return file_type
|
||||
return None
|
||||
|
|
|
|||
|
|
@ -1,13 +1,22 @@
|
|||
from datetime import datetime, timezone
|
||||
import os
|
||||
import re
|
||||
from typing import Any, Dict, List, Mapping
|
||||
from typing import Any, Dict, List, Mapping, Optional
|
||||
from openpyxl import load_workbook
|
||||
|
||||
from backend.app.config import get_settings
|
||||
from backend.app.db.connection import db_session
|
||||
from backend.app.db.models.uploaded_file import (
|
||||
FileSourceEnum,
|
||||
UploadedFile,
|
||||
)
|
||||
from backend.pashub_fetcher.core_files import infer_file_type
|
||||
from backend.pashub_fetcher.job import Job
|
||||
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
|
||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import upload_file_to_s3
|
||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||
from utils.sharepoint.domna_sites import DomnaSites
|
||||
|
||||
|
|
@ -17,7 +26,8 @@ logger = setup_logger()
|
|||
|
||||
def extract_jobs(filepath: str) -> List[Job]:
|
||||
wb = load_workbook(filepath, data_only=True)
|
||||
ws = wb["watford warm homes (wave 3) mai"]
|
||||
# ws = wb["watford warm homes (wave 3) mai"]
|
||||
ws = wb["filtered"]
|
||||
|
||||
HEADER_ROW = 3
|
||||
|
||||
|
|
@ -28,7 +38,8 @@ def extract_jobs(filepath: str) -> List[Job]:
|
|||
headers[value.strip()] = col
|
||||
|
||||
name_col = headers["Name"]
|
||||
link_col = headers["Pashub Link"]
|
||||
# link_col = headers["Pashub Link"]
|
||||
link_col = headers["PasHub Link"]
|
||||
|
||||
jobs: List[Job] = []
|
||||
|
||||
|
|
@ -39,88 +50,170 @@ def extract_jobs(filepath: str) -> List[Job]:
|
|||
if not name or not link:
|
||||
continue
|
||||
|
||||
link = str(link)
|
||||
|
||||
match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", link)
|
||||
match = re.search(r"/jobs/([0-9a-fA-F\-]+)/", str(link))
|
||||
if not match:
|
||||
continue
|
||||
|
||||
job_id = match.group(1)
|
||||
|
||||
jobs.append({"id": job_id, "address": str(name)})
|
||||
jobs.append(
|
||||
{
|
||||
"id": match.group(1),
|
||||
"address": str(name),
|
||||
}
|
||||
)
|
||||
|
||||
return jobs
|
||||
|
||||
|
||||
def get_pashub_client(email: str, password: str) -> PashubClient:
|
||||
token = get_token_from_local_storage(email, password)
|
||||
logger.info("Token extracted successfully")
|
||||
return PashubClient(token=token)
|
||||
|
||||
|
||||
def upload_job_to_sharepoint(
|
||||
sharepoint_client: DomnaSharepointClient,
|
||||
base_path: str,
|
||||
job: Job,
|
||||
job_files: List[str],
|
||||
) -> None:
|
||||
job_path = f"{base_path}/{job['address']}"
|
||||
|
||||
# Create main job folder
|
||||
sharepoint_client.makedir(job["address"], base_path)
|
||||
|
||||
# Create subfolders
|
||||
for folder in SharepointSubfolders:
|
||||
sharepoint_client.makedir(folder.value, job_path)
|
||||
|
||||
# Upload into assessment folder
|
||||
assessment_path = f"{job_path}/{SharepointSubfolders.ASSESSMENT.value}"
|
||||
|
||||
for file_path in job_files:
|
||||
filename = file_path.split("/")[-1]
|
||||
|
||||
sharepoint_client.upload_file(
|
||||
file_path,
|
||||
assessment_path,
|
||||
filename,
|
||||
)
|
||||
|
||||
|
||||
def upload_job_to_s3_and_update_db(job_files: List[str], uprn: str) -> None:
|
||||
bucket = "retrofit-energy-assessments-dev"
|
||||
|
||||
base_path = f"documents/uprn/{uprn}"
|
||||
|
||||
uploaded_files: List[UploadedFile] = []
|
||||
|
||||
for file_path in job_files:
|
||||
filename = os.path.basename(file_path)
|
||||
file_key = f"{base_path}/{filename}"
|
||||
|
||||
upload_file_to_s3(file_path, bucket, file_key)
|
||||
|
||||
# load row to db
|
||||
uploaded_files.append(
|
||||
UploadedFile(
|
||||
s3_file_bucket=bucket,
|
||||
s3_file_key=file_key,
|
||||
s3_upload_timestamp=datetime.now(timezone.utc),
|
||||
uprn=int(uprn),
|
||||
file_source=FileSourceEnum.PAS_HUB.value,
|
||||
file_type=infer_file_type(filename),
|
||||
)
|
||||
)
|
||||
|
||||
with db_session() as session:
|
||||
session.add_all(uploaded_files)
|
||||
session.commit()
|
||||
|
||||
pass
|
||||
|
||||
|
||||
def process_job(
|
||||
job: Job,
|
||||
pashub_client: PashubClient,
|
||||
sharepoint_client: DomnaSharepointClient,
|
||||
base_path: str,
|
||||
) -> List[str]:
|
||||
job_id = job["id"]
|
||||
|
||||
uprn: Optional[str] = pashub_client.get_uprn_by_job_id(job_id)
|
||||
if uprn:
|
||||
logger.info(f"Got UPRN {uprn} for job {job_id}")
|
||||
else:
|
||||
logger.info(f"No UPRN found for job {job_id}")
|
||||
|
||||
job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(job_id)
|
||||
|
||||
if uprn:
|
||||
logger.info("Uploading files to s3")
|
||||
upload_job_to_s3_and_update_db(job_files, uprn)
|
||||
|
||||
upload_job_to_sharepoint(sharepoint_client, base_path, job, job_files)
|
||||
|
||||
return job_files
|
||||
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
|
||||
settings = get_settings()
|
||||
|
||||
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
|
||||
filepath = os.path.join(BASE_DIR, "Watford_Warm_Homes_Wave_3_RA Downloads .xlsx")
|
||||
# filepath = os.path.join(BASE_DIR, "Watford_Warm_Homes_Wave_3_RA Downloads .xlsx")
|
||||
filepath = os.path.join(
|
||||
BASE_DIR,
|
||||
"The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx",
|
||||
)
|
||||
|
||||
jobs: List[Job] = extract_jobs(filepath)
|
||||
|
||||
logger.info("Successfully loaded jobs from spreadsheet")
|
||||
|
||||
pas_hub_email = "random@test.com"
|
||||
pas_hub_password = "my_fake_password"
|
||||
pas_hub_email = settings.PASHUB_EMAIL
|
||||
pas_hub_password = settings.PASHUB_PASSWORD
|
||||
|
||||
try:
|
||||
token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password)
|
||||
logger.info(f"Token extracted successfully")
|
||||
except:
|
||||
logger.error("Error getting auth token from Pas Hub")
|
||||
raise
|
||||
if (not pas_hub_email) or (not pas_hub_password):
|
||||
raise ValueError("Pas Hub credentials not provided")
|
||||
|
||||
pashub_client = get_pashub_client(pas_hub_email, pas_hub_password)
|
||||
|
||||
pashub_client = PashubClient(token=token)
|
||||
sharepoint_client = DomnaSharepointClient(
|
||||
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
|
||||
)
|
||||
|
||||
BASE_PATH = "/Osmosis-ACD Projects/Watford Warm Homes/Watford Property Folders (Shared with Client)"
|
||||
|
||||
saved_file_paths: List[str] = []
|
||||
BASE_PATH = "/Osmosis-ACD Projects/Watford Warm Homes/Watford Property Folders (Shared with Client)" # TODO: get from request body
|
||||
|
||||
for job in jobs:
|
||||
try:
|
||||
job_files: List[str] = pashub_client.get_core_evidence_files_by_job_id(
|
||||
job["id"]
|
||||
files = process_job(
|
||||
job,
|
||||
pashub_client,
|
||||
sharepoint_client,
|
||||
BASE_PATH,
|
||||
)
|
||||
|
||||
# Upload files to sharepoint
|
||||
job_path = f"{BASE_PATH}/{job['address']}"
|
||||
|
||||
sharepoint_client.makedir(job["address"], BASE_PATH)
|
||||
|
||||
for folder in SharepointSubfolders:
|
||||
sharepoint_client.makedir(folder.value, job_path)
|
||||
|
||||
assessment_path = f"{job_path}/{SharepointSubfolders.ASSESSMENT.value}"
|
||||
|
||||
for file_path in job_files:
|
||||
filename = file_path.split("/")[-1]
|
||||
|
||||
sharepoint_client.upload_file(
|
||||
file_path,
|
||||
assessment_path,
|
||||
filename,
|
||||
)
|
||||
|
||||
saved_file_paths.extend(job_files)
|
||||
saved_file_paths.extend(files)
|
||||
|
||||
except UnauthorizedError:
|
||||
logger.warning("Token expired - refreshing")
|
||||
|
||||
token = get_token_from_local_storage(pas_hub_email, pas_hub_password)
|
||||
|
||||
pashub_client = PashubClient(token=token)
|
||||
|
||||
# retry once
|
||||
saved_file_paths.extend(
|
||||
pashub_client.get_core_evidence_files_by_job_id(job["id"])
|
||||
pashub_client = get_pashub_client(
|
||||
pas_hub_email,
|
||||
pas_hub_password,
|
||||
)
|
||||
|
||||
print(f"saved {len(saved_file_paths)} files")
|
||||
# retry once
|
||||
files = process_job(
|
||||
job,
|
||||
pashub_client,
|
||||
sharepoint_client,
|
||||
BASE_PATH,
|
||||
)
|
||||
saved_file_paths.extend(files)
|
||||
|
||||
logger.info(f"Saved {len(saved_file_paths)} files")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
event = {"Records": [{"body": "{}"}]}
|
||||
|
||||
handler(event, None)
|
||||
|
|
|
|||
|
|
@ -1,7 +1,23 @@
|
|||
from typing import Any, Mapping
|
||||
import json
|
||||
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
|
||||
print("Received event:")
|
||||
print(json.dumps(event, indent=2))
|
||||
logger.info("Received event:")
|
||||
|
||||
for record in event.get("Records", []):
|
||||
body_str = record.get("body", "")
|
||||
|
||||
try:
|
||||
body_obj = json.loads(body_str)
|
||||
except json.JSONDecodeError:
|
||||
logger.error("Failed to parse body as JSON")
|
||||
logger.info(body_str)
|
||||
continue
|
||||
|
||||
logger.info("Parsed message body:")
|
||||
logger.info(json.dumps(body_obj, indent=2))
|
||||
|
|
|
|||
|
|
@ -71,6 +71,21 @@ class PashubClient:
|
|||
|
||||
return saved_files
|
||||
|
||||
def get_uprn_by_job_id(self, job_id: str) -> Optional[str]:
|
||||
logger.info(f"Getting UPRN for job ID {job_id}")
|
||||
url = f"{self.base}/jobs/{job_id}"
|
||||
|
||||
r = self.session.get(url)
|
||||
if r.status_code == 401:
|
||||
raise UnauthorizedError("Token expired or invalid")
|
||||
|
||||
r.raise_for_status()
|
||||
|
||||
try:
|
||||
return r.json()["uprn"]
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
|
||||
for core_file in CoreFiles:
|
||||
if file.file_name.startswith(core_file.value):
|
||||
|
|
|
|||
18
backend/pashub_fetcher/pashub_to_ara_trigger_request.py
Normal file
18
backend/pashub_fetcher/pashub_to_ara_trigger_request.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
from typing import Optional
|
||||
from pydantic import BaseModel
|
||||
|
||||
|
||||
class PashubToAraTriggerRequest(BaseModel):
|
||||
pashub_link: (
|
||||
str # e.g. https://pashub.net/jobs/12345-abcd-1234-abcd-12345abcde/details
|
||||
)
|
||||
|
||||
address: Optional[str] = None
|
||||
sharepoint_link: Optional[str] = None
|
||||
uprn: Optional[str] = None
|
||||
landlord_property_id: Optional[str] = None
|
||||
deal_stage: Optional[str] = None
|
||||
|
||||
@property
|
||||
def pashub_job_id(self) -> str:
|
||||
return self.pashub_link.split("/")[-2]
|
||||
|
|
@ -2,6 +2,10 @@ import os
|
|||
import pickle
|
||||
import pandas as pd
|
||||
import pytest
|
||||
from datetime import datetime
|
||||
from backend.ml_models.api import ModelApi
|
||||
from backend.app.utils import sap_to_epc
|
||||
from backend.app.config import get_prediction_buckets
|
||||
|
||||
|
||||
def load_sample_certificates():
|
||||
|
|
@ -60,12 +64,6 @@ def load_cleaning_data():
|
|||
|
||||
@pytest.mark.integration
|
||||
def test_rebaselining_pipeline_with_real_data():
|
||||
import pandas as pd
|
||||
from datetime import datetime
|
||||
from backend.ml_models.api import ModelApi
|
||||
from backend.app.utils import sap_to_epc
|
||||
from backend.app.config import get_prediction_buckets
|
||||
|
||||
df = load_sample_certificates()
|
||||
cleaning_data = load_cleaning_data()
|
||||
input_properties = [make_property_from_row(row, cleaning_data=cleaning_data) for _, row in df.iterrows()]
|
||||
|
|
|
|||
|
|
@ -209,7 +209,7 @@ class Costs:
|
|||
# Try and get the region after converting the keys to lower
|
||||
self.region = {
|
||||
k.lower(): v for k, v in county_to_region_map.items()
|
||||
}.get(self.property.property.epc_record.local_authority_label.lower(), None)
|
||||
}.get(self.property.epc_record.local_authority_label.lower(), None)
|
||||
|
||||
if self.region is None:
|
||||
logger.warning("No region found for county %s, defaulting to South East England",
|
||||
|
|
|
|||
11
utils/s3.py
11
utils/s3.py
|
|
@ -472,3 +472,14 @@ def list_xmls_in_s3_folder(bucket_name, folder_name):
|
|||
f"Failed to list XML files in folder {folder_name} in bucket {bucket_name}: {str(e)}"
|
||||
)
|
||||
return []
|
||||
|
||||
|
||||
def upload_file_to_s3(file_path: str, bucket_name: str, file_key: str) -> None:
|
||||
try:
|
||||
s3 = boto3.resource("s3")
|
||||
bucket = s3.Bucket(bucket_name)
|
||||
bucket.upload_file(file_path, file_key)
|
||||
logger.info(f"Uploaded {file_path} to s3://{bucket_name}/{file_key}")
|
||||
except Exception as e:
|
||||
logger.error(f"Failed to upload {file_path} to S3: {e}")
|
||||
raise
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue