Merge branch 'main' into feature/handle-new-magicplan-response-structure

This commit is contained in:
Daniel Roth 2026-06-04 10:52:32 +00:00
commit df0c8f69b1
19 changed files with 835 additions and 166 deletions

View file

@ -76,6 +76,8 @@ on:
required: false
TF_VAR_social_housing_wave_3_sharepoint_id:
required: false
TF_VAR_eco_sharepoint_id:
required: false
TF_VAR_pashub_email:
required: false
TF_VAR_pashub_password:
@ -159,6 +161,7 @@ jobs:
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }}
TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }}
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}
@ -210,6 +213,7 @@ jobs:
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.TF_VAR_osmosis_acd_sharepoint_id }}
TF_VAR_private_pay_sharepoint_id: ${{ secrets.TF_VAR_private_pay_sharepoint_id }}
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.TF_VAR_social_housing_wave_3_sharepoint_id }}
TF_VAR_eco_sharepoint_id: ${{ secrets.TF_VAR_eco_sharepoint_id }}
TF_VAR_pashub_email: ${{ secrets.TF_VAR_pashub_email }}
TF_VAR_pashub_password: ${{ secrets.TF_VAR_pashub_password }}
TF_VAR_pashub_coordination_email: ${{ secrets.TF_VAR_pashub_coordination_email }}

View file

@ -448,6 +448,7 @@ jobs:
TF_VAR_osmosis_acd_sharepoint_id: ${{ secrets.OSMOSIS_ACD_SHAREPOINT_ID }}
TF_VAR_private_pay_sharepoint_id: ${{ secrets.PRIVATE_PAY_SHAREPOINT_ID }}
TF_VAR_social_housing_wave_3_sharepoint_id: ${{ secrets.SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID }}
TF_VAR_eco_sharepoint_id: ${{ secrets.ECO_SHAREPOINT_ID }}
TF_VAR_pashub_email: ${{ secrets.PASHUB_EMAIL }}
TF_VAR_pashub_password: ${{ secrets.PASHUB_PASSWORD }}
TF_VAR_pashub_coordination_email: ${{ secrets.PASHUB_COORDINATION_EMAIL }}

View file

@ -82,6 +82,7 @@ class Settings(BaseSettings):
OSMOSIS_ACD_SHAREPOINT_ID: Optional[str] = None
PRIVATE_PAY_SHAREPOINT_ID: Optional[str] = None
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID: Optional[str] = None
ECO_SHAREPOINT_ID: Optional[str] = None
OPENAI_API_KEY: Optional[str] = None
# Pas Hub

View file

@ -21,6 +21,8 @@ class FileTypeEnum(enum.Enum):
IMPROVEMENT_OPTION_EVALUATION = "improvement_option_evaluation"
MEDIUM_TERM_IMPROVEMENT_PLAN = "medium_term_improvement_plan"
RETROFIT_DESIGN_DOC = "retrofit_design_doc"
MCS_COMPLIANCE_CERTIFICATE = "mcs_compliance_certificate"
OTHER = "other"
class FileSourceEnum(enum.Enum):

View file

@ -17,6 +17,7 @@ class CoreFiles(Enum):
IMPROVEMENT_OPTION_EVALUATION = "Improvement Option Evaluation"
MEDIUM_TERM_IMPROVEMENT_PLAN = "Medium Term Improvement Plan"
RETROFIT_DESIGN_DOC = "Retrofit Design Doc"
MCS_COMPLIANCE_CERTIFICATE = "MCS Compliance Certificate"
_CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
@ -32,14 +33,21 @@ _CORE_FILE_TO_FILE_TYPE: dict[CoreFiles, str] = {
CoreFiles.IMPROVEMENT_OPTION_EVALUATION: FileTypeEnum.IMPROVEMENT_OPTION_EVALUATION.value,
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN: FileTypeEnum.MEDIUM_TERM_IMPROVEMENT_PLAN.value,
CoreFiles.RETROFIT_DESIGN_DOC: FileTypeEnum.RETROFIT_DESIGN_DOC.value,
CoreFiles.MCS_COMPLIANCE_CERTIFICATE: FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value,
}
def get_core_file_type(
filename: str, evidence_category: Optional[str] = None
) -> Optional[CoreFiles]:
# Identify retrofit design doc using evidence category as the name is possibly unreliable.
# Identify MCS certificate and design doc using evidence category as the names are possibly unreliable.
# We might change to always use evidence category, but needs more investigation
if (
evidence_category is not None
and evidence_category.lower() == "mcs compliance certificate"
):
return CoreFiles.MCS_COMPLIANCE_CERTIFICATE
if evidence_category is not None and evidence_category.lower() == "retrofit design":
return CoreFiles.RETROFIT_DESIGN_DOC
@ -56,6 +64,7 @@ def get_core_file_type(
CoreFiles.RETROFIT_DESIGN_DOC,
CoreFiles.IMPROVEMENT_OPTION_EVALUATION,
CoreFiles.MEDIUM_TERM_IMPROVEMENT_PLAN,
CoreFiles.MCS_COMPLIANCE_CERTIFICATE,
}
for core_file in CoreFiles:
@ -68,8 +77,10 @@ def get_core_file_type(
return None
def get_file_type_string(filename: str) -> Optional[str]:
core_file: Optional[CoreFiles] = get_core_file_type(filename)
def get_file_type_string(
filename: str, evidence_category: Optional[str] = None
) -> Optional[str]:
core_file: Optional[CoreFiles] = get_core_file_type(filename, evidence_category)
if core_file is None:
return None

View file

@ -0,0 +1,63 @@
EVIDENCE_CATEGORIES = [
"Advice report",
"Air Tests - BGV",
"Air Tightness Strategy",
"Assessment report",
"Blue Site Notes (PAS Assessment)",
"Building Assessment report",
"Building Condition report",
"Building Regulations Sign-off",
"Claim of compliance PAS2030",
"Claim of compliance PAS2035",
"Commissioning checklist",
"Condition report",
"Contract / Invoice",
"Electrical Certificate",
"Energy report",
"Evidence of submission to CPS",
"Floor Plan",
"Full Property Assessment",
"Gas Appliance Benchmarking Certificate",
"Gas Appliance Commissioning Checklist",
"Gas Inspection Certificate",
"Handover and Commissioning Documents",
"Handover Documents",
"Handover documents for client",
"Heat Demand Calculations",
"Heritage Impact Assessment",
"Improvement option evaluation",
"Installation Guides",
"Insurance guarantee",
"Intended outcomes",
"MCS Compliance Certificate",
"Medium term improvement plan",
"Medium term low carbon plan",
"Mid Photo",
"Mid-Install Inspection",
"Minor Works Electrical Certificate",
"Monitoring and evaluation outcomes",
"Occupancy assessment",
"Other",
"Other commissioning certificates",
"Photo",
"Post Energy Performance Report (EPR)",
"Post installation RdSAP",
"Post Photo",
"Pre Energy Performance Report (EPR)",
"Pre installation RdSAP",
"Pre Photo",
"Pre-Design Building Survey",
"Pre-Installation Building Inspection",
"Product Data sheets",
"Product warranty",
"Property Assessment",
"Qualifications",
"Retrofit design",
"Risk assessment",
"Significance survey",
"Site Note (Green /Blue) and Certificate(s)",
"Ventilation Assessment",
"Ventilation Assessment Checklist",
"Ventilation Report",
"Welsh - Checklist",
]

View file

@ -40,10 +40,6 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
if (not pashub_email) or (not pashub_password):
raise ValueError("Pas Hub credentials not provided")
sharepoint_client = DomnaSharepointClient(
sharepoint_location=DomnaSites.SOCIAL_HOUSING_WAVE_3
)
if coordination_hub_email and coordination_hub_password:
_coord_email, _coord_password = (
coordination_hub_email,
@ -57,6 +53,16 @@ def handler(body: Dict[str, Any], context: Any) -> List[str]:
payload = PashubToAraTriggerRequest.model_validate(body)
logger.debug("Successfully validated request body")
sharepoint_client: Optional[DomnaSharepointClient] = None
if payload.sharepoint_site is not None:
try:
resolved_site = DomnaSites[payload.sharepoint_site]
sharepoint_client = DomnaSharepointClient(sharepoint_location=resolved_site)
except KeyError:
logger.warning(
f"Unrecognised sharepoint_site '{payload.sharepoint_site}'; skipping SharePoint upload"
)
service = PashubService(
pashub_client=get_pashub_client(pashub_email, pashub_password),
sharepoint_client=sharepoint_client,

View file

@ -1,6 +1,6 @@
from collections import defaultdict
import os
from typing import Dict, List, Optional
from typing import Dict, List, NamedTuple, Optional
from datetime import datetime
import requests
@ -13,6 +13,22 @@ from utils.logger import setup_logger
logger = setup_logger()
class DownloadedFile(NamedTuple):
file_path: str
evidence_category: Optional[str]
created_utc: datetime
class _EvidenceFileGroups(NamedTuple):
core: Dict[CoreFiles, EvidenceFileData]
other: List[EvidenceFileData]
class DownloadedFiles(NamedTuple):
core: List[DownloadedFile]
other: List[DownloadedFile]
class UnauthorizedError(Exception):
pass
@ -33,42 +49,60 @@ class PashubClient:
)
logger.info("Finished initialising CotalityClient")
def get_core_evidence_files_by_job_id(self, job_id: str) -> List[str]:
logger.info(f"Getting Core Evidence Files for job ID {job_id}")
def get_evidence_files_by_job_id(
self, job_id: str, include_other: bool = False
) -> DownloadedFiles:
logger.info(f"Getting evidence files for job ID {job_id}")
evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id)
logger.info(f"Found {len(evidence_list)} Evidence files to get")
logger.info(f"Found {len(evidence_list)} evidence files")
if not evidence_list:
return []
return DownloadedFiles(core=[], other=[])
saved_files: List[str] = []
core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files(
grouped: _EvidenceFileGroups = self._group_into_core_and_other_files(
evidence_list
)
logger.info(f"Number of core files to download is {len(core_files)}")
for _, evidence in core_files.items():
evidence_id = evidence.file_id
if not evidence_id:
core_files: List[DownloadedFile] = []
for _, evidence in grouped.core.items():
if not evidence.file_id:
continue
logger.info(f"Getting metadata for file {evidence.file_name}")
metadata: EvidenceMetadata = self._get_evidence_metadata(
job_id, evidence_id
job_id, evidence.file_id
)
download_url: str = self._build_download_url(metadata, evidence.file_id)
output_dir: str = "/tmp"
file_name: str = evidence.file_name
file_path: str = os.path.join(output_dir, file_name)
file_path: str = os.path.join("/tmp", evidence.file_name)
self._download_file(download_url, file_path)
logger.info("Successfully downloaded file")
saved_files.append(file_path)
core_files.append(
DownloadedFile(
file_path=file_path,
evidence_category=evidence.evidence_category,
created_utc=datetime.fromisoformat(evidence.created_utc),
)
)
return saved_files
other_files: List[DownloadedFile] = []
if include_other:
for evidence in grouped.other:
if not evidence.file_id:
continue
metadata = self._get_evidence_metadata(job_id, evidence.file_id)
download_url = self._build_download_url(metadata, evidence.file_id)
file_path = os.path.join("/tmp", evidence.file_name)
self._download_file(download_url, file_path)
logger.info("Successfully downloaded other file")
other_files.append(
DownloadedFile(
file_path=file_path,
evidence_category=evidence.evidence_category,
created_utc=datetime.fromisoformat(evidence.created_utc),
)
)
return DownloadedFiles(core=core_files, other=other_files)
def get_uprn_by_job_id(self, job_id: str) -> Optional[str]:
logger.info(f"Getting UPRN for job ID {job_id}")
@ -92,30 +126,32 @@ class PashubClient:
)
return None
def _select_latest_core_files(
def _group_into_core_and_other_files(
self,
files: List[EvidenceFileData],
) -> Dict[CoreFiles, EvidenceFileData]:
) -> _EvidenceFileGroups:
grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
other: List[EvidenceFileData] = []
for file in files:
core_type: Optional[CoreFiles] = get_core_file_type(
file.file_name, file.evidence_category
)
if not core_type:
other.append(file)
continue
grouped[core_type].append(file)
latest_files: Dict[CoreFiles, EvidenceFileData] = {}
latest_core_files: Dict[CoreFiles, EvidenceFileData] = {}
for core_type, group in grouped.items():
if core_type == CoreFiles.RETROFIT_DESIGN_DOC and len(group) > 1:
osm_candidates = [f for f in group if "-OSM-" in f.file_name]
group = osm_candidates if osm_candidates else group
latest = max(group, key=lambda f: datetime.fromisoformat(f.created_utc))
latest_files[core_type] = latest
latest_core_files[core_type] = latest
return latest_files
return _EvidenceFileGroups(core=latest_core_files, other=other)
def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]:
url = f"{self.base}/jobs/{job_id}/evidence"

View file

@ -11,11 +11,15 @@ from backend.app.db.models.uploaded_file import (
from backend.documents_parser.db_writer import save_epc_property_data
from backend.documents_parser.parser import parse_site_notes_pdf
from backend.pashub_fetcher.core_files import get_file_type_string
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
UnauthorizedError,
)
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
from datatypes.epc.domain.epc_property_data import EpcPropertyData
from utils.logger import setup_logger
from utils.s3 import upload_file_to_s3
@ -34,7 +38,7 @@ class PashubService:
def __init__(
self,
pashub_client: PashubClient,
sharepoint_client: DomnaSharepointClient,
sharepoint_client: Optional[DomnaSharepointClient],
s3_bucket: str,
coordination_client_factory: Optional[Callable[[], PashubClient]] = None,
) -> None:
@ -75,14 +79,16 @@ class PashubService:
logger.info(f"No UPRN found for job {job_id}")
try:
job_files: List[str] = active_client.get_core_evidence_files_by_job_id(
job_id
downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id(
job_id, include_other=request.get_other_files
)
except UnauthorizedError:
if active_client is not self._pashub_client:
raise
active_client = self._get_coordination_client()
job_files = active_client.get_core_evidence_files_by_job_id(job_id)
downloaded: DownloadedFiles = active_client.get_evidence_files_by_job_id(
job_id, include_other=request.get_other_files
)
if uprn or hubspot_deal_id:
logger.info("Uploading files to s3")
@ -92,29 +98,47 @@ class PashubService:
else FileSourceEnum.COORDINATION_HUB
)
upload_records = self._upload_to_s3_and_update_db(
job_files, uprn, hubspot_deal_id, file_source
downloaded.core, uprn, hubspot_deal_id, file_source
)
self._save_site_notes(upload_records)
# SharePoint upload disabled: pashub sharepoint_link is inconsistent
# (points to property or project unpredictably)
# if request.sharepoint_link:
# self._upload_to_sharepoint(request.sharepoint_link, job_files)
if downloaded.other:
self._upload_to_s3_and_update_db(
downloaded.other,
uprn,
hubspot_deal_id,
file_source,
default_file_type=FileTypeEnum.OTHER.value,
)
for file_path in job_files:
if self._sharepoint_client and request.sharepoint_link and request.address:
folder_name = request.address.split("|")[0].strip()
folders = self._sharepoint_client.get_folders_in_path(request.sharepoint_link)
match = next(
(f["name"] for f in folders.get("value", []) if f["name"].lower() == folder_name.lower()),
None,
)
if match is None:
logger.warning(f"SharePoint folder not found for '{folder_name}' in {request.sharepoint_link}")
else:
property_folder_path = f"{request.sharepoint_link}/{match}"
self._upload_to_sharepoint(property_folder_path, downloaded.core + downloaded.other)
for df in downloaded.core + downloaded.other:
try:
os.remove(file_path)
os.remove(df.file_path)
except OSError:
logger.warning(f"Failed to delete temp file {file_path}")
logger.warning(f"Failed to delete temp file {df.file_path}")
return job_files
return [df.file_path for df in downloaded.core + downloaded.other]
def _upload_to_s3_and_update_db(
self,
job_files: List[str],
job_files: List[DownloadedFile],
uprn: Optional[str],
hubspot_deal_id: Optional[str],
file_source: FileSourceEnum,
default_file_type: Optional[str] = None,
) -> List[_FileUploadRecord]:
if not uprn and not hubspot_deal_id:
return []
@ -128,11 +152,11 @@ class PashubService:
file_paths: List[str] = []
uploaded_files: List[UploadedFile] = []
for file_path in job_files:
filename = os.path.basename(file_path)
for df in job_files:
filename = os.path.basename(df.file_path)
file_key = f"{base_path}/{filename}"
upload_file_to_s3(file_path, self._s3_bucket, file_key)
upload_file_to_s3(df.file_path, self._s3_bucket, file_key)
uploaded_file = UploadedFile(
s3_file_bucket=self._s3_bucket,
@ -141,9 +165,9 @@ class PashubService:
uprn=int(uprn) if uprn else None,
hubspot_deal_id=hubspot_deal_id,
file_source=file_source.value,
file_type=get_file_type_string(filename),
file_type=get_file_type_string(filename, df.evidence_category) or default_file_type,
)
file_paths.append(file_path)
file_paths.append(df.file_path)
uploaded_files.append(uploaded_file)
with db_session() as session:
@ -180,11 +204,13 @@ class PashubService:
def _upload_to_sharepoint(
self,
sharepoint_link: str,
job_files: List[str],
property_folder_path: str,
files: List[DownloadedFile],
) -> None:
assessment_path = f"{sharepoint_link}/{SharepointSubfolders.ASSESSMENT.value}"
for file_path in job_files:
filename = file_path.split("/")[-1]
self._sharepoint_client.upload_file(file_path, assessment_path, filename)
assert self._sharepoint_client is not None
for df in files:
filename = os.path.basename(df.file_path)
try:
self._sharepoint_client.upload_file(df.file_path, property_folder_path, filename)
except Exception:
logger.warning(f"Failed to upload {filename} to SharePoint", exc_info=True)

View file

@ -14,6 +14,10 @@ class PashubToAraTriggerRequest(BaseModel):
hubspot_listing_id: Optional[int] = None
hubspot_deal_id: Optional[str] = None
sharepoint_site: Optional[str] = None
get_other_files: bool = False
@property
def pashub_job_id(self) -> str:
match = re.search(r"/jobs/([^/]+)", self.pashub_link)

View file

@ -183,3 +183,44 @@ def test_core_file_for_osm_fallback_does_not_fire_when_evidence_category_present
# Assert
assert result is None
def test_core_file_for_mcs_compliance_certificate_returns_mcs_compliance_certificate() -> None:
# Arrange
filename = "MCS_cert_job123.pdf"
# Act
result = get_core_file_type(
filename, evidence_category="mcs compliance certificate"
)
# Assert
assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE
def test_core_file_for_mcs_compliance_certificate_is_case_insensitive() -> None:
# Arrange
filename = "some_cert.pdf"
# Act
result = get_core_file_type(
filename, evidence_category="MCS Compliance Certificate"
)
# Assert
assert result == CoreFiles.MCS_COMPLIANCE_CERTIFICATE
def test_get_file_type_string_with_mcs_evidence_category_returns_mcs_compliance_certificate() -> (
None
):
# Arrange
filename = "some_cert.pdf"
# Act
result = get_file_type_string(
filename, evidence_category="MCS Compliance Certificate"
)
# Assert
assert result == "mcs_compliance_certificate"

View file

@ -1,9 +1,22 @@
# pyright: reportPrivateUsage=false
from typing import Optional
from unittest.mock import patch
from backend.pashub_fetcher.core_files import CoreFiles
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
from backend.pashub_fetcher.pashub_client import PashubClient
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
)
def make_metadata() -> EvidenceMetadata:
return EvidenceMetadata(
container_name="my-container",
blob_uri="https://storage.example.com/blob?sas=token",
)
def make_client() -> PashubClient:
@ -26,11 +39,27 @@ def make_file(
# ---------------------------------------------------------------------------
# _select_latest_core_files
# _group_into_core_and_other_files
# ---------------------------------------------------------------------------
def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
def test_group_into_core_and_other_files_classifies_core_and_other_correctly() -> None:
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="some_unknown_document.pdf"),
]
# Act
result = client._group_into_core_and_other_files(files)
# Assert
assert CoreFiles.SITENOTE in result.core
assert [f.file_name for f in result.other] == ["some_unknown_document.pdf"]
def test_group_into_core_and_other_files_returns_single_retrofit_design_doc() -> None:
# Arrange
client = make_client()
files = [
@ -42,13 +71,16 @@ def test_select_latest_core_files_returns_single_retrofit_design_doc() -> None:
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name
== "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
)
def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
def test_group_into_core_and_other_files_osm_candidate_wins_over_non_osm() -> None:
# Arrange - the non-OSM file is newer but should lose to the OSM file
client = make_client()
files = [
@ -65,13 +97,18 @@ def test_select_latest_core_files_osm_candidate_wins_over_non_osm() -> None:
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name
== "2512-OSM-H21M900-XX-DR-N-A_Lord Nelson Street 018.pdf"
)
def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -> None:
def test_group_into_core_and_other_files_picks_latest_when_both_candidates_have_osm() -> (
None
):
# Arrange
client = make_client()
files = [
@ -88,13 +125,62 @@ def test_select_latest_core_files_picks_latest_when_both_candidates_have_osm() -
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name
== "2603-OSM-B06M901-XX-DR-N-A_Alvaston Walk 022.pdf"
)
def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates() -> None:
def test_group_into_core_and_other_files_classifies_mcs_cert_as_core() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="MCS_cert_job123.pdf",
evidence_category="MCS Compliance Certificate",
),
]
# Act
result = client._group_into_core_and_other_files(files)
# Assert
assert CoreFiles.MCS_COMPLIANCE_CERTIFICATE in result.core
assert result.other == []
def test_group_into_core_and_other_files_picks_most_recent_mcs_cert() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="mcs_cert_old.pdf",
evidence_category="MCS Compliance Certificate",
created_utc="2024-01-01T00:00:00",
),
make_file(
file_name="mcs_cert_new.pdf",
evidence_category="MCS Compliance Certificate",
created_utc="2024-06-01T00:00:00",
),
]
# Act
result = client._group_into_core_and_other_files(files)
# Assert
assert (
result.core[CoreFiles.MCS_COMPLIANCE_CERTIFICATE].file_name
== "mcs_cert_new.pdf"
)
def test_group_into_core_and_other_files_falls_back_to_latest_when_no_osm_candidates() -> (
None
):
# Arrange
client = make_client()
files = [
@ -111,7 +197,84 @@ def test_select_latest_core_files_falls_back_to_latest_when_no_osm_candidates()
]
# Act
result = client._select_latest_core_files(files)
result = client._group_into_core_and_other_files(files)
# Assert
assert result[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
assert (
result.core[CoreFiles.RETROFIT_DESIGN_DOC].file_name == "retrofit_design_v2.pdf"
)
# ---------------------------------------------------------------------------
# get_evidence_files_by_job_id
# ---------------------------------------------------------------------------
def test_get_evidence_files_by_job_id_returns_downloaded_files_with_empty_other_when_include_other_false() -> (
None
):
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="unknown_doc.pdf"),
]
# Act
with (
patch.object(client, "_get_evidence_list", return_value=files),
patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
patch.object(client, "_download_file"),
):
result = client.get_evidence_files_by_job_id("job-1", include_other=False)
# Assert
assert isinstance(result, DownloadedFiles)
assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"]
assert result.other == []
def test_get_evidence_files_by_job_id_core_files_carry_evidence_category() -> None:
# Arrange
client = make_client()
files = [
make_file(
file_name="MCS_cert.pdf",
evidence_category="MCS Compliance Certificate",
),
]
# Act
with (
patch.object(client, "_get_evidence_list", return_value=files),
patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
patch.object(client, "_download_file"),
):
result = client.get_evidence_files_by_job_id("job-1", include_other=False)
# Assert
assert len(result.core) == 1
assert result.core[0].evidence_category == "MCS Compliance Certificate"
def test_get_evidence_files_by_job_id_downloads_other_files_when_include_other_true() -> (
None
):
# Arrange
client = make_client()
files = [
make_file(file_name="SiteNote_001.pdf"),
make_file(file_name="unknown_doc.pdf"),
]
# Act
with (
patch.object(client, "_get_evidence_list", return_value=files),
patch.object(client, "_get_evidence_metadata", return_value=make_metadata()),
patch.object(client, "_download_file"),
):
result = client.get_evidence_files_by_job_id("job-1", include_other=True)
# Assert
assert [df.file_path for df in result.core] == ["/tmp/SiteNote_001.pdf"]
assert [df.file_path for df in result.other] == ["/tmp/unknown_doc.pdf"]

View file

@ -1,17 +1,22 @@
import pytest
from datetime import datetime
from typing import Any, Callable, Optional
from unittest.mock import MagicMock, call, patch
from backend.app.db.models.uploaded_file import FileSourceEnum
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
from backend.app.db.models.uploaded_file import FileSourceEnum, FileTypeEnum
from backend.pashub_fetcher.pashub_client import (
DownloadedFile,
DownloadedFiles,
PashubClient,
UnauthorizedError,
)
from backend.pashub_fetcher.pashub_service import PashubService
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
PashubToAraTriggerRequest,
)
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
FAKE_JOB_LINK = "https://pashub.net/jobs/job-id-123/details"
@ -20,12 +25,16 @@ def make_request(
uprn: Optional[str] = None,
hubspot_deal_id: Optional[str] = None,
sharepoint_link: Optional[str] = None,
get_other_files: bool = False,
address: Optional[str] = None,
) -> PashubToAraTriggerRequest:
return PashubToAraTriggerRequest(
pashub_link=pashub_link,
uprn=uprn,
hubspot_deal_id=hubspot_deal_id,
sharepoint_link=sharepoint_link,
get_other_files=get_other_files,
address=address,
)
@ -43,6 +52,16 @@ def make_service(
)
_DEFAULT_UTC = datetime(2024, 1, 1)
def make_downloaded(core: list[str], other: list[str] = []) -> DownloadedFiles:
return DownloadedFiles(
core=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in core],
other=[DownloadedFile(fp, None, _DEFAULT_UTC) for fp in other],
)
# ---------------------------------------------------------------------------
# run(): returns file paths
# ---------------------------------------------------------------------------
@ -51,10 +70,9 @@ def make_service(
def test_run_returns_file_paths() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/a.pdf",
"/tmp/b.pdf",
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf", "/tmp/b.pdf"]
)
service = make_service(pashub_client=mock_client)
@ -64,6 +82,30 @@ def test_run_returns_file_paths() -> None:
assert result == ["/tmp/a.pdf", "/tmp/b.pdf"]
# ---------------------------------------------------------------------------
# run(): returns core + other file paths when get_other_files=True
# ---------------------------------------------------------------------------
def test_run_returns_core_and_other_file_paths() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"],
other=["/tmp/other.pdf"],
)
service = make_service(pashub_client=mock_client)
# Act
with patch("backend.pashub_fetcher.pashub_service.os.remove"):
result = service.run(make_request(get_other_files=True))
# Assert
assert result == ["/tmp/core.pdf", "/tmp/other.pdf"]
# ---------------------------------------------------------------------------
# run(): skips upload when neither uprn nor hubspot_deal_id
# ---------------------------------------------------------------------------
@ -72,7 +114,9 @@ def test_run_returns_file_paths() -> None:
def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
service = make_service(pashub_client=mock_client)
@ -93,10 +137,9 @@ def test_run_skips_upload_when_no_uprn_and_no_deal_id() -> None:
def test_run_uploads_files_to_s3_using_uprn_path() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf",
"/tmp/Photopack_002.pdf",
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf", "/tmp/Photopack_002.pdf"]
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
@ -132,9 +175,9 @@ def test_run_uploads_files_to_s3_using_uprn_path() -> None:
def test_run_persists_uploaded_file_records_to_db() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"]
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
@ -163,9 +206,9 @@ def test_run_persists_uploaded_file_records_to_db() -> None:
def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"]
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
@ -191,9 +234,9 @@ def test_run_uses_hubspot_deal_id_path_when_no_uprn() -> None:
def test_run_parses_and_saves_site_notes_for_rd_sap_site_note_file() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/RdSAP_SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/RdSAP_SiteNote_001.pdf"]
)
fake_epc_data = MagicMock()
fake_session = MagicMock()
@ -241,11 +284,15 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -256,20 +303,24 @@ def test_run_uses_coordination_client_when_pas_401_on_uprn_lookup() -> None:
assert result == ["/tmp/a.pdf"]
coord_client.get_uprn_by_job_id.assert_called_once()
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
coord_client.get_evidence_files_by_job_id.assert_called_once()
assert factory.call_count == 1
def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -279,7 +330,7 @@ def test_run_uses_coordination_client_when_pas_401_on_file_listing() -> None:
result = service.run(make_request(uprn="12345"))
assert result == ["/tmp/a.pdf"]
coord_client.get_core_evidence_files_by_job_id.assert_called_once()
coord_client.get_evidence_files_by_job_id.assert_called_once()
pas_client.get_uprn_by_job_id.assert_not_called()
@ -302,24 +353,32 @@ def test_run_raises_unauthorized_when_both_clients_401() -> None:
factory = MagicMock(return_value=coord_client)
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with pytest.raises(UnauthorizedError):
service.run(make_request())
def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> None:
def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup() -> (
None
):
pas_client = MagicMock(spec=PashubClient)
pas_client.get_uprn_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_uprn_by_job_id.return_value = "99999"
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
fake_session = MagicMock()
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -334,17 +393,23 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_uprn_lookup()
assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value
def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> None:
def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing() -> (
None
):
pas_client = MagicMock(spec=PashubClient)
pas_client.get_core_evidence_files_by_job_id.side_effect = UnauthorizedError()
pas_client.get_evidence_files_by_job_id.side_effect = UnauthorizedError()
coord_client = MagicMock(spec=PashubClient)
coord_client.get_core_evidence_files_by_job_id.return_value = ["/tmp/a.pdf"]
coord_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/a.pdf"]
)
factory = MagicMock(return_value=coord_client)
fake_session = MagicMock()
service = make_service(pashub_client=pas_client, coordination_client_factory=factory)
service = make_service(
pashub_client=pas_client, coordination_client_factory=factory
)
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
@ -359,12 +424,231 @@ def test_run_persists_coordination_hub_file_source_when_pas_401_on_file_listing(
assert added[0].file_source == FileSourceEnum.COORDINATION_HUB.value
# ---------------------------------------------------------------------------
# run(): get_other_files=True → other temp files deleted after run
# ---------------------------------------------------------------------------
def test_run_deletes_other_temp_files_when_get_other_files_true() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"],
other=["/tmp/other.pdf"],
)
service = make_service(pashub_client=mock_client)
# Act
with patch("backend.pashub_fetcher.pashub_service.os.remove") as mock_remove:
service.run(make_request(get_other_files=True))
# Assert
mock_remove.assert_any_call("/tmp/core.pdf")
mock_remove.assert_any_call("/tmp/other.pdf")
# ---------------------------------------------------------------------------
# run(): get_other_files=True → other files uploaded to S3
# ---------------------------------------------------------------------------
def test_run_uploads_other_files_to_s3_when_get_other_files_true() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/SiteNote_001.pdf"],
other=["/tmp/unknown_file.pdf"],
)
service = make_service(pashub_client=mock_client, s3_bucket="my-bucket")
# Act
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3") as mock_s3,
patch("backend.pashub_fetcher.pashub_service.db_session"),
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
service.run(make_request(uprn="12345", get_other_files=True))
# Assert
mock_s3.assert_any_call(
"/tmp/unknown_file.pdf",
"my-bucket",
"documents/uprn/12345/unknown_file.pdf",
)
# ---------------------------------------------------------------------------
# run(): get_other_files=True → other files persisted with file_type OTHER
# ---------------------------------------------------------------------------
def test_run_persists_other_files_with_other_file_type() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=[],
other=["/tmp/unknown_file.pdf"],
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
# Act
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
mock_db.return_value.__enter__.return_value = fake_session
service.run(make_request(uprn="12345", get_other_files=True))
# Assert
all_added = [item for c in fake_session.add_all.call_args_list for item in c[0][0]]
assert len(all_added) == 1
assert all_added[0].file_type == FileTypeEnum.OTHER.value
def test_run_persists_mcs_cert_with_mcs_compliance_certificate_file_type() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = DownloadedFiles(
core=[
DownloadedFile(
"/tmp/MCS_cert.pdf", "MCS Compliance Certificate", datetime(2024, 1, 1)
)
],
other=[],
)
fake_session = MagicMock()
service = make_service(pashub_client=mock_client)
# Act
with (
patch("backend.pashub_fetcher.pashub_service.upload_file_to_s3"),
patch("backend.pashub_fetcher.pashub_service.db_session") as mock_db,
patch("backend.pashub_fetcher.pashub_service.os.remove"),
):
mock_db.return_value.__enter__.return_value = fake_session
service.run(make_request(uprn="12345"))
# Assert
fake_session.add_all.assert_called_once()
added: list[Any] = fake_session.add_all.call_args[0][0]
assert added[0].file_type == FileTypeEnum.MCS_COMPLIANCE_CERTIFICATE.value
# ---------------------------------------------------------------------------
# run(): SharePoint upload
# ---------------------------------------------------------------------------
def test_sharepoint_uploads_all_files_to_property_folder() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"],
other=["/tmp/other.pdf"],
)
mock_sharepoint = MagicMock(spec=DomnaSharepointClient)
mock_sharepoint.get_folders_in_path.return_value = {
"value": [{"name": "123 Main St"}]
}
service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint)
# Act
with patch("backend.pashub_fetcher.pashub_service.os.remove"):
service.run(
make_request(
sharepoint_link="Retrofit/Properties",
get_other_files=True,
address="123 Main St | some deal",
)
)
# Assert
mock_sharepoint.upload_file.assert_any_call(
"/tmp/core.pdf", "Retrofit/Properties/123 Main St", "core.pdf"
)
mock_sharepoint.upload_file.assert_any_call(
"/tmp/other.pdf", "Retrofit/Properties/123 Main St", "other.pdf"
)
def test_sharepoint_skips_upload_when_folder_not_found() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"]
)
mock_sharepoint = MagicMock(spec=DomnaSharepointClient)
mock_sharepoint.get_folders_in_path.return_value = {
"value": [{"name": "Different Property"}]
}
service = make_service(pashub_client=mock_client, sharepoint_client=mock_sharepoint)
# Act
with (
patch("backend.pashub_fetcher.pashub_service.os.remove"),
patch("backend.pashub_fetcher.pashub_service.logger") as mock_logger,
):
service.run(
make_request(
sharepoint_link="Retrofit/Properties",
address="No Such Property | deal",
)
)
# Assert
mock_sharepoint.upload_file.assert_not_called()
mock_logger.warning.assert_called()
def test_sharepoint_skips_upload_when_sharepoint_client_is_none() -> None:
# Arrange
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/core.pdf"]
)
service = PashubService(
pashub_client=mock_client,
sharepoint_client=None,
s3_bucket="test-bucket",
)
# Act — should not raise AttributeError on None._sharepoint_client
with patch("backend.pashub_fetcher.pashub_service.os.remove"):
result = service.run(
make_request(
sharepoint_link="Retrofit/Properties",
address="123 Main St | deal",
)
)
# Assert
assert result == ["/tmp/core.pdf"]
def test_run_warns_and_continues_when_site_notes_parsing_fails() -> None:
mock_client = MagicMock(spec=PashubClient)
mock_client.get_uprn_by_job_id.return_value = None
mock_client.get_core_evidence_files_by_job_id.return_value = [
"/tmp/RdSAP_SiteNote_001.pdf"
]
mock_client.get_evidence_files_by_job_id.return_value = make_downloaded(
core=["/tmp/RdSAP_SiteNote_001.pdf"]
)
service = make_service(pashub_client=mock_client)

View file

@ -10,19 +10,19 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
)
from backend.pashub_fetcher.handler.handler import handler
if __name__ == "__main__":
BASE_DIR = os.path.dirname(os.path.dirname(__file__))
filepath: str = os.path.join(
BASE_DIR,
"pashub_fetcher",
"The_Guinness_Partnership_AtkinsR_alis_Coordination_Design_Board_1774881298.xlsx",
"local_run_02-06-2026",
"ECO_Approach_Coordination_Design_KN.xlsx",
)
wb = load_workbook(filepath, data_only=True)
ws = wb["filtered_2"]
ws = wb["filtered"]
HEADER_ROW = 3
HEADER_ROW = 1
headers: Dict[str, int] = {}
for col in range(1, ws.max_column + 1):
@ -31,7 +31,7 @@ if __name__ == "__main__":
headers[value.strip()] = col
name_col = headers["Name"]
link_col = headers["PasHub Link"]
link_col = headers["PasHub ID"]
hubspot_deal_id_col = headers["HubSpot ID"]
trigger_requests: List[PashubToAraTriggerRequest] = []
@ -50,7 +50,10 @@ if __name__ == "__main__":
trigger_requests.append(
PashubToAraTriggerRequest(
pashub_link=str(link), hubspot_deal_id=str(hubspot_deal_id)
pashub_link=str(link),
hubspot_deal_id=str(hubspot_deal_id),
address=str(name),
get_other_files=True,
)
)

View file

@ -16,44 +16,52 @@ logger: logging.Logger = logging.getLogger(__name__)
DRY_RUN: bool = False
DEAL_ID_FILTER: frozenset[str] = frozenset(
{
"379452094688",
"379466504437",
"379660170452",
"380016925932",
"379848065216",
"379466504434",
"379452094690",
"379965924567",
"380016925923",
"379792072898",
"379654754502",
"379560262861",
"379969670369",
"379248717001",
"379971468493",
"379999888607",
"379606372580",
"379969603797",
"379967743213",
"379263155434",
"379855267025",
"379889899719",
"379071064307",
"379867925741",
}
)
# DEAL_ID_FILTER: frozenset[str] = frozenset(
# {
# "379452094688",
# "379466504437",
# "379660170452",
# "380016925932",
# "379848065216",
# "379466504434",
# "379452094690",
# "379965924567",
# "380016925923",
# "379792072898",
# "379654754502",
# "379560262861",
# "379969670369",
# "379248717001",
# "379971468493",
# "379999888607",
# "379606372580",
# "379969603797",
# "379967743213",
# "379263155434",
# "379855267025",
# "379889899719",
# "379071064307",
# "379867925741",
# }
# )
DEAL_ID_FILTER = None
EXCEL_PATH: str = os.path.join(
os.path.dirname(__file__),
"united-infrastructure-exports-all-deals-2026-05-14.xlsx",
"local_run_02-06-2026/ECO_Approach_Coordination_Design_KN.xlsx",
)
SHAREPOINT_PROPERTIES_FOLDER: str = (
"Housing Associations/- Client Shared Folders/Abri/Abri Property Folders (Full PAS Info)"
)
SHAREPOINT_SITE: str = "ECO"
def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
wb = load_workbook(excel_path, data_only=True)
ws = wb.worksheets[0]
ws = wb.worksheets[1]
headers: dict[str, int] = {}
for col in range(1, ws.max_column + 1):
@ -61,10 +69,10 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
if header_val is not None:
headers[str(header_val).strip()] = col
pashub_col: int = headers["PasHub link"]
record_id_col: int = headers["Record ID"]
deal_name_col: int = headers["Deal Name"]
deal_stage_col: int = headers["Deal Stage"]
pashub_col: int = headers["PasHub ID"]
record_id_col: int = headers["HubSpot ID"]
deal_name_col: int = headers["Name"]
deal_stage_col: Optional[int] = headers.get("Deal Stage", None)
requests: list[PashubToAraTriggerRequest] = []
@ -77,7 +85,9 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
record_id_raw = ws.cell(row=row, column=record_id_col).value
deal_name_raw = ws.cell(row=row, column=deal_name_col).value
deal_stage_raw = ws.cell(row=row, column=deal_stage_col).value
deal_stage_raw = (
ws.cell(row=row, column=deal_stage_col).value if deal_stage_col else None
)
hubspot_deal_id: Optional[str] = (
str(record_id_raw) if record_id_raw is not None else None
@ -95,6 +105,8 @@ def _build_requests(excel_path: str) -> list[PashubToAraTriggerRequest]:
hubspot_deal_id=hubspot_deal_id,
address=address,
deal_stage=deal_stage,
sharepoint_link=SHAREPOINT_PROPERTIES_FOLDER or None,
sharepoint_site=SHAREPOINT_SITE,
)
)
@ -116,7 +128,7 @@ def main() -> None:
for request in trigger_requests:
action: str = "DRY RUN" if DRY_RUN else "SENDING"
logger.info(
f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link}"
f"[{action}] deal_id={request.hubspot_deal_id} pashub_link={request.pashub_link} sharepoint_link={request.sharepoint_link}"
)
if not DRY_RUN:

View file

@ -47,6 +47,7 @@ module "lambda" {
OSMOSIS_ACD_SHAREPOINT_ID = var.osmosis_acd_sharepoint_id
PRIVATE_PAY_SHAREPOINT_ID = var.private_pay_sharepoint_id
SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID = var.social_housing_wave_3_sharepoint_id
ECO_SHAREPOINT_ID = var.eco_sharepoint_id
PASHUB_EMAIL = var.pashub_email
PASHUB_PASSWORD = var.pashub_password
PASHUB_COORDINATION_EMAIL = var.pashub_coordination_email

View file

@ -92,6 +92,11 @@ variable "social_housing_wave_3_sharepoint_id" {
sensitive = true
}
variable "eco_sharepoint_id" {
type = string
sensitive = true
}
variable "pashub_email" {
type = string
sensitive = true

View file

@ -67,7 +67,9 @@ def handler(body: dict[str, Any], context: Any) -> None:
logger.info(
f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}"
)
_trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id)
_trigger_magicplan_fetcher(
sqs_client, hubspot_deal, listing, hubspot_deal_id
)
else:
# Deal already in db, check whether anything has changed
logger.info(
@ -119,13 +121,18 @@ def handler(body: dict[str, Any], context: Any) -> None:
logger.info(
f"Triggering MagicPlan fetcher for HubSpot deal ID {hubspot_deal_id}"
)
_trigger_magicplan_fetcher(sqs_client, hubspot_deal, listing, hubspot_deal_id)
_trigger_magicplan_fetcher(
sqs_client, hubspot_deal, listing, hubspot_deal_id
)
print("done")
def _trigger_magicplan_fetcher(
sqs_client: Any, hubspot_deal: Dict[str, str], listing: Optional[dict[str, str]], hubspot_deal_id: str
sqs_client: Any,
hubspot_deal: Dict[str, str],
listing: Optional[dict[str, str]],
hubspot_deal_id: str,
) -> None:
message_body = {
"address": hubspot_deal.get("dealname"),
@ -136,9 +143,7 @@ def _trigger_magicplan_fetcher(
QueueUrl=get_settings().MAGICPLAN_SQS_URL,
MessageBody=json.dumps(message_body),
)
logger.info(
f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}"
)
logger.info(f"Sent message to MagicPlan queue. MessageId: {response['MessageId']}")
def _trigger_pashub_fetcher(
@ -148,7 +153,7 @@ def _trigger_pashub_fetcher(
"pashub_link": hubspot_deal["pashub_link"],
"address": None, # potentially available from Listing, leave as None for now
"hubspot_deal_id": deal_id,
"sharepoint_link": hubspot_deal.get("sharepoint_link", None),
# "sharepoint_link": hubspot_deal.get("sharepoint_link", None), # Don't send sharepoint link for now as they are inconsistent
"uprn": hubspot_deal.get("national_uprn", None),
"landlord_property_id": hubspot_deal.get("owner_property_id", None),
"deal_stage": hubspot_deal.get("deal_stage", None),

View file

@ -9,3 +9,4 @@ class DomnaSites(Enum):
OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID")
PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID")
SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID")
ECO = os.getenv("ECO_SHAREPOINT_ID")