Model/backend/ecmk_fetcher/tests/test_ecmk_service.py
2026-04-29 14:10:55 +00:00

594 lines
20 KiB
Python

from typing import Dict
from unittest.mock import MagicMock, call, mock_open, patch

from backend.app.db.models.uploaded_file import FileTypeEnum
from backend.ecmk_fetcher.address_list import PropertyRow
from backend.ecmk_fetcher.ecmk_service import EcmkService
from backend.ecmk_fetcher.reports import FileDownloadButtonType
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
# Single-entry property map used as the return value of the patched
# ``extract_addresses_from_spreadsheet`` in the tests below.
FAKE_PROPERTY_MAP: Dict[str, PropertyRow] = {
    "10 FAKE ST SW1A 1AA": PropertyRow(
        row_index=2,
        address="10 Fake St SW1A 1AA",
        listing_id="hs-001",
    ),
}
def make_service(
    sharepoint_client: DomnaSharepointClient | None = None,
    s3_bucket: str = "test-bucket",
    property_list_filepath: str = "/fake/properties.xlsx",
    sharepoint_base_path: str = "/base",
    sharepoint_excel_path: str = "/excel",
    local_dimensions_path: str = "/fake/Dimensions.xlsx",
) -> EcmkService:
    """Build an ``EcmkService`` with test-friendly default arguments.

    Every argument is forwarded unchanged to the ``EcmkService`` constructor.

    Args:
        sharepoint_client: Client to inject. When ``None``, a ``MagicMock``
            specced against ``DomnaSharepointClient`` is created.
        s3_bucket: Bucket name passed to the service.
        property_list_filepath: Path of the property spreadsheet.
        sharepoint_base_path: SharePoint base path passed to the service.
        sharepoint_excel_path: SharePoint Excel folder passed to the service.
        local_dimensions_path: Local path of the Dimensions workbook.

    Returns:
        The constructed service.
    """
    # Compare against None explicitly: a caller-supplied client that happens
    # to be falsy (for example a specced mock whose spec defines ``__len__``)
    # must not be silently swapped for a fresh mock.
    if sharepoint_client is None:
        sharepoint_client = MagicMock(spec=DomnaSharepointClient)
    return EcmkService(
        sharepoint_client=sharepoint_client,
        s3_bucket=s3_bucket,
        property_list_filepath=property_list_filepath,
        sharepoint_base_path=sharepoint_base_path,
        sharepoint_excel_path=sharepoint_excel_path,
        local_dimensions_path=local_dimensions_path,
    )
# ---------------------------------------------------------------------------
# __init__: loads property map from spreadsheet filepath
# ---------------------------------------------------------------------------
def test_init_loads_property_map_from_filepath() -> None:
    """Constructing the service loads the property map from the given path."""
    loader_patch = patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    )
    with loader_patch as loader:
        make_service(property_list_filepath="/some/props.xlsx")

    loader.assert_called_once_with("/some/props.xlsx")
# ---------------------------------------------------------------------------
# run(): downloads Dimensions.xlsx before Playwright browser launches
# ---------------------------------------------------------------------------
def _make_playwright_mocks() -> tuple[MagicMock, MagicMock, MagicMock, MagicMock]:
mock_page = MagicMock()
mock_context = MagicMock()
mock_context.new_page.return_value = mock_page
mock_browser = MagicMock()
mock_browser.new_context.return_value = mock_context
mock_playwright = MagicMock()
mock_playwright.chromium.launch.return_value = mock_browser
return mock_page, mock_context, mock_browser, mock_playwright
def test_run_downloads_dimensions_before_browser_launch() -> None:
    """``run()`` downloads the Dimensions workbook before launching Chromium."""
    events: list[str] = []

    def _record_download(**_: object) -> None:
        events.append("download")

    def _record_launch(**_: object) -> MagicMock:
        events.append("browser")
        return browser

    sharepoint = MagicMock(spec=DomnaSharepointClient)
    sharepoint.download_file.side_effect = _record_download

    _, _, browser, playwright = _make_playwright_mocks()
    playwright.chromium.launch.side_effect = _record_launch

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as sync_pw,
    ):
        # ``sync_playwright()`` is used as a context manager by the service.
        sync_pw.return_value.__enter__.return_value = playwright
        service = make_service(
            sharepoint_client=sharepoint,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        # The browser session itself is out of scope for this test.
        with patch.object(service, "_run_browser_session"):
            service.run()

    assert events == ["download", "browser"]
def test_run_downloads_dimensions_with_correct_paths() -> None:
    """``run()`` downloads ``<excel path>/Dimensions.xlsx`` to the local path."""
    sharepoint = MagicMock(spec=DomnaSharepointClient)
    playwright = _make_playwright_mocks()[3]

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as sync_pw,
    ):
        sync_pw.return_value.__enter__.return_value = playwright
        service = make_service(
            sharepoint_client=sharepoint,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        with patch.object(service, "_run_browser_session"):
            service.run()

    sharepoint.download_file.assert_called_once_with(
        sharepoint_path="/excel/Dimensions.xlsx",
        local_path="/fake/Dimensions.xlsx",
    )
# ---------------------------------------------------------------------------
# run(): passes the Playwright Page into _run_browser_session
# ---------------------------------------------------------------------------
def test_run_passes_page_to_run_browser_session() -> None:
    """The page created from the browser context reaches ``_run_browser_session``."""
    page, _, _, playwright = _make_playwright_mocks()

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as sync_pw,
    ):
        sync_pw.return_value.__enter__.return_value = playwright
        service = make_service()
        with patch.object(service, "_run_browser_session") as browser_session:
            service.run()

    browser_session.assert_called_once_with(page)
# ---------------------------------------------------------------------------
# _process_file: dispatches based on report_type
# ---------------------------------------------------------------------------
def test_process_file_dispatches_to_xml_for_raw_xml() -> None:
    """A RAW_XML report type is routed to ``_process_xml_file`` only."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    xml_handler = patch.object(service, "_process_xml_file")
    pdf_handler = patch.object(service, "_process_pdf_file")
    with xml_handler as process_xml, pdf_handler as process_pdf:
        service._process_file(
            file_path="/tmp/file.xml",
            report_type=FileDownloadButtonType.RAW_XML.value,
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    process_xml.assert_called_once()
    process_pdf.assert_not_called()
def test_process_file_dispatches_to_pdf_for_non_xml() -> None:
    """A SITENOTE_REPORT report type is routed to ``_process_pdf_file`` only."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    xml_handler = patch.object(service, "_process_xml_file")
    pdf_handler = patch.object(service, "_process_pdf_file")
    with xml_handler as process_xml, pdf_handler as process_pdf:
        service._process_file(
            file_path="/tmp/file.pdf",
            report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
            db_file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    process_pdf.assert_called_once()
    process_xml.assert_not_called()
# ---------------------------------------------------------------------------
# _process_xml_file: parse → flatten → write row → upload excel → S3
# ---------------------------------------------------------------------------
def test_process_xml_file_full_chain() -> None:
    """``_process_xml_file`` runs parse, flatten, write row and both uploads.

    Every collaborator is patched; the test checks the arguments each one
    receives: the flattened parse result is written to the local Dimensions
    workbook, the workbook is uploaded to the SharePoint Excel path, and the
    XML file is recorded in S3 under the given listing id and file type.
    """
    fake_details = MagicMock()
    fake_row_data = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_excel_path="/excel",
            local_dimensions_path="/dims/Dimensions.xlsx",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_rdsap",
            return_value=fake_details,
        ) as mock_parse,
        patch(
            "backend.ecmk_fetcher.ecmk_service.flatten_sap_property",
            return_value=fake_row_data,
        ) as mock_flatten,
        patch("backend.ecmk_fetcher.ecmk_service.write_row") as mock_write,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_excel_to_sharepoint"
        ) as mock_upload_excel,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record"
        ) as mock_s3,
        # mock_open supplies a context-manager file handle whose read()
        # returns the XML text, so no real file is opened.
        patch("builtins.open", mock_open(read_data="<xml/>")),
    ):
        service._process_xml_file(
            file_path="/tmp/report.xml",
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            hubspot_listing_id="hs-001",
        )

    mock_parse.assert_called_once()
    mock_flatten.assert_called_once_with(fake_details)
    mock_write.assert_called_once_with("/dims/Dimensions.xlsx", fake_row_data)
    mock_upload_excel.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/dims/Dimensions.xlsx",
        sharepoint_path="/excel",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.xml",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SURVEY_XML,
    )
# ---------------------------------------------------------------------------
# _process_pdf_file: sharepoint upload → S3 upload
# ---------------------------------------------------------------------------
def test_process_pdf_file_uploads_to_sharepoint_then_s3() -> None:
    """``_process_pdf_file`` uploads to SharePoint first, then to S3.

    Verifies the keyword arguments of both uploads and, as the test name
    promises, that the SharePoint upload is invoked before the S3 upload.
    """
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_base_path="/base",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"
        ) as mock_sp,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ) as mock_s3,
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
    ):
        # Attach both upload mocks to one parent so their relative call
        # order is recorded in the parent's ``mock_calls``.
        uploads = MagicMock()
        uploads.attach_mock(mock_sp, "sharepoint")
        uploads.attach_mock(mock_s3, "s3")

        service._process_pdf_file(
            file_path="/tmp/report.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_sp.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/tmp/report.pdf",
        base_path="/base",
        subpath="10 Fake St",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SITE_NOTE,
    )

    # Only the top-level upload calls matter; ignore any calls made on the
    # objects the mocks returned.
    upload_order = [
        name
        for name, _args, _kwargs in uploads.mock_calls
        if name in ("sharepoint", "s3")
    ]
    assert upload_order == ["sharepoint", "s3"]
# ---------------------------------------------------------------------------
# _process_pdf_file: EPC extraction conditional on file_type
# ---------------------------------------------------------------------------
def test_process_pdf_file_runs_epc_extraction_for_rd_sap_site_note() -> None:
    """RdSAP site notes are parsed and saved against the uploaded file id."""
    parsed_epc = MagicMock()
    session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=99,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
            return_value=parsed_epc,
        ) as parse_pdf,
        patch(
            "backend.ecmk_fetcher.ecmk_service.save_epc_property_data"
        ) as save_epc,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as db_session_factory,
    ):
        # ``db_session()`` is entered as a context manager yielding the session.
        db_session_factory.return_value.__enter__.return_value = session
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    parse_pdf.assert_called_once_with("/tmp/sitenote.pdf")
    # 99 is the id returned by the patched S3 upload above.
    save_epc.assert_called_once_with(
        session=session,
        data=parsed_epc,
        uploaded_file_id=99,
    )
def test_process_pdf_file_skips_epc_extraction_for_ecmk_site_note() -> None:
    """ECMK site notes trigger neither PDF parsing nor a DB session."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf") as parse_pdf,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as db_session_factory,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    parse_pdf.assert_not_called()
    db_session_factory.assert_not_called()
def test_process_pdf_file_epc_uses_separate_db_session_from_s3_upload() -> None:
    """EPC db_session opens only after upload_file_to_s3_and_record returns."""
    events: list[str] = []

    def _record_s3(**_: object) -> int:
        events.append("s3")
        return 77

    def _record_db_session() -> MagicMock:
        events.append("db_session")
        # Hand back a context manager that yields a throwaway session and
        # does not suppress exceptions.
        session_ctx = MagicMock()
        session_ctx.__enter__.return_value = MagicMock()
        session_ctx.__exit__.return_value = False
        return session_ctx

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            side_effect=_record_s3,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.db_session",
            side_effect=_record_db_session,
        ),
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert events == ["s3", "db_session"]
# ---------------------------------------------------------------------------
# _process_pdf_file: EPC failures swallowed with warning
# ---------------------------------------------------------------------------
def _pdf_file_patches_for_failure() -> tuple: # type: ignore[type-arg]
return (
patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
patch(
"backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
return_value=1,
),
)
def test_process_pdf_file_parse_failure_logged_as_warning_not_raised() -> None:
    """A parser error is logged as one warning and nothing is saved."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    sharepoint_patch, s3_patch = _pdf_file_patches_for_failure()
    failing_parser = patch(
        "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
        side_effect=ValueError("bad pdf"),
    )
    with (
        sharepoint_patch,
        s3_patch,
        failing_parser,
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data") as save_epc,
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
        patch("backend.ecmk_fetcher.ecmk_service.logger") as service_logger,
    ):
        # Must return normally even though the parser raises.
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    service_logger.warning.assert_called_once()
    save_epc.assert_not_called()
def test_process_pdf_file_save_failure_logged_as_warning_not_raised() -> None:
    """A failure while saving EPC data is logged as one warning, not raised."""
    session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    sharepoint_patch, s3_patch = _pdf_file_patches_for_failure()
    working_parser = patch(
        "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
        return_value=MagicMock(),
    )
    failing_save = patch(
        "backend.ecmk_fetcher.ecmk_service.save_epc_property_data",
        side_effect=RuntimeError("db exploded"),
    )
    with (
        sharepoint_patch,
        s3_patch,
        working_parser,
        failing_save,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as db_session_factory,
        patch("backend.ecmk_fetcher.ecmk_service.logger") as service_logger,
    ):
        db_session_factory.return_value.__enter__.return_value = session
        # Must return normally even though the save step raises.
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    service_logger.warning.assert_called_once()
# ---------------------------------------------------------------------------
# _run_browser_session: delegates file processing to _process_file
# ---------------------------------------------------------------------------
def _make_page_mock_with_one_matching_row() -> MagicMock:
cells_nth: dict[int, MagicMock] = {n: MagicMock() for n in (1, 2, 5, 7, 9)}
cells_nth[1].inner_text.return_value = "John"
cells_nth[2].inner_text.return_value = "Doe"
cells_nth[5].inner_text.return_value = "10 FAKE ST"
cells_nth[7].inner_text.return_value = "SW1A 1AA"
cells_nth[9].inner_text.return_value = "Submitted (not Lodged)"
cells_mock = MagicMock()
cells_mock.nth.side_effect = lambda n: cells_nth[n]
row_mock = MagicMock()
row_mock.locator.return_value = cells_mock
rows_mock = MagicMock()
rows_mock.count.return_value = 1
rows_mock.nth.return_value = row_mock
page = MagicMock()
page.locator.return_value = rows_mock
return page
# Property map for the browser-session test. Its key is the property id
# derived from the mocked row:
# address "10 FAKE ST" + postcode "SW1A 1AA" → build_property_id → "10SW1A1AA"
_BROWSER_SESSION_PROPERTY_MAP: Dict[str, PropertyRow] = {
    "10SW1A1AA": PropertyRow(
        row_index=2,
        address="10 Fake St SW1A 1AA",
        listing_id="12345",
    ),
}
def test_run_browser_session_calls_process_file_for_downloaded_file() -> None:
    """A downloaded report for a matching row is handed to ``_process_file``.

    Navigation, download and DB lookups are all patched; the single report
    type in play is SITENOTE_REPORT, mapped to ``ECMK_SITE_NOTE``. The address
    and listing id passed on come from ``_BROWSER_SESSION_PROPERTY_MAP``.
    """
    page = _make_page_mock_with_one_matching_row()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=_BROWSER_SESSION_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.attach_debug_listeners"),
        patch("backend.ecmk_fetcher.ecmk_service.login"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessments"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessment_details"),
        # No further pages after the first one.
        patch("backend.ecmk_fetcher.ecmk_service.go_to_next_page", return_value=False),
        # No previously uploaded file is found for the listing.
        patch(
            "backend.ecmk_fetcher.ecmk_service.get_uploaded_file_by_listing_type_and_source",
            return_value=None,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.download_with_retry",
            return_value="/tmp/fake.pdf",
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.map_report_type_to_db_file_type",
            return_value=FileTypeEnum.ECMK_SITE_NOTE,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.REPORT_TYPES",
            [FileDownloadButtonType.SITENOTE_REPORT.value],
        ),
        patch.object(service, "_process_file") as process_file,
        patch("os.path.exists", return_value=False),
    ):
        service._run_browser_session(page)

    process_file.assert_called_once_with(
        file_path="/tmp/fake.pdf",
        report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
        db_file_type=FileTypeEnum.ECMK_SITE_NOTE,
        sharepoint_address="10 Fake St SW1A 1AA",
        hubspot_listing_id="12345",
    )