"""Unit tests for ``EcmkService``.

Every collaborator of the service (SharePoint client, Playwright, S3/DB
helpers, spreadsheet loader) is replaced with a mock, so these tests only
check how the service wires its collaborators together.
"""

from typing import Dict
from unittest.mock import MagicMock, call, mock_open, patch

from backend.app.db.models.uploaded_file import FileTypeEnum
from backend.ecmk_fetcher.address_list import PropertyRow
from backend.ecmk_fetcher.ecmk_service import EcmkService
from backend.ecmk_fetcher.reports import FileDownloadButtonType
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient

# Property map returned by the patched spreadsheet loader in most tests.
FAKE_PROPERTY_MAP: Dict[str, PropertyRow] = {
    "10 FAKE ST SW1A 1AA": PropertyRow(
        row_index=2, address="10 Fake St SW1A 1AA", listing_id="hs-001"
    )
}


def make_service(
    sharepoint_client: DomnaSharepointClient | None = None,
    s3_bucket: str = "test-bucket",
    property_list_filepath: str = "/fake/properties.xlsx",
    sharepoint_base_path: str = "/base",
    sharepoint_excel_path: str = "/excel",
    local_dimensions_path: str = "/fake/Dimensions.xlsx",
) -> EcmkService:
    """Build an ``EcmkService`` with test defaults.

    When no ``sharepoint_client`` is given, a ``MagicMock`` spec'd on
    ``DomnaSharepointClient`` is used. Callers are expected to patch
    ``extract_addresses_from_spreadsheet`` around this call, because the
    default spreadsheet path does not exist.
    """
    return EcmkService(
        sharepoint_client=sharepoint_client or MagicMock(spec=DomnaSharepointClient),
        s3_bucket=s3_bucket,
        property_list_filepath=property_list_filepath,
        sharepoint_base_path=sharepoint_base_path,
        sharepoint_excel_path=sharepoint_excel_path,
        local_dimensions_path=local_dimensions_path,
    )


# ---------------------------------------------------------------------------
# __init__: loads property map from spreadsheet filepath
# ---------------------------------------------------------------------------


def test_init_loads_property_map_from_filepath() -> None:
    """Constructing the service calls the spreadsheet loader with the given path."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ) as mock_extract:
        _ = make_service(property_list_filepath="/some/props.xlsx")

    mock_extract.assert_called_once_with("/some/props.xlsx")


# ---------------------------------------------------------------------------
# run(): downloads Dimensions.xlsx before Playwright browser launches
# ---------------------------------------------------------------------------


def _make_playwright_mocks() -> tuple[MagicMock, MagicMock, MagicMock, MagicMock]:
    """Return ``(page, context, browser, playwright)`` mocks chained together.

    ``playwright.chromium.launch()`` yields the browser,
    ``browser.new_context()`` yields the context and ``context.new_page()``
    yields the page.
    """
    mock_page = MagicMock()
    mock_context = MagicMock()
    mock_context.new_page.return_value = mock_page
    mock_browser = MagicMock()
    mock_browser.new_context.return_value = mock_context
    mock_playwright = MagicMock()
    mock_playwright.chromium.launch.return_value = mock_browser
    return mock_page, mock_context, mock_browser, mock_playwright


def test_run_downloads_dimensions_before_browser_launch() -> None:
    """``run()`` downloads via SharePoint before launching the browser."""
    call_order: list[str] = []

    mock_client = MagicMock(spec=DomnaSharepointClient)

    # The hooks accept any arguments: this test is about ordering only, so it
    # must not fail because of how the service spells the call.
    def _on_download(*_args: object, **_kwargs: object) -> None:
        call_order.append("download")

    mock_client.download_file.side_effect = _on_download

    _, _, mock_browser, mock_playwright = _make_playwright_mocks()

    def _on_launch(*_args: object, **_kwargs: object) -> MagicMock:
        call_order.append("browser")
        return mock_browser

    mock_playwright.chromium.launch.side_effect = _on_launch

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as mock_sync_pw,
    ):
        mock_sync_pw.return_value.__enter__.return_value = mock_playwright
        service = make_service(
            sharepoint_client=mock_client,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        with patch.object(service, "_run_browser_session"):
            service.run()

    assert call_order == ["download", "browser"]


def test_run_downloads_dimensions_with_correct_paths() -> None:
    """``run()`` downloads ``<excel path>/Dimensions.xlsx`` to the local path."""
    mock_client = MagicMock(spec=DomnaSharepointClient)
    _, _, _, mock_playwright = _make_playwright_mocks()

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as mock_sync_pw,
    ):
        mock_sync_pw.return_value.__enter__.return_value = mock_playwright
        service = make_service(
            sharepoint_client=mock_client,
            sharepoint_excel_path="/excel",
            local_dimensions_path="/fake/Dimensions.xlsx",
        )
        with patch.object(service, "_run_browser_session"):
            service.run()

    mock_client.download_file.assert_called_once_with(
        sharepoint_path="/excel/Dimensions.xlsx",
        local_path="/fake/Dimensions.xlsx",
    )


# ---------------------------------------------------------------------------
# run(): passes the Playwright Page into _run_browser_session
# ---------------------------------------------------------------------------


def test_run_passes_page_to_run_browser_session() -> None:
    """``run()`` hands the page from ``context.new_page()`` to the session."""
    mock_page, _, _, mock_playwright = _make_playwright_mocks()

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
            return_value=FAKE_PROPERTY_MAP,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.sync_playwright") as mock_sync_pw,
    ):
        mock_sync_pw.return_value.__enter__.return_value = mock_playwright
        service = make_service()
        with patch.object(service, "_run_browser_session") as mock_session:
            service.run()

    mock_session.assert_called_once_with(mock_page)


# ---------------------------------------------------------------------------
# _process_file: dispatches based on report_type
# ---------------------------------------------------------------------------


def test_process_file_dispatches_to_xml_for_raw_xml() -> None:
    """A ``RAW_XML`` report type is routed to ``_process_xml_file`` only."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch.object(service, "_process_xml_file") as mock_xml,
        patch.object(service, "_process_pdf_file") as mock_pdf,
    ):
        service._process_file(
            file_path="/tmp/file.xml",
            report_type=FileDownloadButtonType.RAW_XML.value,
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_xml.assert_called_once()
    mock_pdf.assert_not_called()


def test_process_file_dispatches_to_pdf_for_non_xml() -> None:
    """A non-XML report type is routed to ``_process_pdf_file`` only."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch.object(service, "_process_xml_file") as mock_xml,
        patch.object(service, "_process_pdf_file") as mock_pdf,
    ):
        service._process_file(
            file_path="/tmp/file.pdf",
            report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
            db_file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_pdf.assert_called_once()
    mock_xml.assert_not_called()


# ---------------------------------------------------------------------------
# _process_xml_file: parse → flatten → write row → upload excel → S3
# ---------------------------------------------------------------------------


def test_process_xml_file_full_chain() -> None:
    """Each step of the XML pipeline receives the output of the previous one."""
    fake_details = MagicMock()
    fake_row_data = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_excel_path="/excel",
            local_dimensions_path="/dims/Dimensions.xlsx",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_rdsap", return_value=fake_details
        ) as mock_parse,
        patch(
            "backend.ecmk_fetcher.ecmk_service.flatten_sap_property",
            return_value=fake_row_data,
        ) as mock_flatten,
        patch("backend.ecmk_fetcher.ecmk_service.write_row") as mock_write,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_excel_to_sharepoint"
        ) as mock_upload_excel,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record"
        ) as mock_s3,
        # Stand-in for the XML file on disk: any read yields an empty string.
        patch("builtins.open", mock_open(read_data="")),
    ):
        service._process_xml_file(
            file_path="/tmp/report.xml",
            db_file_type=FileTypeEnum.ECMK_SURVEY_XML,
            hubspot_listing_id="hs-001",
        )

    mock_parse.assert_called_once()
    mock_flatten.assert_called_once_with(fake_details)
    mock_write.assert_called_once_with("/dims/Dimensions.xlsx", fake_row_data)
    mock_upload_excel.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/dims/Dimensions.xlsx",
        sharepoint_path="/excel",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.xml",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SURVEY_XML,
    )


# ---------------------------------------------------------------------------
# _process_pdf_file: sharepoint upload → S3 upload
# ---------------------------------------------------------------------------


def test_process_pdf_file_uploads_to_sharepoint_then_s3() -> None:
    """A PDF is uploaded to SharePoint and to S3 with the expected arguments."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service(
            s3_bucket="my-bucket",
            sharepoint_base_path="/base",
        )

    with (
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"
        ) as mock_sp,
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ) as mock_s3,
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
    ):
        service._process_pdf_file(
            file_path="/tmp/report.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_sp.assert_called_once_with(
        client=service._sharepoint_client,
        file_path="/tmp/report.pdf",
        base_path="/base",
        subpath="10 Fake St",
    )
    mock_s3.assert_called_once_with(
        bucket="my-bucket",
        file_path="/tmp/report.pdf",
        hubspot_listing_id="hs-001",
        file_type=FileTypeEnum.ECMK_SITE_NOTE,
    )


# ---------------------------------------------------------------------------
# _process_pdf_file: EPC extraction conditional on file_type
# ---------------------------------------------------------------------------


def test_process_pdf_file_runs_epc_extraction_for_rd_sap_site_note() -> None:
    """RdSAP site notes are parsed and saved against the uploaded file id."""
    fake_epc_data = MagicMock()
    fake_session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=99,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
            return_value=fake_epc_data,
        ) as mock_parse,
        patch(
            "backend.ecmk_fetcher.ecmk_service.save_epc_property_data"
        ) as mock_save,
        patch(
            "backend.ecmk_fetcher.ecmk_service.db_session"
        ) as mock_db_session,
    ):
        mock_db_session.return_value.__enter__.return_value = fake_session
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_parse.assert_called_once_with("/tmp/sitenote.pdf")
    mock_save.assert_called_once_with(
        session=fake_session,
        data=fake_epc_data,
        uploaded_file_id=99,
    )


def test_process_pdf_file_skips_epc_extraction_for_ecmk_site_note() -> None:
    """ECMK site notes trigger neither PDF parsing nor a DB session."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=42,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"
        ) as mock_parse,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as mock_db_session,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    mock_parse.assert_not_called()
    mock_db_session.assert_not_called()


def test_process_pdf_file_epc_uses_separate_db_session_from_s3_upload() -> None:
    """EPC db_session opens only after upload_file_to_s3_and_record returns."""
    call_order: list[str] = []

    # Both hooks accept any arguments so that only the ordering is asserted.
    def _on_s3(*_args: object, **_kwargs: object) -> int:
        call_order.append("s3")
        return 77

    def _on_db_session(*_args: object, **_kwargs: object) -> MagicMock:
        call_order.append("db_session")
        ctx = MagicMock()
        ctx.__enter__ = MagicMock(return_value=MagicMock())
        ctx.__exit__ = MagicMock(return_value=False)
        return ctx

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            side_effect=_on_s3,
        ),
        patch("backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf"),
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.db_session",
            side_effect=_on_db_session,
        ),
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    assert call_order == ["s3", "db_session"]


# ---------------------------------------------------------------------------
# _process_pdf_file: EPC failures swallowed with warning
# ---------------------------------------------------------------------------


def _pdf_file_patches_for_failure() -> tuple:  # type: ignore[type-arg]
    """Return un-started ``(sharepoint, s3)`` patches shared by the failure tests.

    The S3 patch makes ``upload_file_to_s3_and_record`` return ``1``.
    """
    return (
        patch("backend.ecmk_fetcher.ecmk_service.upload_file_to_sharepoint"),
        patch(
            "backend.ecmk_fetcher.ecmk_service.upload_file_to_s3_and_record",
            return_value=1,
        ),
    )


def test_process_pdf_file_parse_failure_logged_as_warning_not_raised() -> None:
    """A parser error is logged as a warning and nothing is saved."""
    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    sp_patch, s3_patch = _pdf_file_patches_for_failure()
    with (
        sp_patch,
        s3_patch,
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
            side_effect=ValueError("bad pdf"),
        ) as mock_parse,
        patch("backend.ecmk_fetcher.ecmk_service.save_epc_property_data") as mock_save,
        patch("backend.ecmk_fetcher.ecmk_service.db_session"),
        patch("backend.ecmk_fetcher.ecmk_service.logger") as mock_logger,
    ):
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    # Confirm the failing parser was reached, so the warning cannot have come
    # from an earlier, unrelated step.
    mock_parse.assert_called_once()
    mock_logger.warning.assert_called_once()
    mock_save.assert_not_called()


def test_process_pdf_file_save_failure_logged_as_warning_not_raised() -> None:
    """A save error is logged as a warning instead of propagating."""
    fake_session = MagicMock()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=FAKE_PROPERTY_MAP,
    ):
        service = make_service()

    sp_patch, s3_patch = _pdf_file_patches_for_failure()
    with (
        sp_patch,
        s3_patch,
        patch(
            "backend.ecmk_fetcher.ecmk_service.parse_site_notes_pdf",
            return_value=MagicMock(),
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.save_epc_property_data",
            side_effect=RuntimeError("db exploded"),
        ) as mock_save,
        patch("backend.ecmk_fetcher.ecmk_service.db_session") as mock_db_session,
        patch("backend.ecmk_fetcher.ecmk_service.logger") as mock_logger,
    ):
        mock_db_session.return_value.__enter__.return_value = fake_session
        service._process_pdf_file(
            file_path="/tmp/sitenote.pdf",
            file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE,
            sharepoint_address="10 Fake St",
            hubspot_listing_id="hs-001",
        )

    # Without this check the test would also pass if an earlier step logged
    # the warning and the save was never attempted.
    mock_save.assert_called_once()
    mock_logger.warning.assert_called_once()


# ---------------------------------------------------------------------------
# _run_browser_session: delegates file processing to _process_file
# ---------------------------------------------------------------------------


def _make_page_mock_with_one_matching_row() -> MagicMock:
    """Return a page mock whose row locator exposes exactly one table row.

    Only cells 1, 2, 5, 7 and 9 of that row are populated; asking the row for
    any other cell index raises ``KeyError``.
    """
    cells_nth: dict[int, MagicMock] = {n: MagicMock() for n in (1, 2, 5, 7, 9)}
    cells_nth[1].inner_text.return_value = "John"
    cells_nth[2].inner_text.return_value = "Doe"
    cells_nth[5].inner_text.return_value = "10 FAKE ST"
    cells_nth[7].inner_text.return_value = "SW1A 1AA"
    cells_nth[9].inner_text.return_value = "Submitted (not Lodged)"

    cells_mock = MagicMock()
    cells_mock.nth.side_effect = lambda n: cells_nth[n]

    row_mock = MagicMock()
    row_mock.locator.return_value = cells_mock

    rows_mock = MagicMock()
    rows_mock.count.return_value = 1
    rows_mock.nth.return_value = row_mock

    page = MagicMock()
    page.locator.return_value = rows_mock
    return page


# address "10 FAKE ST" + postcode "SW1A 1AA" → build_property_id → "10SW1A1AA"
_BROWSER_SESSION_PROPERTY_MAP: Dict[str, PropertyRow] = {
    "10SW1A1AA": PropertyRow(
        row_index=2, address="10 Fake St SW1A 1AA", listing_id="12345"
    )
}


def test_run_browser_session_calls_process_file_for_downloaded_file() -> None:
    """A downloaded report is passed to ``_process_file`` with the row's details."""
    mock_page = _make_page_mock_with_one_matching_row()

    with patch(
        "backend.ecmk_fetcher.ecmk_service.extract_addresses_from_spreadsheet",
        return_value=_BROWSER_SESSION_PROPERTY_MAP,
    ):
        service = make_service()

    with (
        patch("backend.ecmk_fetcher.ecmk_service.attach_debug_listeners"),
        patch("backend.ecmk_fetcher.ecmk_service.login"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessments"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_assessment_details"),
        patch("backend.ecmk_fetcher.ecmk_service.go_to_next_page", return_value=False),
        patch(
            "backend.ecmk_fetcher.ecmk_service.get_uploaded_file_by_listing_type_and_source",
            return_value=None,
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.download_with_retry",
            return_value="/tmp/fake.pdf",
        ),
        patch(
            "backend.ecmk_fetcher.ecmk_service.map_report_type_to_db_file_type",
            return_value=FileTypeEnum.ECMK_SITE_NOTE,
        ),
        # Limit the run to a single report type so exactly one call is expected.
        patch(
            "backend.ecmk_fetcher.ecmk_service.REPORT_TYPES",
            [FileDownloadButtonType.SITENOTE_REPORT.value],
        ),
        patch.object(service, "_process_file") as mock_process_file,
        patch("os.path.exists", return_value=False),
    ):
        service._run_browser_session(mock_page)

    mock_process_file.assert_called_once_with(
        file_path="/tmp/fake.pdf",
        report_type=FileDownloadButtonType.SITENOTE_REPORT.value,
        db_file_type=FileTypeEnum.ECMK_SITE_NOTE,
        sharepoint_address="10 Fake St SW1A 1AA",
        hubspot_listing_id="12345",
    )