diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index 4f8c24ea..03e40f1c 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -33,7 +33,7 @@ from backend.ecmk_fetcher.reports import ( from backend.ecmk_fetcher.excel_writer import write_row from backend.ecmk_fetcher.upload import ( upload_excel_to_sharepoint, - upload_file_to_s3_and_update_db, + upload_file_to_s3_and_record, upload_file_to_sharepoint, ) from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap @@ -180,7 +180,7 @@ def run_job() -> None: ) # Upload to s3 and update db - upload_file_to_s3_and_update_db( + upload_file_to_s3_and_record( bucket=s3_bucket, file_path=file_path, hubspot_listing_id=hubspot_listing_id, diff --git a/backend/ecmk_fetcher/tests/test_upload.py b/backend/ecmk_fetcher/tests/test_upload.py new file mode 100644 index 00000000..79823e8e --- /dev/null +++ b/backend/ecmk_fetcher/tests/test_upload.py @@ -0,0 +1,108 @@ +from typing import Generator +from unittest.mock import MagicMock, call, patch + +import pytest + +from backend.app.db.models.uploaded_file import FileTypeEnum +from backend.ecmk_fetcher.upload import upload_file_to_s3_and_record + + +@pytest.fixture +def mock_uploaded_file() -> MagicMock: + obj = MagicMock() + obj.id = 42 + return obj + + +@pytest.fixture +def mock_session() -> MagicMock: + return MagicMock() + + +@pytest.fixture +def patched_deps( + mock_uploaded_file: MagicMock, mock_session: MagicMock +) -> Generator[dict[str, MagicMock], None, None]: + with ( + patch( + "backend.ecmk_fetcher.upload.upload_file_to_s3" + ) as mock_s3, + patch( + "backend.ecmk_fetcher.upload.db_session" + ) as mock_db_ctx, + patch( + "backend.ecmk_fetcher.upload.UploadedFile", + return_value=mock_uploaded_file, + ) as mock_model, + ): + mock_db_ctx.return_value.__enter__.return_value = mock_session + mock_db_ctx.return_value.__exit__.return_value = False + yield { + "s3": mock_s3, + "db_ctx": mock_db_ctx, + "model": mock_model, + "session": mock_session, + "uploaded_file": mock_uploaded_file, + } + + +def test_returns_uploaded_file_id_as_int( + patched_deps: dict[str, MagicMock], +) -> None: + result = upload_file_to_s3_and_record( + bucket="test-bucket", + file_path="/tmp/report.pdf", + hubspot_listing_id="hs-001", + file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE, + ) + + assert isinstance(result, int) + assert result == 42 + + +def test_uploads_to_s3_with_key_derived_from_listing_id_and_filename( + patched_deps: dict[str, MagicMock], +) -> None: + upload_file_to_s3_and_record( + bucket="my-bucket", + file_path="/some/path/site_note.pdf", + hubspot_listing_id="hs-999", + file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE, + ) + + patched_deps["s3"].assert_called_once_with( + "/some/path/site_note.pdf", + "my-bucket", + "documents/hubspot_listing_id/hs-999/site_note.pdf", + ) + + +def test_adds_uploaded_file_record_to_session( + patched_deps: dict[str, MagicMock], +) -> None: + upload_file_to_s3_and_record( + bucket="test-bucket", + file_path="/tmp/report.pdf", + hubspot_listing_id="hs-001", + file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE, + ) + + patched_deps["session"].add.assert_called_once_with( + patched_deps["uploaded_file"] + ) + patched_deps["session"].flush.assert_called_once() + + +def test_site_note_type_does_not_trigger_pdf_parsing( + patched_deps: dict[str, MagicMock], +) -> None: + # If parsing branch still existed, this would blow up without a + # parse_site_notes_pdf mock — test passes only when branch is absent. + result = upload_file_to_s3_and_record( + bucket="test-bucket", + file_path="/tmp/site_note.pdf", + hubspot_listing_id="hs-002", + file_type=FileTypeEnum.ECMK_RD_SAP_SITE_NOTE, + ) + + assert result == 42 diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py index edfaf9f1..fc05363c 100644 --- a/backend/ecmk_fetcher/upload.py +++ b/backend/ecmk_fetcher/upload.py @@ -8,8 +8,6 @@ from backend.app.db.models.uploaded_file import ( FileTypeEnum, UploadedFile, ) -from backend.documents_parser.db_writer import save_epc_property_data -from backend.documents_parser.parser import parse_site_notes_pdf from utils.logger import setup_logger from utils.s3 import upload_file_to_s3 from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient @@ -47,7 +45,7 @@ def upload_excel_to_sharepoint( # TODO: this should be moved to somewhere common and called by pashub fetcher -def upload_file_to_s3_and_update_db( +def upload_file_to_s3_and_record( bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum ) -> int: filename: str = os.path.basename(file_path) @@ -70,11 +68,4 @@ def upload_file_to_s3_and_update_db( session.flush() uploaded_file_id: int = int(cast(int, uploaded_file.id)) - if file_type == FileTypeEnum.ECMK_RD_SAP_SITE_NOTE: - try: - epc_data = parse_site_notes_pdf(file_path) - save_epc_property_data(session, epc_data, uploaded_file_id=uploaded_file_id) - except Exception: - logger.warning(f"Failed to parse/save site notes {file_path}", exc_info=True) - return uploaded_file_id