diff --git a/backend/condition/condition_trigger_request.py b/backend/condition/condition_trigger_request.py index 81cb6439..180259dc 100644 --- a/backend/condition/condition_trigger_request.py +++ b/backend/condition/condition_trigger_request.py @@ -1,6 +1,14 @@ +from enum import Enum from pydantic import BaseModel +class ConditionFileType(Enum): + LBWF = "LBWF" + Peabody = "Peabody" + # TODO: make these asset management systems rather than client names + + class ConditionTriggerRequest(BaseModel): + file_type: ConditionFileType trigger_file_path: str # TODO: split into bucket/prefix? uprn_lookup_file_path: str # TODO: split into bucket/prefix? diff --git a/backend/condition/file_type.py b/backend/condition/file_type.py deleted file mode 100644 index e0736814..00000000 --- a/backend/condition/file_type.py +++ /dev/null @@ -1,16 +0,0 @@ -from enum import Enum - -class FileType(Enum): - LBWF = "lbwf" - Peabody = "peabody" - -def detect_file_type(filepath: str) -> FileType: - path = filepath.lower() - - if "lbwf" in path: - return FileType.LBWF - - if "peabody" in path: - return FileType.Peabody - - raise ValueError("Unrecognised file path") \ No newline at end of file diff --git a/backend/condition/handler.py b/backend/condition/handler.py index 18ac1da7..a3a92d70 100644 --- a/backend/condition/handler.py +++ b/backend/condition/handler.py @@ -1,6 +1,9 @@ +import asyncio +import json from typing import Mapping, Any from io import BytesIO +from backend.condition.condition_trigger_request import ConditionTriggerRequest from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3 from utils.logger import setup_logger from backend.condition.processor import process_file @@ -15,8 +18,18 @@ def handler(event: Mapping[str, Any], context: Any) -> None: bucket="", key="" ) # TODO: replace with postgres implementation - dummy_stream = BytesIO(b"") + for record in event.get("Records", []): + try: + body_dict = json.loads(record["body"]) + payload = ConditionTriggerRequest.model_validate(body_dict) - source_key = event.get("source_key", "unknown-source") + # fetch file from s3 - process_file(dummy_stream, source_key) + # open file and send bytes to processor + + except Exception as e: + logger.error(f"Failed to process record: {e}") + + # dummy_stream = BytesIO(b"") + + # process_file(dummy_stream, source_key) diff --git a/backend/condition/local_runner.py b/backend/condition/local_runner.py index 4efeec3a..4595b93b 100644 --- a/backend/condition/local_runner.py +++ b/backend/condition/local_runner.py @@ -1,5 +1,6 @@ from pathlib import Path +from backend.condition.condition_trigger_request import ConditionFileType from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.processor import process_file @@ -25,15 +26,23 @@ def main() -> None: path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv" ) # filepaths = [lbwf_path, peabody_path] - # filepaths = [lbwf_path] - filepaths = [peabody_path] + filepaths = [lbwf_path] + # filepaths = [peabody_path] uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix()) + def get_file_type(file_path: str) -> ConditionFileType: + if "peabody" in file_path: + return ConditionFileType.Peabody + if "lbwf" in file_path: + return ConditionFileType.LBWF + for fp in filepaths: with fp.open("rb") as f: process_file( - file_stream=f, source_key=fp.as_posix(), uprn_lookup=uprn_lookup + file_stream=f, + file_type=get_file_type(fp.as_posix()), + uprn_lookup=uprn_lookup, ) diff --git a/backend/condition/parsing/factory.py b/backend/condition/parsing/factory.py index e28a15a2..b5d28e18 100644 --- a/backend/condition/parsing/factory.py +++ b/backend/condition/parsing/factory.py @@ -1,8 +1,8 @@ from typing import Optional +from backend.condition.condition_trigger_request import ConditionFileType from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper -from backend.condition.file_type import FileType from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.parsing.parser import Parser from backend.condition.parsing.lbwf_parser import LbwfParser @@ -10,12 +10,12 @@ from backend.condition.parsing.peabody_parser import PeabodyParser def select_parser( - file_type: FileType, uprn_lookup: Optional[UprnLookup] = None + file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None ) -> Parser: - if file_type is FileType.LBWF: + if file_type is ConditionFileType.LBWF: return LbwfParser() - if file_type is FileType.Peabody: + if file_type is ConditionFileType.Peabody: if not uprn_lookup: raise ValueError( "Cannot instantiate Peabody Parser without UPRN lookup being provided" @@ -25,11 +25,11 @@ def select_parser( raise ValueError("Unrecognised file type, unable to instantiate Parser") -def select_mapper(file_type: FileType) -> Mapper: - if file_type is FileType.LBWF: +def select_mapper(file_type: ConditionFileType) -> Mapper: + if file_type is ConditionFileType.LBWF: return LbwfMapper() - if file_type is FileType.Peabody: + if file_type is ConditionFileType.Peabody: return PeabodyMapper() raise ValueError("Unrecognised file type, unable to instantiate Mapper") diff --git a/backend/condition/processor.py b/backend/condition/processor.py index 77994536..70ce2df9 100644 --- a/backend/condition/processor.py +++ b/backend/condition/processor.py @@ -1,25 +1,22 @@ from typing import Any, BinaryIO, List from datetime import datetime +from backend.condition.condition_trigger_request import ConditionFileType from backend.condition.lookups.uprn_lookup import UprnLookup from utils.logger import setup_logger from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.property_condition_survey import PropertyConditionSurvey from backend.condition.parsing.parser import Parser from backend.condition.persistence.condition_postgres import ConditionPostgres -from backend.condition.file_type import FileType, detect_file_type from backend.condition.parsing.factory import select_parser, select_mapper logger = setup_logger() def process_file( - file_stream: BinaryIO, source_key: str, uprn_lookup: UprnLookup + file_stream: BinaryIO, file_type: ConditionFileType, uprn_lookup: UprnLookup ) -> None: - logger.info(f"[processor] Received file: {source_key}") - # Instantiation - file_type: FileType = detect_file_type(source_key) parser: Parser = select_parser(file_type, uprn_lookup) mapper: Mapper = select_mapper(file_type) persistence = ConditionPostgres() diff --git a/backend/condition/tests/test_detect_file_type.py b/backend/condition/tests/test_detect_file_type.py deleted file mode 100644 index fecf22c1..00000000 --- a/backend/condition/tests/test_detect_file_type.py +++ /dev/null @@ -1,22 +0,0 @@ -import pytest - -from backend.condition.file_type import FileType, detect_file_type - -def test_detects_lbwf_file_type(): - # arrange - file_path_str = "uploads/lbwf/Exaple Asset Data.xlsx" - expected_file_type = FileType.LBWF - - # act - actual_file_type: FileType = detect_file_type(file_path_str) - - # assert - assert expected_file_type == actual_file_type - -def test_unknown_filepath_raises_value_error(): - # arrange - file_path_str = "unknown/Example Asset Data.xlsx" - - # act + assert - with pytest.raises(ValueError): - detect_file_type(file_path_str) \ No newline at end of file