get file type from request body

This commit is contained in:
Daniel Roth 2026-02-04 14:29:43 +00:00
parent 06e1d53509
commit 62906f6023
7 changed files with 45 additions and 56 deletions

View file

@ -1,6 +1,14 @@
from enum import Enum
from pydantic import BaseModel from pydantic import BaseModel
class ConditionFileType(Enum):
    """File types that a condition trigger request can declare.

    Members are currently named after the client that supplies the file.
    """

    # TODO: make these asset management systems rather than client names
    LBWF = "LBWF"
    Peabody = "Peabody"
class ConditionTriggerRequest(BaseModel): class ConditionTriggerRequest(BaseModel):
file_type: ConditionFileType
trigger_file_path: str # TODO: split into bucket/prefix? trigger_file_path: str # TODO: split into bucket/prefix?
uprn_lookup_file_path: str # TODO: split into bucket/prefix? uprn_lookup_file_path: str # TODO: split into bucket/prefix?

View file

@ -1,16 +0,0 @@
from enum import Enum
class FileType(Enum):
    """Condition file types recognised from a file path.

    Each value is the lower-case marker that identifies the type.
    """

    LBWF = "lbwf"
    Peabody = "peabody"
def detect_file_type(filepath: str) -> FileType:
    """Work out the file type from markers embedded in *filepath*.

    The path is lower-cased before matching, so detection is
    case-insensitive. "lbwf" takes precedence over "peabody" when both
    markers are present.

    Raises:
        ValueError: if the path contains neither marker.
    """
    normalised = filepath.lower()
    has_lbwf = "lbwf" in normalised
    has_peabody = "peabody" in normalised

    # Guard clause: reject anything that carries no known marker.
    if not (has_lbwf or has_peabody):
        raise ValueError("Unrecognised file path")

    return FileType.LBWF if has_lbwf else FileType.Peabody

View file

@ -1,6 +1,9 @@
import asyncio
import json
from typing import Mapping, Any from typing import Mapping, Any
from io import BytesIO from io import BytesIO
from backend.condition.condition_trigger_request import ConditionTriggerRequest
from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3 from backend.condition.lookups.uprn_lookup_s3 import UprnLookupS3
from utils.logger import setup_logger from utils.logger import setup_logger
from backend.condition.processor import process_file from backend.condition.processor import process_file
@ -15,8 +18,18 @@ def handler(event: Mapping[str, Any], context: Any) -> None:
bucket="", key="" bucket="", key=""
) # TODO: replace with postgres implementation ) # TODO: replace with postgres implementation
dummy_stream = BytesIO(b"") for record in event.get("Records", []):
try:
body_dict = json.loads(record["body"])
payload = ConditionTriggerRequest.model_validate(body_dict)
source_key = event.get("source_key", "unknown-source") # fetch file from s3
process_file(dummy_stream, source_key) # open file and send bytes to processor
except Exception as e:
logger.error(f"Failed to process record: {e}")
# dummy_stream = BytesIO(b"")
# process_file(dummy_stream, source_key)

View file

@ -1,5 +1,6 @@
from pathlib import Path from pathlib import Path
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal
from backend.condition.processor import process_file from backend.condition.processor import process_file
@ -25,15 +26,23 @@ def main() -> None:
path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv" path / "peabody" / "PeabodyPropertymatched_Dec25_propref_UPRN.csv"
) )
# filepaths = [lbwf_path, peabody_path] # filepaths = [lbwf_path, peabody_path]
# filepaths = [lbwf_path] filepaths = [lbwf_path]
filepaths = [peabody_path] # filepaths = [peabody_path]
uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix()) uprn_lookup = UprnLookupLocal(csv_path=peabody_uprn_lookup_path.as_posix())
def get_file_type(file_path: str) -> ConditionFileType:
    """Infer the condition file type from a marker in *file_path*.

    Matching is a case-sensitive substring test; "peabody" is checked
    before "lbwf".

    Raises:
        ValueError: if the path contains neither marker.
    """
    if "peabody" in file_path:
        return ConditionFileType.Peabody
    if "lbwf" in file_path:
        return ConditionFileType.LBWF
    # Fail here rather than falling off the end and returning None, which
    # would break the declared return type and only surface later as a
    # less specific "Unrecognised file type" error from the parser factory.
    raise ValueError(f"Unrecognised file path: {file_path}")
for fp in filepaths: for fp in filepaths:
with fp.open("rb") as f: with fp.open("rb") as f:
process_file( process_file(
file_stream=f, source_key=fp.as_posix(), uprn_lookup=uprn_lookup file_stream=f,
file_type=get_file_type(fp.as_posix()),
uprn_lookup=uprn_lookup,
) )

View file

@ -1,8 +1,8 @@
from typing import Optional from typing import Optional
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper from backend.condition.domain.mapping.lbwf.lbwf_mapper import LbwfMapper
from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper from backend.condition.domain.mapping.peabody.peabody_mapper import PeabodyMapper
from backend.condition.file_type import FileType
from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.lookups.uprn_lookup import UprnLookup
from backend.condition.parsing.parser import Parser from backend.condition.parsing.parser import Parser
from backend.condition.parsing.lbwf_parser import LbwfParser from backend.condition.parsing.lbwf_parser import LbwfParser
@ -10,12 +10,12 @@ from backend.condition.parsing.peabody_parser import PeabodyParser
def select_parser( def select_parser(
file_type: FileType, uprn_lookup: Optional[UprnLookup] = None file_type: ConditionFileType, uprn_lookup: Optional[UprnLookup] = None
) -> Parser: ) -> Parser:
if file_type is FileType.LBWF: if file_type is ConditionFileType.LBWF:
return LbwfParser() return LbwfParser()
if file_type is FileType.Peabody: if file_type is ConditionFileType.Peabody:
if not uprn_lookup: if not uprn_lookup:
raise ValueError( raise ValueError(
"Cannot instantiate Peabody Parser without UPRN lookup being provided" "Cannot instantiate Peabody Parser without UPRN lookup being provided"
@ -25,11 +25,11 @@ def select_parser(
raise ValueError("Unrecognised file type, unable to instantiate Parser") raise ValueError("Unrecognised file type, unable to instantiate Parser")
def select_mapper(file_type: FileType) -> Mapper: def select_mapper(file_type: ConditionFileType) -> Mapper:
if file_type is FileType.LBWF: if file_type is ConditionFileType.LBWF:
return LbwfMapper() return LbwfMapper()
if file_type is FileType.Peabody: if file_type is ConditionFileType.Peabody:
return PeabodyMapper() return PeabodyMapper()
raise ValueError("Unrecognised file type, unable to instantiate Mapper") raise ValueError("Unrecognised file type, unable to instantiate Mapper")

View file

@ -1,25 +1,22 @@
from typing import Any, BinaryIO, List from typing import Any, BinaryIO, List
from datetime import datetime from datetime import datetime
from backend.condition.condition_trigger_request import ConditionFileType
from backend.condition.lookups.uprn_lookup import UprnLookup from backend.condition.lookups.uprn_lookup import UprnLookup
from utils.logger import setup_logger from utils.logger import setup_logger
from backend.condition.domain.mapping.mapper import Mapper from backend.condition.domain.mapping.mapper import Mapper
from backend.condition.domain.property_condition_survey import PropertyConditionSurvey from backend.condition.domain.property_condition_survey import PropertyConditionSurvey
from backend.condition.parsing.parser import Parser from backend.condition.parsing.parser import Parser
from backend.condition.persistence.condition_postgres import ConditionPostgres from backend.condition.persistence.condition_postgres import ConditionPostgres
from backend.condition.file_type import FileType, detect_file_type
from backend.condition.parsing.factory import select_parser, select_mapper from backend.condition.parsing.factory import select_parser, select_mapper
logger = setup_logger() logger = setup_logger()
def process_file( def process_file(
file_stream: BinaryIO, source_key: str, uprn_lookup: UprnLookup file_stream: BinaryIO, file_type: ConditionFileType, uprn_lookup: UprnLookup
) -> None: ) -> None:
logger.info(f"[processor] Received file: {source_key}")
# Instantiation # Instantiation
file_type: FileType = detect_file_type(source_key)
parser: Parser = select_parser(file_type, uprn_lookup) parser: Parser = select_parser(file_type, uprn_lookup)
mapper: Mapper = select_mapper(file_type) mapper: Mapper = select_mapper(file_type)
persistence = ConditionPostgres() persistence = ConditionPostgres()

View file

@ -1,22 +0,0 @@
import pytest
from backend.condition.file_type import FileType, detect_file_type
def test_detects_lbwf_file_type():
    """A path containing the "lbwf" marker is detected as FileType.LBWF."""
    # arrange
    lbwf_upload_path = "uploads/lbwf/Exaple Asset Data.xlsx"

    # act
    detected: FileType = detect_file_type(lbwf_upload_path)

    # assert
    assert detected == FileType.LBWF
def test_unknown_filepath_raises_value_error():
    """A path with no recognised marker makes detect_file_type raise ValueError."""
    # arrange
    unrecognised_path = "unknown/Example Asset Data.xlsx"

    # act + assert
    with pytest.raises(ValueError):
        detect_file_type(unrecognised_path)