diff --git a/backend/condition/lookups/uprn_lookup.py b/backend/condition/lookups/uprn_lookup.py index b817445d..0f6e78fd 100644 --- a/backend/condition/lookups/uprn_lookup.py +++ b/backend/condition/lookups/uprn_lookup.py @@ -3,7 +3,6 @@ from typing import BinaryIO, Dict class UprnLookup(ABC): - def get_location_ref_to_uprn_map( - self, lookup_file_stream: BinaryIO - ) -> Dict[str, int]: + @abstractmethod + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: pass diff --git a/backend/condition/lookups/uprn_lookup_csv.py b/backend/condition/lookups/uprn_lookup_csv.py index 43544ccd..8b1c21a2 100644 --- a/backend/condition/lookups/uprn_lookup_csv.py +++ b/backend/condition/lookups/uprn_lookup_csv.py @@ -4,21 +4,20 @@ from typing import BinaryIO, Dict, TextIO from backend.condition.lookups.uprn_lookup import UprnLookup -class UprnLookupCsv(UprnLookup): - def get_location_ref_to_uprn_map( - self, lookup_file_stream: BinaryIO - ) -> Dict[str, int]: - text_stream: TextIO = TextIOWrapper(lookup_file_stream, encoding="utf-8") - location_ref_to_uprn_map: Dict[str, int] = {} +class UprnLookupLocal(UprnLookup): + def __init__(self, csv_path: str): + self.csv_path = csv_path + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + with open(self.csv_path, "rb") as f: + return self.parse_csv(f) + + def parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]: + text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8") + mapping: Dict[str, int] = {} reader = csv.DictReader(text_stream) for row in reader: if not row["reference"] or not row["out_uprn"]: - # skip empty rows continue - - ref = row["reference"].strip() - uprn = int(row["out_uprn"].strip()) - location_ref_to_uprn_map[ref] = uprn - - return location_ref_to_uprn_map + mapping[row["reference"].strip()] = int(row["out_uprn"].strip()) + return mapping diff --git a/backend/condition/lookups/uprn_lookup_s3.py b/backend/condition/lookups/uprn_lookup_s3.py new file mode 100644 index 00000000..1eb68aae --- /dev/null +++ b/backend/condition/lookups/uprn_lookup_s3.py @@ -0,0 +1,23 @@ +import csv +from io import TextIOWrapper +from typing import BinaryIO, Dict, TextIO +from backend.condition.lookups.uprn_lookup import UprnLookup + + +class UprnLookupS3(UprnLookup): + def __init__(self, bucket: str = "", key: str = ""): + self.bucket = bucket + self.key = key + + def get_property_ref_to_uprn_lookup(self) -> Dict[str, int]: + raise NotImplementedError() + + def _parse_csv(self, file_stream: BinaryIO) -> Dict[str, int]: + text_stream: TextIO = TextIOWrapper(file_stream, encoding="utf-8") + mapping: Dict[str, int] = {} + reader = csv.DictReader(text_stream) + for row in reader: + if not row["reference"] or not row["out_uprn"]: + continue + mapping[row["reference"].strip()] = int(row["out_uprn"].strip()) + return mapping diff --git a/backend/condition/tests/lookups/test_uprn_lookup_csv.py b/backend/condition/tests/lookups/test_uprn_lookup_csv.py index a2d66a94..d01c52c2 100644 --- a/backend/condition/tests/lookups/test_uprn_lookup_csv.py +++ b/backend/condition/tests/lookups/test_uprn_lookup_csv.py @@ -1,23 +1,26 @@ -from typing import Any, Dict import pytest -from io import BytesIO +from typing import Dict +from tempfile import NamedTemporaryFile -from backend.condition.lookups.uprn_lookup_csv import UprnLookupCsv +from backend.condition.lookups.uprn_lookup_csv import UprnLookupLocal @pytest.fixture -def prop_ref_uprn_csv_bytes() -> BytesIO: - csv_bytes = b"""reference,out_uprn +def prop_ref_uprn_csv_file() -> str: + csv_content = """reference,out_uprn ABC123,10000000001 DEF456,10000000002 GHI789,10000000003 """ - return BytesIO(csv_bytes) + with NamedTemporaryFile(mode="w+", delete=False, suffix=".csv") as tmp: + tmp.write(csv_content) + tmp.flush() + return tmp.name -def test_generate_prop_ref_uprn_from_csv_bytes(prop_ref_uprn_csv_bytes) -> None: +def test_generate_prop_ref_uprn_from_csv_file(prop_ref_uprn_csv_file: str) -> None: # arrange - uprn_lookup = UprnLookupCsv() + uprn_lookup = UprnLookupLocal(prop_ref_uprn_csv_file) expected_map: Dict[str, int] = { "ABC123": 10000000001, "DEF456": 10000000002, @@ -25,9 +28,7 @@ def test_generate_prop_ref_uprn_from_csv_bytes(prop_ref_uprn_csv_bytes) -> None: } # act - actual_map: Dict[str, int] = uprn_lookup.get_location_ref_to_uprn_map( - prop_ref_uprn_csv_bytes - ) + actual_map: Dict[str, int] = uprn_lookup.get_property_ref_to_uprn_lookup() # assert assert actual_map == expected_map