From 8f99431df30ad7b1701d0ce8dc8b710f2042178f Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Fri, 16 Jun 2023 09:36:02 +0100
Subject: [PATCH] Added unit tests for land registry

---
 model_data/Need.py                            | 32 ------------
 model_data/app.py                             |  9 ----
 model_data/tests/test_land_registry_client.py | 49 +++++++++++++++++++
 3 files changed, 49 insertions(+), 41 deletions(-)
 delete mode 100644 model_data/Need.py
 create mode 100644 model_data/tests/test_land_registry_client.py

diff --git a/model_data/Need.py b/model_data/Need.py
deleted file mode 100644
index 50e30e54..00000000
--- a/model_data/Need.py
+++ /dev/null
@@ -1,32 +0,0 @@
-import pandas as pd
-from model_data.utils import setup_logger
-
-logger = setup_logger()
-
-
-class Need:
-    """
-    Contains methods to read and interface with the NEED dataset.
-
-    Current iterations of this data is the 2021 anonymised dataset, which can be found here:
-    https://www.gov.uk/government/statistics/national-energy-efficiency-data-framework-need-anonymised-data-2021
-    """
-
-    def __init__(self, local_authorities, path):
-        self.local_authorities = local_authorities
-        self.path = path
-
-    def read(self) -> pd.DataFrame:
-        """
-        Reads the NEED dataset from a csv file.
-        :param path: path to the csv file
-        :return: pandas dataframe containing the data
-        """
-        logger.info("Reading NEED data - could take a moment")
-        df = pd.read_csv(self.path)
-        df = df[df["REGION"].isin(self.local_authorities)]
-
-        z = df[df["REGION"].str.contains("E9")]
-
-        type(df["REGION"].values[0])
-        return df
diff --git a/model_data/app.py b/model_data/app.py
index 424c19ca..c5a019c9 100644
--- a/model_data/app.py
+++ b/model_data/app.py
@@ -91,15 +91,6 @@ def handler():
     #
     # df.to_dict("records")
 
-    from model_data.Need import Need
-    import os
-
-    need_client = Need(
-        local_authorities=local_authorities,
-        path=os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/need_2021_anon_dataset_4million.csv"
-    )
-    need_data = need_client.read()
-
     ## Land registry
     from model_data.LandRegistryClient import LandRegistryClient
     import os
diff --git a/model_data/tests/test_land_registry_client.py b/model_data/tests/test_land_registry_client.py
new file mode 100644
index 00000000..97a30641
--- /dev/null
+++ b/model_data/tests/test_land_registry_client.py
@@ -0,0 +1,49 @@
+import pandas as pd
+from unittest.mock import patch, call
+from model_data.LandRegistryClient import LandRegistryClient
+
+
+class TestLandRegistryClient:
+    @patch('pandas.read_csv')
+    @patch('fuzzywuzzy.fuzz.ratio')
+    def test_read(self, mock_fuzz, mock_read_csv):
+        # setup mocks
+        mock_read_csv.return_value = pd.DataFrame([
+            ['1', '815000', '2022-06-28', 'SW6 3JA', 'F', 'N', 'L', 'FLAT', 'GROUND FLOOR', '6 DYMOCK STREET', None,
+             'LONDON', 'HAMMERSMITH AND FULHAM', 'GREATER LONDON', 'A', 'Y'],
+            ['2', '200', '2023-03-04', 'N16 0EG', 'S', 'N', 'F', '25', None, 'DEFOE ROAD', None, 'LONDON', 'HACKNEY',
+             'GREATER LONDON', 'B', 'Y'],
+            ['3', '300', '2023-03-05', 'N16 0EG', 'O', 'Y', 'L', '26', None, 'DEFOE ROAD', None, 'LONDON', 'HACKNEY',
+             'GREATER LONDON', 'A', 'N']
+        ], columns=LandRegistryClient.COLUMN_NAMES)
+        mock_fuzz.side_effect = [70, 100, 50]
+
+        # setup client
+        addresses = [{
+            'postcode': 'SW6 3JA', 'address1': 'GROUND FLOOR', 'address2': 'FLAT 6 DYMOCK STREET', 'address3': 'FULHAM',
+            'address': 'Ground Floor Flat, 6 Dymock Street, Fulham', 'uprn': '34063921'
+        }]
+        client = LandRegistryClient(['path_to_data.csv'], addresses)
+
+        # call read
+        result = client.read()
+
+        # assertions
+        mock_read_csv.assert_called_once_with('path_to_data.csv', header=None)
+
+        assert mock_fuzz.call_args_list == [
+            call('GROUND FLOOR FLAT 6 DYMOCK STREET ', 'GROUND FLOOR FLAT 6 DYMOCK STREET FULHAM'),
+            call('GROUND FLOOR FLAT 6 DYMOCK STREET ', 'GROUND FLOOR FLAT 6 DYMOCK STREET FULHAM')
+        ]
+
+        pd.testing.assert_frame_equal(result, pd.DataFrame({
+            'price': ["815000"],
+            'date_of_transfer': ['2022-06-28'],
+            'property_type': ['F'],
+            'old_new': ['N'],
+            'duration': ['L'],
+            'ppd_category_type': ['A'],
+            'record_status': ['Y'],
+            'uprn': ['34063921'],
+            'address': ['Ground Floor Flat, 6 Dymock Street, Fulham']
+        }))