Added unit tests for land registry

This commit is contained in:
Khalim Conn-Kowlessar 2023-06-16 09:36:02 +01:00
parent f61b7d371d
commit 8f99431df3
3 changed files with 49 additions and 41 deletions

View file

@ -1,32 +0,0 @@
import pandas as pd
from model_data.utils import setup_logger
logger = setup_logger()
class Need:
    """
    Contains methods to read and interface with the NEED dataset.

    The current iteration of this data is the 2021 anonymised dataset, which can be found here:
    https://www.gov.uk/government/statistics/national-energy-efficiency-data-framework-need-anonymised-data-2021
    """

    def __init__(self, local_authorities, path):
        """
        :param local_authorities: list-like of values to keep, matched against the "REGION" column
        :param path: path to the NEED csv file
        """
        self.local_authorities = local_authorities
        self.path = path

    def read(self) -> pd.DataFrame:
        """
        Reads the NEED dataset from the csv file at ``self.path`` and keeps only the
        rows whose "REGION" value is one of ``self.local_authorities``.

        :return: pandas dataframe containing the filtered data (empty when no row matches)
        """
        logger.info("Reading NEED data - could take a moment")
        df = pd.read_csv(self.path)
        # Return the filtered frame directly, without inspecting its contents:
        # indexing the first "REGION" value would raise IndexError whenever no
        # row matches, and an empty result is a legitimate outcome for callers.
        return df[df["REGION"].isin(self.local_authorities)]

View file

@ -91,15 +91,6 @@ def handler():
#
# df.to_dict("records")
from model_data.Need import Need
import os
need_client = Need(
local_authorities=local_authorities,
path=os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/need_2021_anon_dataset_4million.csv"
)
need_data = need_client.read()
## Land registry
from model_data.LandRegistryClient import LandRegistryClient
import os

View file

@ -0,0 +1,49 @@
import pandas as pd
from unittest.mock import patch, call
from model_data.LandRegistryClient import LandRegistryClient
class TestLandRegistryClient:
    """Tests for LandRegistryClient, run with pandas.read_csv and fuzz.ratio patched."""

    @patch('pandas.read_csv')
    @patch('fuzzywuzzy.fuzz.ratio')
    def test_read(self, mock_fuzz, mock_read_csv):
        """Check the frame returned by read() and the calls made to both mocks."""
        # The single address handed to the client under test.
        addresses = [{
            'postcode': 'SW6 3JA',
            'address1': 'GROUND FLOOR',
            'address2': 'FLAT 6 DYMOCK STREET',
            'address3': 'FULHAM',
            'address': 'Ground Floor Flat, 6 Dymock Street, Fulham',
            'uprn': '34063921',
        }]

        # Three canned rows served in place of the real csv contents.
        row_sw6 = [
            '1', '815000', '2022-06-28', 'SW6 3JA', 'F', 'N', 'L', 'FLAT', 'GROUND FLOOR',
            '6 DYMOCK STREET', None, 'LONDON', 'HAMMERSMITH AND FULHAM', 'GREATER LONDON', 'A', 'Y',
        ]
        row_n16_first = [
            '2', '200', '2023-03-04', 'N16 0EG', 'S', 'N', 'F', '25', None,
            'DEFOE ROAD', None, 'LONDON', 'HACKNEY', 'GREATER LONDON', 'B', 'Y',
        ]
        row_n16_second = [
            '3', '300', '2023-03-05', 'N16 0EG', 'O', 'Y', 'L', '26', None,
            'DEFOE ROAD', None, 'LONDON', 'HACKNEY', 'GREATER LONDON', 'A', 'N',
        ]
        mock_read_csv.return_value = pd.DataFrame(
            [row_sw6, row_n16_first, row_n16_second],
            columns=LandRegistryClient.COLUMN_NAMES,
        )

        # Scores returned by successive fuzz.ratio calls.
        mock_fuzz.side_effect = [70, 100, 50]

        # Exercise the client.
        client = LandRegistryClient(['path_to_data.csv'], addresses)
        result = client.read()

        # The csv must be loaded exactly once, without a header row.
        mock_read_csv.assert_called_once_with('path_to_data.csv', header=None)

        # fuzz.ratio must have been called exactly twice, with identical arguments.
        expected_call = call(
            'GROUND FLOOR FLAT 6 DYMOCK STREET ',
            'GROUND FLOOR FLAT 6 DYMOCK STREET FULHAM',
        )
        assert mock_fuzz.call_args_list == [expected_call, expected_call]

        # Exactly one row is expected back, carrying the uprn and address of the input.
        expected_columns = {
            'price': ["815000"],
            'date_of_transfer': ['2022-06-28'],
            'property_type': ['F'],
            'old_new': ['N'],
            'duration': ['L'],
            'ppd_category_type': ['A'],
            'record_status': ['Y'],
            'uprn': ['34063921'],
            'address': ['Ground Floor Flat, 6 Dymock Street, Fulham'],
        }
        expected = pd.DataFrame(expected_columns)
        pd.testing.assert_frame_equal(result, expected)