From 287960361d28e3c9ca224f7b2cf09e4d617992fe Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar
Date: Mon, 19 Aug 2024 11:32:56 +0100
Subject: [PATCH] added land registry get and filter

---
Review notes (text between "---" and the first "diff --git" is not part of the commit):
- get_land_registry: missing EPC postcodes are dropped before matching, the filtered frame
  is copied before the date column is assigned, only the date part of date_of_transfer is
  parsed, the address columns are read as strings, and the 5 year window is a parameter.
- app.py: app() gets a docstring and is called when the module is run as a script.
- Hunk headers and the diffstat are recounted for the revised content; the "index" lines
  still carry the blob ids of the original commit.

 etl/ownership/Ownership.py                    | 64 +++++++++++++++-
 .../projects/midlands_portfolio/app.py        | 72 +++++++++++++++++++
 2 files changed, 134 insertions(+), 2 deletions(-)
 create mode 100644 etl/ownership/projects/midlands_portfolio/app.py

diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py
index 3bdae59c..7403c45c 100644
--- a/etl/ownership/Ownership.py
+++ b/etl/ownership/Ownership.py
@@ -28,6 +28,27 @@ class Ownership:
     # flagged as potentially in the process of being sold
     LODGED_RECENTLY_MONTHS = 12
 
+    # Column names for the land registry price paid file, which has no header row. They are applied
+    # by position in get_land_registry, so the order here must follow the order in the file
+    LAND_REGISTRY_COLUMNS = [
+        "transaction_id",
+        "price",
+        "date_of_transfer",
+        "postcode",
+        "property_type",
+        "old_new",
+        "duration",
+        "paon",
+        "saon",
+        "street",
+        "locality",
+        "town_city",
+        "district",
+        "county",
+        "ppd_category_type",
+        "record_status",
+    ]
+
     def __init__(
         self,
         epc_paths: List[str],
@@ -517,16 +538,55 @@ class Ownership:
             .str.replace(",", "")
         )
 
+    def get_land_registry(self, years: int = 5):
+        """
+        Read the land registry price paid data and keep only the rows this project needs.
+
+        The file has no header row, so columns are named from LAND_REGISTRY_COLUMNS. Rows are kept
+        when their postcode appears in the EPC data and the transfer happened recently enough.
+
+        :param years: Number of years of transfers to keep, counted back from the current time
+        :return: The filtered land registry data, with date_of_transfer parsed to datetime
+        """
+        land_registry = pd.read_csv(
+            self.land_registry_path,
+            header=None,
+            names=self.LAND_REGISTRY_COLUMNS,
+            # These columns are cleaned with .str methods after loading, which return NaN for any
+            # value parsed as a number (e.g. a house number), so read them as strings
+            dtype={col: str for col in ("postcode", "street", "paon", "saon")},
+        )
+
+        # Drop missing postcodes before matching: isin() treats NaN as equal to NaN, so a single
+        # missing EPC postcode would otherwise keep every land registry row without a postcode
+        epc_postcodes = self.epc_data["POSTCODE"].dropna().str.lower().unique()
+        in_epc_data = land_registry["postcode"].str.lower().isin(epc_postcodes)
+        # Copy so that the assignment below does not write into a slice of the full data
+        land_registry = land_registry[in_epc_data].copy()
+
+        # Parse only the first 10 characters (the date part).
+        # NOTE(review): the published file appears to store dates as "YYYY-MM-DD HH:MM", which a
+        # strict "%Y-%m-%d" parse would coerce to NaT and so filter out every row - confirm
+        land_registry["date_of_transfer"] = pd.to_datetime(
+            land_registry["date_of_transfer"].astype(str).str.slice(0, 10),
+            format="%Y-%m-%d",
+            errors="coerce",
+        )
+
+        cutoff = datetime.now() - pd.DateOffset(years=years)
+        return land_registry[land_registry["date_of_transfer"] >= cutoff]
+
     def match_with_land_registry(self):
         """
         This function matches the land registry data to the existing matches
         :return:
         """
-        # TODO: Refactor this
+        # TODO: Refactor this entire function
         if self.matched_addresses is None:
             raise ValueError("Run match() first!")
 
-        self.land_registry = pd.read_csv(self.land_registry_path)
+        logger.info("Reading land registry data")
+        self.land_registry = self.get_land_registry()
 
         for col in ["postcode", "street", "paon", "saon"]:
             self.land_registry[col] = self.land_registry[col].str.lower().str.strip()
diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py
new file mode 100644
index 00000000..d370ba1e
--- /dev/null
+++ b/etl/ownership/projects/midlands_portfolio/app.py
@@ -0,0 +1,72 @@
+from etl.ownership.Ownership import Ownership
+
+# Set up the project configuration
+USER_IDS = [
+    2,  # Khalim
+    3,  # Chenai
+    5,  # Anna
+    30,  # Patricia
+]
+PORTFOLIO_ID = None
+
+EPC_PATHS = [
+    "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
+    # "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
+    # East midlands
+    "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
+    "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
+]
+
+# NOTE(review): absolute paths on a single developer's machine - these need to be changed
+# before this can be run anywhere else
+DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
+OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
+LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
+
+PROJECT_NAME = "Midlands Portfolio"
+
+
+def app():
+    """
+    Run the ownership pipeline for the Midlands portfolio using the paths configured above
+    """
+    ownership_instance = Ownership(
+        epc_paths=EPC_PATHS,
+        domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
+        overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
+        land_registry_path=LAND_REGISTRY_PATH,
+        project_name=PROJECT_NAME,
+    )
+    ownership_instance.pipeline()
+
+    # TODO: Create portfolio and payload
+
+
+if __name__ == "__main__":
+    app()