added land registry get and filter

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-19 11:32:56 +01:00
parent 6f053a20d1
commit 287960361d
2 changed files with 106 additions and 2 deletions

View file

@ -28,6 +28,26 @@ class Ownership:
# flagged as potentially in the process of being sold
LODGED_RECENTLY_MONTHS = 12
# Column names for the land registry data, in file order. get_land_registry()
# reads the CSV with header=None and assigns this list to DataFrame.columns,
# so the order here must match the column order of the file.
LAND_REGISTRY_COLUMNS = [
"transaction_id",
"price",
"date_of_transfer",
"postcode",
"property_type",
"old_new",
"duration",
"paon",
"saon",
"street",
"locality",
"town_city",
"district",
"county",
"ppd_category_type",
"record_status",
]
def __init__(
self,
epc_paths: List[str],
@ -517,16 +537,36 @@ class Ownership:
.str.replace(",", "")
)
def get_land_registry(self):
    """
    Read the land registry data and narrow it down to relevant, recent transfers.

    The CSV at ``self.land_registry_path`` is read without a header row and its columns are named from
    ``self.LAND_REGISTRY_COLUMNS``. A row is kept only when:

    - its postcode (compared case-insensitively, ignoring surrounding whitespace) appears in
      ``self.epc_data["POSTCODE"]``, and
    - its ``date_of_transfer`` falls within the last 5 years.

    :return: A new DataFrame (not a view) holding the matching rows, with ``date_of_transfer`` as datetime
    """
    columns = list(self.LAND_REGISTRY_COLUMNS)

    # Read the address fields as text. Left to type inference, numeric house numbers can come back as
    # ints, and the caller's `.str.lower()` normalisation would then silently turn them into NaN.
    text_columns = ("postcode", "paon", "saon", "street")
    text_dtypes = {columns.index(name): str for name in text_columns}
    land_registry = pd.read_csv(self.land_registry_path, header=None, dtype=text_dtypes)
    land_registry.columns = columns

    # Normalise both sides the same way; missing EPC postcodes are dropped so NaN never matches NaN
    epc_postcodes = self.epc_data["POSTCODE"].dropna().astype(str).str.lower().str.strip().unique()
    in_epc_area = land_registry["postcode"].str.lower().str.strip().isin(epc_postcodes)
    # Copy so the column assignment below acts on an independent frame rather than a slice
    land_registry = land_registry[in_epc_area].copy()

    # The published price-paid file stores dates as "YYYY-MM-DD HH:MM"; keep only the date part so that
    # both that form and a plain "YYYY-MM-DD" parse instead of being coerced to NaT
    land_registry["date_of_transfer"] = pd.to_datetime(
        land_registry["date_of_transfer"].astype(str).str.slice(0, 10),
        format="%Y-%m-%d",
        errors="coerce",
    )

    # Take data from the last 5 years (rows whose date could not be parsed are NaT and drop out here)
    cutoff = datetime.now() - pd.DateOffset(years=5)
    recent = land_registry["date_of_transfer"] >= cutoff
    # Copy again because the caller goes on to overwrite columns of the returned frame
    return land_registry[recent].copy()
def match_with_land_registry(self):
"""
This function matches the land registry data to the existing matches
:return:
"""
# TODO: Refactor this
# TODO: Refactor this entire function
if self.matched_addresses is None:
raise ValueError("Run match() first!")
self.land_registry = pd.read_csv(self.land_registry_path)
logger.info("Reading land registry data")
self.land_registry = self.get_land_registry()
for col in ["postcode", "street", "paon", "saon"]:
self.land_registry[col] = self.land_registry[col].str.lower().str.strip()

View file

@ -0,0 +1,64 @@
from etl.ownership.Ownership import Ownership
# Set up the project configuration
# NOTE(review): USER_IDS and PORTFOLIO_ID are not referenced anywhere else in
# this file; presumably they are reserved for the "Create portfolio and
# payload" TODO in app() - confirm before relying on them.
USER_IDS = [
2, # Khalim
3, # Chenai
5, # Anna
30, # Patricia
]
PORTFOLIO_ID = None
# Relative paths to the EPC certificate CSVs, one per local-authority folder.
# The whole list is passed to Ownership as `epc_paths` in app().
EPC_PATHS = [
"local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
# NOTE(review): unlabelled group - judging by the folder names, presumably
# further West Midlands authorities; confirm and label.
"local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
"local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
# East Midlands
"local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
"local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
"local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
]
# Input files handed to Ownership in app() as domestic_ownership_path,
# overseas_ownership_path and land_registry_path respectively.
# NOTE(review): these are absolute paths into one user's Downloads folder, so
# the script only runs unchanged on that machine.
DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"
# Passed to Ownership as project_name
PROJECT_NAME = "Midlands Portfolio"
def app():
    """
    Build an Ownership instance from the module-level configuration and run its pipeline.
    """
    # Collect the constructor arguments in one place so the configuration is easy to scan
    ownership_settings = {
        "epc_paths": EPC_PATHS,
        "domestic_ownership_path": DOMESTIC_OWNERSHIP_PATH,
        "overseas_ownership_path": OVERSEAS_OWNERSHIP_PATH,
        "land_registry_path": LAND_REGISTRY_PATH,
        "project_name": PROJECT_NAME,
    }
    Ownership(**ownership_settings).pipeline()
    # TODO: Create portfolio and payload