added land registry store

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-19 11:34:47 +01:00
parent 287960361d
commit 56889fa4b0

View file

@ -81,6 +81,9 @@ class Ownership:
# Data storage paths
self.epc_data_filepath = f"ownership/{project_name}/{self.run_timestamp}/epc_data.xlsx"
self.filtered_land_registry_filepath = (
f"ownership/{project_name}/{self.run_timestamp}/filtered_land_registry.xlsx"
)
# Data
self.epc_data = None
@ -567,12 +570,19 @@ class Ownership:
logger.info("Reading land registry data")
self.land_registry = self.get_land_registry()
# Store this filtered version in S3
save_excel_to_s3(
df=self.land_registry,
bucket_name="epc_data",
file_key=self.filtered_land_registry_filepath,
)
for col in ["postcode", "street", "paon", "saon"]:
self.land_registry[col] = self.land_registry[col].str.lower().str.strip()
self.land_registry["date_of_transfer"] = pd.to_datetime(self.land_registry["date_of_transfer"])
logger.info("Performing land registry matching")
land_registry_matches = []
for _, match in tqdm(self.matched_addresses.iterrows(), total=len(self.matched_addresses)):
# Filter land registry on the postcode