mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
debugging ownership class
This commit is contained in:
parent
9938dea190
commit
41c38e622d
2 changed files with 60 additions and 3 deletions
|
|
@ -4,7 +4,7 @@ from tqdm import tqdm
|
|||
import pandas as pd
|
||||
import Levenshtein
|
||||
import re
|
||||
from utils.s3 import save_excel_to_s3
|
||||
from utils.s3 import save_excel_to_s3, read_excel_from_s3
|
||||
from utils.logger import setup_logger
|
||||
from backend.SearchEpc import SearchEpc
|
||||
from etl.spatial.OpenUprnClient import OpenUprnClient
|
||||
|
|
@ -948,7 +948,7 @@ class Ownership:
|
|||
matched_addresses_final = self.matched_addresses[
|
||||
~self.matched_addresses["sold_recently"] &
|
||||
~self.matched_addresses["sale_lodged_recently"]
|
||||
]
|
||||
].copy()
|
||||
|
||||
logger.info("Performing conservation area and listed/herigage building filtering")
|
||||
|
||||
|
|
@ -973,7 +973,7 @@ class Ownership:
|
|||
|
||||
# Filter combined_matching_lookup accordingly
|
||||
combined_matching_lookup_final = self.combined_matching_lookup[
|
||||
self.combined_matching_lookup["UPRN"].isin(self.combined_matching_lookup["UPRN"])
|
||||
self.combined_matching_lookup["UPRN"].isin(matched_addresses_final["UPRN"])
|
||||
]
|
||||
|
||||
# Roll up portfolio
|
||||
|
|
@ -991,8 +991,16 @@ class Ownership:
|
|||
)
|
||||
]
|
||||
|
||||
# We perform some checks
|
||||
if self.portfolio_owners["total_number_of_properties"].sum() != self.portfolio_properties["UPRN"].nunique():
|
||||
raise ValueError("Portfolio owners and properties don't match")
|
||||
|
||||
self.portfolio_epc_data = self.epc_data[self.epc_data["UPRN"].isin(self.portfolio_properties["UPRN"])]
|
||||
|
||||
# Additional checks
|
||||
if self.portfolio_properties["UPRN"].nunique() != self.portfolio_epc_data["UPRN"].nunique():
|
||||
raise ValueError("Portfolio properties and epc data don't match")
|
||||
|
||||
logger.info("Storing final outpus")
|
||||
# Store data
|
||||
save_excel_to_s3(
|
||||
|
|
@ -1028,3 +1036,49 @@ class Ownership:
|
|||
)
|
||||
|
||||
return asset_list
|
||||
|
||||
def create_final_outputs(self, portfolio_timestamp):
|
||||
"""
|
||||
Given the completed outputs of the matching process, this function creates the final outputs, after matching
|
||||
valuation data, and creates a "working" directory, which is our current view of the sfr portfolio. This means
|
||||
that we can iterate on the portfolio without affecting the final outputs, and then once we're happy with the
|
||||
new version, we can commit those files to the "working" directory. This information shouldn't update very
|
||||
often and so we're ok to store this at a daily level
|
||||
:return:
|
||||
"""
|
||||
|
||||
# Step 1: Read in the valuations data
|
||||
valuations = read_excel_from_s3(
|
||||
bucket_name=self.bucket,
|
||||
file_key=f"ownership/{self.project_name}/sfr property valuations.xlsx",
|
||||
header_row=0
|
||||
)
|
||||
|
||||
# Load in the portfolio data
|
||||
# 1) owners
|
||||
portfolio_owners = read_excel_from_s3(
|
||||
bucket_name=self.bucket,
|
||||
file_key=f"ownership/{self.project_name}/{portfolio_timestamp}/portfolio_owners.xlsx",
|
||||
header_row=0
|
||||
)
|
||||
# 2) EPC
|
||||
portfolio_epc_data = read_excel_from_s3(
|
||||
bucket_name=self.bucket,
|
||||
file_key=f"ownership/{self.project_name}/{portfolio_timestamp}/portfolio_epc_data.xlsx",
|
||||
header_row=0
|
||||
)
|
||||
|
||||
# 3) properties
|
||||
portfolio_properties = read_excel_from_s3(
|
||||
bucket_name=self.bucket,
|
||||
file_key=f"ownership/{self.project_name}/{portfolio_timestamp}/portfolio_properties.xlsx",
|
||||
header_row=0
|
||||
)
|
||||
|
||||
portfolio_epc_data["UPRN"].duplicated().sum()
|
||||
portfolio_properties["UPRN"].duplicated().sum()
|
||||
portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
|
||||
|
||||
portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
|
||||
|
||||
portfolio_epc_data.shape
|
||||
|
|
|
|||
|
|
@ -162,3 +162,6 @@ def app():
|
|||
"budget": None,
|
||||
}
|
||||
print(body)
|
||||
|
||||
# We now need a distinct step to prepare final outputs
|
||||
portfolio_timestamp = "2024-08-20 15:51:10.292075"
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue