mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added step to remove owners with just 1 property
This commit is contained in:
parent
a153de51c3
commit
520aa430b7
3 changed files with 36 additions and 13 deletions
|
|
@ -933,6 +933,7 @@ class Ownership:
|
|||
)
|
||||
|
||||
pivot_counts = pivot_counts.sort_values("total_number_of_properties", ascending=False)
|
||||
pivot_counts = pivot_counts[pivot_counts["total_number_of_properties"] > 1]
|
||||
|
||||
pivot_counts["approx_value"] = self.average_property_value * pivot_counts["total_number_of_properties"]
|
||||
pivot_counts["cumulative_value"] = pivot_counts["approx_value"].cumsum()
|
||||
|
|
@ -1037,7 +1038,7 @@ class Ownership:
|
|||
|
||||
return asset_list
|
||||
|
||||
def create_final_outputs(self, portfolio_timestamp):
|
||||
def create_final_outputs(self, portfolio_timestamp, exclusion_uprns=None):
|
||||
"""
|
||||
Given the completed outputs of the matching process, this function creates the final outputs, after matching
|
||||
valuation data, and creates a "working" directory, which is our current view of the sfr portfolio. This means
|
||||
|
|
@ -1047,8 +1048,10 @@ class Ownership:
|
|||
:return:
|
||||
"""
|
||||
|
||||
exclusion_uprns = [] if exclusion_uprns is None else exclusion_uprns
|
||||
|
||||
# Step 1: Read in the valuations data
|
||||
valuations = read_excel_from_s3(
|
||||
valuatio_ns = read_excel_from_s3(
|
||||
bucket_name=self.bucket,
|
||||
file_key=f"ownership/{self.project_name}/sfr property valuations.xlsx",
|
||||
header_row=0
|
||||
|
|
@ -1075,10 +1078,24 @@ class Ownership:
|
|||
header_row=0
|
||||
)
|
||||
|
||||
portfolio_epc_data["UPRN"].duplicated().sum()
|
||||
portfolio_properties["UPRN"].duplicated().sum()
|
||||
portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
|
||||
# Check they're the right size
|
||||
if portfolio_owners["total_number_of_properties"].sum() != portfolio_properties["UPRN"].nunique():
|
||||
raise ValueError("Portfolio owners and properties don't match")
|
||||
|
||||
portfolio_properties[~portfolio_properties["UPRN"].astype(str).isin(portfolio_epc_data["UPRN"].astype(str))]
|
||||
if portfolio_properties["UPRN"].nunique() != portfolio_epc_data["UPRN"].nunique():
|
||||
raise ValueError("Portfolio properties and epc data don't match")
|
||||
|
||||
portfolio_epc_data.shape
|
||||
# We make some final cuts based on UPRNs that at a later stage are found to be odd
|
||||
if portfolio_properties["UPRN"].isin(exclusion_uprns).sum():
|
||||
# Identify who the owners are for these UPRNs
|
||||
owners = portfolio_properties[portfolio_properties["UPRN"].isin(exclusion_uprns)].groupby(
|
||||
"Company Registration No. (1)"
|
||||
)["UPRN"].nunique().reset_index().rename(
|
||||
columns={"UPRN": "number_of_properties_to_exclude"}
|
||||
)
|
||||
|
||||
min_owners_threshold = portfolio_owners["total_number_of_properties"].min()
|
||||
|
||||
portfolio_owners = portfolio_owners.merge(
|
||||
owners, how="left", on="Company Registration No. (1)", suffixes=("", "_excluded")
|
||||
)
|
||||
|
|
|
|||
|
|
@ -28,4 +28,8 @@ EXCLUDED_UPRNS = [
|
|||
100031592801,
|
||||
# Can't find reliable information about this property on zoopla/rightmove
|
||||
100031579087,
|
||||
# Can't find reliable information about this property on zoopla/rightmove
|
||||
200000877273,
|
||||
# Can't find reliable information about this property on zoopla/rightmove - seems to be a post office!
|
||||
100071391639
|
||||
]
|
||||
|
|
|
|||
|
|
@ -163,11 +163,13 @@ def app():
|
|||
}
|
||||
print(body)
|
||||
|
||||
# We read in the current valuation data and identify if there are any uprns that need to be added
|
||||
previous_valuations = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
|
||||
missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
|
||||
missed.to_csv("missed_valuations.csv")
|
||||
# # We read in the current valuation data and identify if there are any uprns that need to be added
|
||||
# previous_valuations = pd.read_excel(
|
||||
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
|
||||
# missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
|
||||
# missed.to_csv("missed_valuations.csv")
|
||||
|
||||
# We now need a distinct step to prepare final outputs
|
||||
portfolio_timestamp = "2024-08-20 15:51:10.292075"
|
||||
portfolio_timestamp = "2024-08-20 18:53:08.326351"
|
||||
|
||||
exclusion_uprns = EXCLUDED_UPRNS
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue