diff --git a/etl/ownership/Ownership.py b/etl/ownership/Ownership.py index 5b421e7b..b05ef22a 100644 --- a/etl/ownership/Ownership.py +++ b/etl/ownership/Ownership.py @@ -1038,7 +1038,7 @@ class Ownership: return asset_list - def create_final_outputs(self, portfolio_timestamp, exclusion_uprns=None): + def create_final_outputs(self, portfolio_timestamp, storage_date, exclusion_uprns=None): """ Given the completed outputs of the matching process, this function creates the final outputs, after matching valuation data, and creates a "working" directory, which is our current view of the sfr portfolio. This means @@ -1051,7 +1051,7 @@ class Ownership: exclusion_uprns = [] if exclusion_uprns is None else exclusion_uprns # Step 1: Read in the valuations data - valuatio_ns = read_excel_from_s3( + valuations = read_excel_from_s3( bucket_name=self.bucket, file_key=f"ownership/{self.project_name}/sfr property valuations.xlsx", header_row=0 @@ -1087,15 +1087,40 @@ class Ownership: # We make some final cuts based on UPRNs that at a later stage are found to be odd if portfolio_properties["UPRN"].isin(exclusion_uprns).sum(): + raise Exception("Implement me!") # Identify who the owners are for thes uprns - owners = portfolio_properties[portfolio_properties["UPRN"].isin(exclusion_uprns)].groupby( - "Company Registration No. (1)" - )["UPRN"].nunique().reset_index().rename( - columns={"UPRN": "number_of_properties_to_exclude"} - ) + # owners = portfolio_properties[portfolio_properties["UPRN"].isin(exclusion_uprns)].groupby( + # "Company Registration No. (1)" + # )["UPRN"].nunique().reset_index().rename( + # columns={"UPRN": "number_of_properties_to_exclude"} + # ) + # + # min_owners_threshold = portfolio_owners["total_number_of_properties"].min() + # + # portfolio_owners = portfolio_owners.merge( + # owners, how="left", on="Company Registration No. (1)", suffixes=("", "_excluded") + # ) - min_owners_threshold = portfolio_owners["total_number_of_properties"].min() + # Step 2: Merge in the valuations data + portfolio_properties = portfolio_properties.merge( + valuations, how="left", on="UPRN" + ) - portfolio_owners = portfolio_owners.merge( - owners, how="left", on="Company Registration No. (1)", suffixes=("", "_excluded") - ) + # Step 3: Store the final outputs + save_excel_to_s3( + df=portfolio_owners, + bucket_name=self.bucket, + file_key=f"ownership/{self.project_name}/current/{storage_date}/portfolio_owners.xlsx", + ) + + save_excel_to_s3( + df=portfolio_properties, + bucket_name=self.bucket, + file_key=f"ownership/{self.project_name}/current/{storage_date}/portfolio_properties.xlsx", + ) + + save_excel_to_s3( + df=portfolio_epc_data, + bucket_name=self.bucket, + file_key=f"ownership/{self.project_name}/current/{storage_date}/portfolio_epc_data.xlsx", + ) diff --git a/etl/ownership/config.py b/etl/ownership/config.py index 3f153817..ac92693a 100644 --- a/etl/ownership/config.py +++ b/etl/ownership/config.py @@ -3,10 +3,10 @@ OWNERS_WHO_CANT_SELL = [ # Al Rayan - they're the senior lender, not able to sell "4483430", # Ultrabarn - they're unwilling to sell and will sort any retrofits themselves - "2794851" + "2794851", # Mountview - Anna spoke with someone from Mounview - they acquire tenancies and sell them as soon as they become # vacant. They have no immediate opportunities but we may come back and remove this - "328090" + "328090", ] EXCLUDED_UPRNS = [ diff --git a/etl/ownership/projects/midlands_portfolio/app.py b/etl/ownership/projects/midlands_portfolio/app.py index 8a2abe48..19a52357 100644 --- a/etl/ownership/projects/midlands_portfolio/app.py +++ b/etl/ownership/projects/midlands_portfolio/app.py @@ -1,4 +1,5 @@ -import pandas as pd +import datetime + from sqlalchemy.orm import sessionmaker from backend.app.db.connection import db_engine from backend.app.db.models.portfolio import Portfolio, PortfolioUsers @@ -170,6 +171,13 @@ def app(): # missed.to_csv("missed_valuations.csv") # We now need a distinct step to prepare final outputs - portfolio_timestamp = "2024-08-20 18:53:08.326351" + portfolio_timestamp = "2024-08-20 19:28:18.260205" exclusion_uprns = EXCLUDED_UPRNS + + # Create a date in the yyyy-mm-dd format to store the data against + storage_date = datetime.datetime.now().strftime("%Y-%m-%d") + + ownership_instance.create_final_outputs( + portfolio_timestamp=portfolio_timestamp, storage_date=storage_date, exclusion_uprns=EXCLUDED_UPRNS + )