# Model/etl/ownership/projects/midlands_portfolio/app.py
# 2024-08-20 20:25:53 +01:00
#
# 181 lines
# 8.2 KiB
# Python

import datetime
from sqlalchemy.orm import sessionmaker
from backend.app.db.connection import db_engine
from backend.app.db.models.portfolio import Portfolio, PortfolioUsers
from etl.ownership.Ownership import Ownership
from etl.ownership.config import OWNERS_WHO_CANT_SELL as EXCLUDED_OWNERS, EXCLUDED_UPRNS
from utils.s3 import save_csv_to_s3
# ---------------------------------------------------------------------------
# Project configuration
# ---------------------------------------------------------------------------

# User ids for this project (the name beside each id is the user it refers to).
USER_IDS = [
    2,  # Khalim
    3,  # Chenai
    5,  # Anna
    30,  # Patricia
]

# EPC certificate extracts to load, one CSV per local-authority folder.
EPC_PATHS = [
    "local_data/all-domestic-certificates/domestic-E08000025-Birmingham/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E08000031-Wolverhampton/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E08000026-Coventry/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000016-Leicester/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000015-Derby/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000021-Stoke-on-Trent/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000018-Nottingham/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000154-Northampton/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000061-North-Northamptonshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000062-West-Northamptonshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000152-East-Northamptonshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000155-South-Northamptonshire/certificates.csv",
    # (second, unlabelled group of authorities)
    "local_data/all-domestic-certificates/domestic-E08000027-Dudley/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E08000029-Solihull/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000234-Bromsgrove/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E08000030-Walsall/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E08000028-Sandwell/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000019-Herefordshire-County-of/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000020-Telford-and-Wrekin/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000218-North-Warwickshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000222-Warwick/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000237-Worcester/certificates.csv",
    # East Midlands
    "local_data/all-domestic-certificates/domestic-E07000035-Derbyshire-Dales/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000038-North-East-Derbyshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000039-South-Derbyshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000012-North-East-Lincolnshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000013-North-Lincolnshire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000138-Lincoln/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E07000134-North-West-Leicestershire/certificates.csv",
    "local_data/all-domestic-certificates/domestic-E06000017-Rutland/certificates.csv",
]

# Ownership and Land Registry extracts.
# NOTE(review): these are absolute paths on one developer's machine; anyone
# else running this script has to point them at their own copies.
DOMESTIC_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/CCOD_FULL_2024_07.csv"
OVERSEAS_OWNERSHIP_PATH = "/Users/khalimconn-kowlessar/Downloads/OCOD_FULL_2024_07.csv"
LAND_REGISTRY_PATH = "/Users/khalimconn-kowlessar/Downloads/pp-complete.csv"

PROJECT_NAME = "Midlands Portfolio"
DATA_BUCKET = "retrofit-data-dev"

# A rough per-property figure, which helps us shape the portfolio.
PROPERTY_VALUE_ESTIMATE = 200_000

# We want a 50m portfolio, but we create a bigger portfolio than needed,
# since properties will be filtered out later.
PORTFOLIO_VALUE = 75_000_000
def create_sfr_portfolio(project_name, user_ids, status, goal):
    """Return the portfolio named ``project_name``, creating it if it is missing.

    A ``Portfolio`` is looked up by name. When none exists, a new one is
    inserted together with one ``PortfolioUsers`` row per entry in
    ``user_ids``, all in a single transaction.

    Args:
        project_name: Name used to look up, or create, the portfolio.
        user_ids: Iterable of user ids. Compared against the users of an
            existing portfolio, or associated with a newly created one.
        status: Status stored on a newly created portfolio.
        goal: Goal stored on a newly created portfolio.

    Returns:
        * the existing portfolio, when at least one of ``user_ids`` is
          already associated with it;
        * ``None``, when a portfolio with this name exists but none of
          ``user_ids`` is associated with it;
        * the newly created portfolio otherwise.

        The returned object is detached, because the session is closed before
        returning.

    Raises:
        Exception: Any error raised while querying or writing (including the
            multiple-rows error from ``one_or_none()`` when several
            portfolios share the name) is re-raised after a rollback.
    """
    session = sessionmaker(bind=db_engine)()
    try:
        # No explicit session.begin(): a default Session opens its transaction
        # on first use, and on legacy-style sessions an explicit begin() fails
        # with "A transaction is already begun".
        portfolio = session.query(Portfolio).filter_by(name=project_name).one_or_none()

        if portfolio is not None:
            existing_user_ids = {
                row.user_id
                for row in session.query(PortfolioUsers.user_id).filter_by(portfolioId=portfolio.id)
            }

            if existing_user_ids.isdisjoint(user_ids):
                print("Portfolio already exists under this name, for different users.")
                session.rollback()  # No changes to be committed
                return None  # Optional: the user associations could be updated here instead

            # Deliberately no rollback on this path: it would expire the
            # attributes of the portfolio that is about to be returned.
            print("Portfolio already exists under this name, for specified users.")
            return portfolio

        # No portfolio under this name yet: create it with the given status and goal.
        new_portfolio = Portfolio(name=project_name, status=status, goal=goal)
        session.add(new_portfolio)
        session.flush()  # Makes new_portfolio.id available before the commit

        # One association row per requested user.
        session.add_all(
            PortfolioUsers(user_id=user_id, portfolioId=new_portfolio.id)
            for user_id in user_ids
        )
        session.commit()

        # commit() expires loaded attributes by default; reload them while the
        # session is still open so the caller gets a usable detached object.
        session.refresh(new_portfolio)
        print(f"New portfolio created with ID: {new_portfolio.id}")
        return new_portfolio
    except Exception as e:
        session.rollback()  # Ensure no partial changes are committed
        print(f"An error occurred: {e}")
        raise
    finally:
        session.close()
def app():
    """Run the ownership pipeline for the Midlands project and stage its outputs.

    In order, this:
      1. builds an ``Ownership`` instance from the module-level configuration
         and runs its pipeline with an EPC filter on ratings F and G;
      2. uploads the resulting asset list as a CSV to the
         ``retrofit-plan-inputs-dev`` bucket under
         ``<user_id>/<portfolio_id>/asset_list.csv``;
      3. prints a ``body`` dict that points at the uploaded file (it is only
         printed here, not sent anywhere);
      4. asks the ``Ownership`` instance to create the final outputs for a
         fixed portfolio timestamp and today's date.

    The portfolio and user ids are hard-coded because the call that would
    create the portfolio is disabled (see the TODO below).
    """
    ownership = Ownership(
        epc_paths=EPC_PATHS,
        domestic_ownership_path=DOMESTIC_OWNERSHIP_PATH,
        overseas_ownership_path=OVERSEAS_OWNERSHIP_PATH,
        land_registry_path=LAND_REGISTRY_PATH,
        project_name=PROJECT_NAME,
        bucket=DATA_BUCKET,
        average_property_value=PROPERTY_VALUE_ESTIMATE,
        portfolio_value=PORTFOLIO_VALUE,
        excluded_owners=EXCLUDED_OWNERS,
        excluded_uprns=EXCLUDED_UPRNS,
    )
    # Only keep certificates whose current energy rating is F or G.
    ownership.pipeline(column_filters={"CURRENT_ENERGY_RATING": ["F", "G"]})

    # The asset list becomes the body of the portfolio.
    assets = ownership.get_asset_list()

    # Create the portfolio, if one doesn't exist for the project name.
    # TODO: Wasn't working
    # create_sfr_portfolio(project_name=PROJECT_NAME, user_ids=USER_IDS, status="scoping", goal="Increasing EPC")
    portfolio_id, user_id = 99, 8

    trigger_file = f"{user_id}/{portfolio_id}/asset_list.csv"
    save_csv_to_s3(
        dataframe=assets,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=trigger_file,
    )

    body = {
        "portfolio_id": str(portfolio_id),
        "housing_type": "Private",
        "goal": "Increasing EPC",
        "goal_value": "C",
        "trigger_file_path": trigger_file,
        "already_installed_file_path": "",
        "patches_file_path": "",
        "non_invasive_recommendations_file_path": "",
        "scenario_name": "Hit EPC C",
        "multi_plan": True,
        "exclusions": ["fireplace", "floor_insulation"],
        "budget": None,
    }
    print(body)

    # Disabled step: read the current valuation data and identify any uprns
    # that still need to be added.
    # previous_valuations = pd.read_excel(
    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/sfr/sfr property valuations.xlsx")
    # missed = asset_list[~asset_list["uprn"].astype(str).isin(previous_valuations["uprn"].astype(str))]
    # missed.to_csv("missed_valuations.csv")

    # Distinct final step: prepare the final outputs.
    portfolio_timestamp = "2024-08-20 19:51:33.884145"
    # Today's date in yyyy-mm-dd format, used to store the data against.
    storage_date = f"{datetime.datetime.now():%Y-%m-%d}"
    ownership.create_final_outputs(
        portfolio_timestamp=portfolio_timestamp,
        storage_date=storage_date,
        exclusion_uprns=EXCLUDED_UPRNS,
    )