Model/etl/customers/urban_splash/asset_list.py
2024-02-22 10:58:19 +00:00

195 lines
7.2 KiB
Python

import os
import pandas as pd
from tqdm import tqdm
from dotenv import load_dotenv
from utils.s3 import read_excel_from_s3
from backend.SearchEpc import SearchEpc
from epc_api.client import EpcClient
from utils.s3 import save_csv_to_s3
# Load the backend .env file so that EPC_AUTH_TOKEN can be read from the environment
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")

# Identifiers used to build the S3 input paths and the plan request bodies
USER_ID = 8
PORTFOLIO_ID = 66
SECOND_SCENARIO_PORTFOLIO_ID = 65

# A second portfolio is created for the subset of properties that do not meet the install
# requirements. These uprns are excluded from the first plan and saved under the second one.
second_portfolio_uprns = [
    10070056840,
    10070056846,
    10070056847,
    10070056843,
    10070056848,
    10070056844,
    10070056849,
    10070056829,
    10070056920,
    10023345463,
]
def _find_newest_epc(address1, postcodes):
    """
    Search the EPC database for a property, trying each candidate postcode in turn.

    :param address1: first line of the address to search for
    :param postcodes: candidate postcodes, tried in the order given
    :return: the newest EPC from the first postcode that yields one, or None if none of them do
    """
    for postcode in postcodes:
        searcher = SearchEpc(
            address1=address1, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key=""
        )
        searcher.find_property(skip_os=True)
        if searcher.newest_epc is not None:
            return searcher.newest_epc
    return None


def _count_rooms(beds):
    """
    Estimate the number of heated/habitable rooms from the "Beds" value of an asset list row.

    :param beds: either the string "Studio" or a number of bedrooms
    :return: 2 for a studio, otherwise the number of bedrooms plus 2
    """
    if beds == "Studio":
        return 2
    # Assume one room for communal space, one room for bathroom
    return beds + 2


def _plan_request_body(portfolio_id, trigger_file_path):
    """
    Build the plan request body for a portfolio whose inputs were saved at trigger_file_path.

    :param portfolio_id: id of the portfolio the plan is for
    :param trigger_file_path: S3 key of the inputs csv saved for that portfolio
    :return: the request body as a dictionary
    """
    return {
        "portfolio_id": str(portfolio_id),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": trigger_file_path,
        "budget": None,
    }


def app():
    """
    This application will read in the Urban Splash data, in the dev AWS account, and pre-process it. There are a
    few issues with the file, including incorrect postcodes.
    The customer is interested in the following:
    - Getting properties to an EPC C
    - Doing so within a budget of £5,000

    The processed asset list is split in two: properties whose uprn is in second_portfolio_uprns are saved under
    SECOND_SCENARIO_PORTFOLIO_ID, all other properties are saved under PORTFOLIO_ID. The request body for each
    portfolio is printed.

    NOTE(review): the request bodies are built with a budget of None, even though a £5,000 budget is mentioned
    above - confirm that this is intended.

    :raises LookupError: if no EPC can be found for a property under any of the candidate postcodes
    :return:
    """
    potential_postcodes = ["BD9 5BQ", "BD9 5BR", "BD9 5BN"]
    raw_asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/urban_splash/raw_asset_list/USRF - Velvet Mill EPC.xlsx",
        header_row=2
    )

    # We have a series of apartment numbers that are "Apartment 001", "Apartment 002", etc. We need to convert these
    # to "Apartment 1", "Apartment 2", etc. The patterns are plain text, so no regex matching is needed
    raw_asset_list["address1"] = (
        raw_asset_list["Unit Number"]
        .str.replace("Apartment 00", "Apartment ", regex=False)
        .str.replace("Apartment 0", "Apartment ", regex=False)
    )

    # For each entry in the asset list, we make an api call to the EPC database to get the EPC data. We'll retrieve the
    # uprn for the property, as well as a nice address and postcode that we can use. We'll also try and deduce the
    # likely wall construction, since many of the homes are new builds, based on their newest EPC
    epc_data = []
    processed_asset_list = []
    for _, row in tqdm(raw_asset_list.iterrows(), total=len(raw_asset_list)):
        address1 = row["address1"]
        newest_epc = _find_newest_epc(address1, potential_postcodes)
        if newest_epc is None:
            raise LookupError(
                f"No EPC found for {address1!r} in any of the postcodes {potential_postcodes}"
            )

        number_of_rooms = _count_rooms(row["Beds"])
        processed_asset_list.append(
            {
                **row.to_dict(),
                "uprn": newest_epc["uprn"],
                "address": newest_epc["address1"],
                "postcode": newest_epc["postcode"],
                # "walls-description": newest_epc["walls-description"],
                # "roof-description": newest_epc["roof-description"],
                # "floor-description": newest_epc["floor-description"],
                # "total-floor-area": newest_epc["total-floor-area"],
                "full-address": newest_epc["address"],
                "number-heated-rooms": number_of_rooms,
                "number-habitable-rooms": number_of_rooms,
            }
        )
        epc_data.append(newest_epc)

    processed_asset_list_df = pd.DataFrame(processed_asset_list)
    epc_data_df = pd.DataFrame(epc_data)

    # Rows belonging to the second portfolio; computed once and used for both splits
    in_second_portfolio = processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)

    # Store the data for the first portfolio in s3, without the second portfolio's properties
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=processed_asset_list_df[~in_second_portfolio],
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )
    body = _plan_request_body(PORTFOLIO_ID, filename)
    print(body)

    # Store the data for the second portfolio in s3
    subset = processed_asset_list_df[in_second_portfolio]
    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=subset,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename2
    )
    # The second body must point at the second portfolio's own inputs file
    body = _plan_request_body(SECOND_SCENARIO_PORTFOLIO_ID, filename2)
    print(body)

    # Some basic analysis on the heating, heating controls and hot water systems
    # All of the heating systems are rated very poor, poor or average. When it's average, they are all also
    # "Room heaters, electric", but the house has "Programmer and appliance thermostats" for the heating controls,
    # which is more efficient
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Heating
    print(epc_data_df[["mainheat-description", "mainheatcont-description", "mainheat-energy-eff"]].drop_duplicates())
    # mainheat-description mainheatcont-description mainheat-energy-eff
    # 0 Room heaters, electric Programmer and room thermostat Very Poor
    # 12 Room heaters, electric Programmer and appliance thermostats Average
    # 20 Electric storage heaters, radiators Celect-type controls Poor

    # Hot water
    print(epc_data_df[["hotwater-description", "hot-water-energy-eff"]].drop_duplicates())
    # hotwater-description hot-water-energy-eff
    # 0 Electric immersion, standard tariff Very Poor
    # 12 Electric immersion, off-peak Average
# We now retrieve EPCs for all of the properties in these postcodes that are very obviously part of the Velvet Mill.
# We'll use this information to get a sense of the likely wall/roof/floor construction for the properties
# client = EpcClient(auth_token=EPC_AUTH_TOKEN)
#
# neighbouring_epcs = []
# for pc in potential_postcodes:
# response = client.domestic.search(params={"postcode": pc}, size=1000)
# data = response["rows"]
#
# # keep just rows that are clearly for the velvet mill
# data = [x for x in data if "velvet" in x["address1"].lower()]
#
# neighbouring_epcs.extend(data)
#
# neighbouring_epcs_df = pd.DataFrame(neighbouring_epcs)
# neighbouring_epcs_df["walls-description"].value_counts()
# neighbouring_epcs_df["roof-description"].value_counts()
# neighbouring_epcs_df["floor-description"].value_counts()