mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
195 lines
7.2 KiB
Python
195 lines
7.2 KiB
Python
import os
|
|
|
|
import pandas as pd
|
|
from tqdm import tqdm
|
|
|
|
from dotenv import load_dotenv
|
|
from utils.s3 import read_excel_from_s3
|
|
from backend.SearchEpc import SearchEpc
|
|
from epc_api.client import EpcClient
|
|
from utils.s3 import save_csv_to_s3
|
|
|
|
# Load environment variables from the backend's .env file, then pick up the
# token that is passed to the EPC search as `auth_token`. The value is None
# when the variable is not set.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.environ.get("EPC_AUTH_TOKEN")
|
|
|
|
# Identifiers used to build the S3 keys and the plan request bodies.
USER_ID = 8
PORTFOLIO_ID = 66
SECOND_SCENARIO_PORTFOLIO_ID = 65

# A second portfolio is created for the subset of properties that do not meet
# the install requirements. These UPRNs are dropped from the first plan and
# saved under the second portfolio instead.
second_portfolio_uprns = [
    10070056840,
    10070056846,
    10070056847,
    10070056843,
    10070056848,
    10070056844,
    10070056849,
    10070056829,
    10070056920,
    10023345463,
]
|
|
|
|
|
|
def _find_newest_epc(address1, postcodes):
    """
    Search for a property's newest EPC, trying each candidate postcode in order.

    :param address1: First address line to search for (e.g. "Apartment 1").
    :param postcodes: Candidate postcodes, tried in order until one yields an EPC.
    :return: The `newest_epc` of the first search that found one, or None if no
        postcode produced a result.
    """
    for postcode in postcodes:
        searcher = SearchEpc(
            address1=address1, postcode=postcode, auth_token=EPC_AUTH_TOKEN, os_api_key=""
        )
        searcher.find_property(skip_os=True)

        if searcher.newest_epc is not None:
            return searcher.newest_epc

    return None


def _plan_request_body(portfolio_id, trigger_file_path):
    """
    Build the request body describing one plan scenario.

    :param portfolio_id: Portfolio the scenario belongs to (stringified in the body).
    :param trigger_file_path: S3 key of the input CSV saved for this scenario.
    :return: Dict with the scenario's portfolio, goal and trigger file.
    """
    return {
        "portfolio_id": str(portfolio_id),
        "housing_type": "Private",
        "goal": "Increase EPC",
        "goal_value": "C",
        "trigger_file_path": trigger_file_path,
        "budget": None,
    }


def app():
    """
    This application will read in the Urban Splash data, in the dev AWS account, and pre-process it. There are a
    few issues with the file, including incorrect postcodes.

    The customer is interested in the following:
    - Getting properties to an EPC C
    - Doing so within a budget of £5,000

    NOTE(review): the request bodies printed below set "budget" to None, so the £5,000 budget is not
    passed on by this script - confirm that this is intended.

    Two input files are written to the "retrofit-plan-inputs-dev" bucket: one for the main portfolio
    (excluding `second_portfolio_uprns`) and one for the second scenario portfolio (only those UPRNs).
    The request body for each scenario is printed.

    :return: None
    :raises LookupError: if no EPC can be found for a property under any of the candidate postcodes.
    """

    potential_postcodes = ["BD9 5BQ", "BD9 5BR", "BD9 5BN"]

    raw_asset_list = read_excel_from_s3(
        bucket_name="retrofit-datalake-dev",
        file_key="customers/urban_splash/raw_asset_list/USRF - Velvet Mill EPC.xlsx",
        header_row=2
    )

    # We have a series of apartment numbers that are "Apartment 001", "Apartment 002", etc. We need to convert these
    # to "Apartment 1", "Apartment 2", etc. The patterns are plain literals, so no regex matching is needed.
    raw_asset_list["address1"] = (
        raw_asset_list["Unit Number"]
        .str.replace("Apartment 00", "Apartment ", regex=False)
        .str.replace("Apartment 0", "Apartment ", regex=False)
    )

    # For each entry in the asset list, we make an api call to the EPC database to get the EPC data. We'll retrieve the
    # uprn for the property, as well as a nice address and postcode that we can use. We'll also try and deduce the
    # likely wall construction, since many of the homes are new builds, based on their newest EPC

    epc_data = []
    processed_asset_list = []
    for _, row in tqdm(raw_asset_list.iterrows(), total=len(raw_asset_list)):

        # The postcodes in the file are unreliable, so each candidate postcode is tried in turn
        newest_epc = _find_newest_epc(row["address1"], potential_postcodes)

        if newest_epc is None:
            raise LookupError(
                f"No EPC found for '{row['address1']}' in any of the postcodes {potential_postcodes}"
            )

        if row["Beds"] == "Studio":
            number_heated_rooms = 2
            number_habitable_rooms = 2
        else:
            # Assume one room for communal space, one room for bathroom
            number_heated_rooms = row["Beds"] + 2
            number_habitable_rooms = row["Beds"] + 2

        to_append = {
            **row.to_dict(),
            "uprn": newest_epc["uprn"],
            "address": newest_epc["address1"],
            "postcode": newest_epc["postcode"],
            # "walls-description": newest_epc["walls-description"],
            # "roof-description": newest_epc["roof-description"],
            # "floor-description": newest_epc["floor-description"],
            # "total-floor-area": newest_epc["total-floor-area"],
            "full-address": newest_epc["address"],
            "number-heated-rooms": number_heated_rooms,
            "number-habitable-rooms": number_habitable_rooms,
        }

        processed_asset_list.append(to_append)
        epc_data.append(newest_epc)

    processed_asset_list_df = pd.DataFrame(processed_asset_list)

    epc_data_df = pd.DataFrame(epc_data)

    # True for the properties that belong to the second scenario; computed once and used for both splits
    in_second_portfolio = processed_asset_list_df["uprn"].astype(int).isin(second_portfolio_uprns)

    # Store the main portfolio's data in s3, without the second scenario's properties
    filename = f"{USER_ID}/{PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=processed_asset_list_df[~in_second_portfolio],
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename
    )

    body = _plan_request_body(PORTFOLIO_ID, filename)
    print(body)

    # Store the second scenario's properties under their own portfolio
    subset = processed_asset_list_df[in_second_portfolio]

    filename2 = f"{USER_ID}/{SECOND_SCENARIO_PORTFOLIO_ID}/test_inputs.csv"
    save_csv_to_s3(
        dataframe=subset,
        bucket_name="retrofit-plan-inputs-dev",
        file_name=filename2
    )

    # The trigger file must be this scenario's own input file, not the main portfolio's
    body = _plan_request_body(SECOND_SCENARIO_PORTFOLIO_ID, filename2)
    print(body)

    # Some basic analysis on the heating, heating controls and hot water systems

    # All of the heating systems are rated very poor, poor or average. When it's average, they are all also
    # "Room heaters, electric", but the house has "Programmer and appliance thermostats" for the heating controls,
    # which is more efficient
    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    # Heating
    print(epc_data_df[["mainheat-description", "mainheatcont-description", "mainheat-energy-eff"]].drop_duplicates())
    # Output observed when this was run:
    #     mainheat-description                  mainheatcont-description              mainheat-energy-eff
    # 0   Room heaters, electric                Programmer and room thermostat        Very Poor
    # 12  Room heaters, electric                Programmer and appliance thermostats  Average
    # 20  Electric storage heaters, radiators   Celect-type controls                  Poor

    # Hot water
    print(epc_data_df[["hotwater-description", "hot-water-energy-eff"]].drop_duplicates())
    # Output observed when this was run:
    #     hotwater-description                  hot-water-energy-eff
    # 0   Electric immersion, standard tariff   Very Poor
    # 12  Electric immersion, off-peak          Average

    # We now retrieve EPCS for all of the properties that are in these postcodes very obviously for the velvet mill
    # We'll use this information to get a sense of the likely wall/roof/floor construction for the properties
    # NOTE: this exploratory step is currently disabled and kept for reference

    # client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    #
    # neighbouring_epcs = []
    # for pc in potential_postcodes:
    #     response = client.domestic.search(params={"postcode": pc}, size=1000)
    #     data = response["rows"]
    #
    #     # keep just rows that are clearly for the velvet mill
    #     data = [x for x in data if "velvet" in x["address1"].lower()]
    #
    #     neighbouring_epcs.extend(data)
    #
    # neighbouring_epcs_df = pd.DataFrame(neighbouring_epcs)
    # neighbouring_epcs_df["walls-description"].value_counts()
    # neighbouring_epcs_df["roof-description"].value_counts()
    # neighbouring_epcs_df["floor-description"].value_counts()