mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Adding epc data
This commit is contained in:
parent
95ff513f80
commit
0df3394c6c
1 changed file with 33 additions and 3 deletions
|
|
@ -13,7 +13,7 @@ import numpy as np
|
|||
import pandas as pd
|
||||
import time
|
||||
from utils.s3 import save_data_to_s3, read_excel_from_s3, read_from_s3, read_dataframe_from_s3_parquet, \
|
||||
save_dataframe_to_s3_parquet, save_pickle_to_s3
|
||||
save_dataframe_to_s3_parquet, save_pickle_to_s3, read_pickle_from_s3
|
||||
from sklearn.cluster import KMeans
|
||||
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
||||
from sklearn.compose import ColumnTransformer
|
||||
|
|
@ -1976,7 +1976,7 @@ def updated_version():
|
|||
# Read in data
|
||||
########################################################################
|
||||
asset_list = read_asset_list()
|
||||
asset_list = merge_uprn_to_asset_list(asset_list)
|
||||
asset_list, uprn_lookup_2 = merge_uprn_to_asset_list(asset_list)
|
||||
|
||||
# Read in the properties that have been included in Osmosis' wave 2.1
|
||||
osmosis_wave_2_1_asset_ids, osmosis_wave_2_1 = read_omosis_wave_2_1()
|
||||
|
|
@ -2043,6 +2043,8 @@ def updated_version():
|
|||
right_on="Address ID"
|
||||
)
|
||||
|
||||
# Pull in the EPC data
|
||||
|
||||
|
||||
def read_asset_list():
|
||||
asset_list = pd.read_excel(
|
||||
|
|
@ -2185,7 +2187,7 @@ def merge_uprn_to_asset_list(asset_list):
|
|||
on=["internal_id", "external_address_id"]
|
||||
)
|
||||
|
||||
return asset_list
|
||||
return asset_list, uprn_lookup_2
|
||||
|
||||
|
||||
def read_omosis_wave_2_1():
|
||||
|
|
@ -2234,3 +2236,31 @@ def read_stonewater_asset_data():
|
|||
priority_postcodes = priority_postcodes["Postcode"].tolist()
|
||||
|
||||
return priority_postcodes, previous_waves_address_id, master_sheet
|
||||
|
||||
|
||||
def read_epc_data(uprn_lookup_2):
    """Load both batches of Stonewater EPC data from S3 and stack them.

    The first batch is a JSON document that is turned into a DataFrame,
    patched, and then restricted to the ``internal_id`` values present in
    ``uprn_lookup_2``. The second batch is read from a pickle and appended
    without any filtering.

    Args:
        uprn_lookup_2: Object exposing an ``"internal_id"`` column with a
            ``.values`` attribute (presumably the DataFrame returned by
            ``merge_uprn_to_asset_list`` - confirm against the caller).

    Returns:
        The result of ``pd.concat`` over the filtered first batch and the
        second batch. Row indices are left as-is (not reset).
    """
    s3_bucket = "retrofit-data-dev"

    # First batch: raw JSON payload -> DataFrame.
    raw_json = read_from_s3(
        bucket_name=s3_bucket,
        s3_file_name="customers/Stonewater/clustering/epc_data.json"
    )
    first_batch = pd.DataFrame(json.loads(raw_json))

    # Override the UPRN for internal_id 1091 with a hard-coded value.
    # NOTE(review): presumably a manual correction of a bad UPRN in the
    # source data - confirm and record the reason.
    is_record_1091 = first_batch["internal_id"] == 1091
    first_batch["uprn"] = np.where(is_record_1091, 83143766, first_batch["uprn"])

    # We drop some EPCs: only rows whose internal_id appears in the UPRN
    # lookup are kept.
    known_internal_ids = uprn_lookup_2["internal_id"].values
    in_lookup = first_batch["internal_id"].isin(known_internal_ids)
    first_batch = first_batch[in_lookup]

    # Second batch: stored as a pickle and appended unfiltered.
    # NOTE(review): assumes the bucket contents are trusted, since the
    # helper presumably unpickles the object - confirm.
    second_batch = read_pickle_from_s3(
        s3_file_name="customers/Stonewater/clustering/epc_data_batch_2.pkl",
        bucket_name=s3_bucket
    )

    return pd.concat([first_batch, second_batch])
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue