Model/model_data/app.py
2023-06-16 11:43:24 +01:00

127 lines
4.5 KiB
Python

import pandas as pd
from tqdm import tqdm
from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
def handler():
    """Download and clean EPC records for the input properties' local authorities.

    Steps, in order:

    1. Build an ``EpcClient`` authenticated with ``EPC_AUTH_TOKEN``.
    2. Create one ``Property`` per entry in ``input_data`` (using its
       ``postcode`` and ``address1`` keys) and call ``search_address_epc()``
       on each of them.
    3. Collect the distinct ``'local-authority'`` values found in each
       property's ``data``.
    4. For every local authority, call ``pagenated_epc_download`` with
       ``page_size=5000`` and ``n_pages=10`` and pool the results in one list.
    5. Hand the pooled records to ``EpcClean`` and call ``clean()``.

    Nothing is returned; the outcome of ``clean()`` is not captured here.
    """
    # The inputs are currently a hard-coded list of dictionaries; the intention
    # is to read them in from a file instead.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    input_properties = []
    for config in input_data:
        input_properties.append(
            Property(
                postcode=config['postcode'],
                address1=config['address1'],
                epc_client=epc_client,
            )
        )

    # Every property is created before any search is started.
    # NOTE(review): presumably search_address_epc() is what fills in
    # ``data`` on each property - confirm against Property.
    for prop in input_properties:
        prop.search_address_epc()

    # A set, so each local authority is downloaded only once.
    local_authorities = set()
    for prop in input_properties:
        local_authorities.add(prop.data['local-authority'])

    data = []
    for authority in tqdm(local_authorities):
        authority_records = pagenated_epc_download(
            client=epc_client,
            params={"local-authority": authority},
            page_size=5000,
            n_pages=10,
        )
        data.extend(authority_records)

    cleaner = EpcClean(data)
    cleaner.clean()
import os
import pickle

# Load the records stored in ``data.pkl`` in the same directory as this file.
# NOTE(review): unpickling can execute arbitrary code, so ``data.pkl`` must
# only ever come from a trusted source.
with open(os.path.abspath(os.path.dirname(__file__)) + "/data.pkl", "rb") as f:
    data = pickle.load(f)

# Upper-cased postcode of every record, in record order.
postcodes = [record["postcode"].upper() for record in data]

# Per-record address summary: the postcode and the three address lines are
# upper-cased, while the full address and the UPRN are copied through as-is.
address_meta = [
    {
        **{
            field: record[field].upper()
            for field in ("postcode", "address1", "address2", "address3")
        },
        "address": record["address"],
        "uprn": record["uprn"],
    }
    for record in data
]
# For testing:
# from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
# from collections import Counter
# count = Counter([x["main-fuel"] for x in data])
# descriptions = {x["hotwater-description"] for x in data}
# out = []
# for description in descriptions:
# res = HotWaterAttributes(description).process()
# out.append(
# {
# "original_description": description,
# **res
# }
# )
# df = pd.DataFrame(out)
# df = df.sort_values("original_description")
# df = df.reset_index(drop=True)
#
# import numpy as np
# idx = 1
# record = df[df.index == idx].to_dict("records")[0]
# record = {k: v for k, v in record.items() if v not in [None, np.nan]}
# from pprint import pprint
# pprint(record)
#
# # Issues:
# # 1) '2207 Time and temperature zone control' - at the moment we only pick up "temperature zone
# #    control" and miss the fact that this is a *time and* temperature zone control. Can we
# #    capture this too?
# # 2) 'Charging system linked to use of community heating, programmer and at least two room stats' -
# #    what are room stats, and how should we capture this?
#
# df.to_dict("records")
## Land registry
import os
from model_data.LandRegistryClient import LandRegistryClient

# Build the client from the local price-paid CSV files and the address
# metadata. Each path is this file's absolute directory followed by the
# relative location of one CSV.
land_registry_client = LandRegistryClient(
    paths=[
        os.path.abspath(os.path.dirname(__file__)) + relative_path
        for relative_path in (
            "/model_data/local_data/pp-monthly-update-new-version.csv",
            "/model_data/local_data/pp-2022 (1).csv",
            "/model_data/local_data/pp-2021.csv",
            "/model_data/local_data/pp-2020.csv",
            "/model_data/local_data/pp-2019.csv",
            "/model_data/local_data/pp-2018.csv",
            "/model_data/local_data/pp-2017-part1.csv",
            "/model_data/local_data/pp-2017-part2.csv",
        )
    ],
    addresses=address_meta,
)
import os

import pandas as pd
from dbfread import DBF
from tqdm import tqdm

# Borehole attribute table (.dbf), located relative to this file.
borehole_file = (
    os.path.abspath(os.path.dirname(__file__))
    + "/model_data/local_data/borehole/borehole.dbf"
)
table = DBF(borehole_file)

# Read every record of the table into a list; ``len(table)`` gives tqdm the
# total for its progress bar.
borehole_data = list(tqdm(table, total=len(table)))

# Let's take a sample: only the first 1000 records go into the DataFrame.
borehole_sample = borehole_data[:1000]
df = pd.DataFrame(borehole_sample)

import geopandas as gpd

# Borehole shapefile (.shp) from the same directory, read with geopandas.
borehole_shape_file = (
    os.path.abspath(os.path.dirname(__file__))
    + "/model_data/local_data/borehole/borehole.shp"
)
boreholes = gpd.read_file(borehole_shape_file)