mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
124 lines
4.5 KiB
Python
124 lines
4.5 KiB
Python
import pandas as pd
|
|
from pprint import pprint
|
|
from tqdm import tqdm
|
|
import os
|
|
from model_data.BoreholeClient import BoreholeClient
|
|
from model_data.LandRegistryClient import LandRegistryClient
|
|
|
|
from model_data.temp_inputs import input_data
|
|
from model_data.Property import Property
|
|
from model_data.config import EPC_AUTH_TOKEN
|
|
from epc_api.client import EpcClient
|
|
from model_data.downloader import pagenated_epc_download
|
|
from model_data.EpcClean import EpcClean
|
|
from model_data.OpenUprnClient import OpenUprnClient
|
|
|
|
# Land Registry CSV inputs (the `pp-*` files), each resolved against the
# directory containing this file. Order is preserved: the monthly update
# first, then the yearly files from 2022 back to 2017 (2017 is in two parts).
LAND_REGISTRY_PATHS = [
    os.path.abspath(os.path.dirname(__file__)) + relative_path
    for relative_path in (
        "/model_data/local_data/pp-monthly-update-new-version.csv",
        "/model_data/local_data/pp-2022 (1).csv",
        "/model_data/local_data/pp-2021.csv",
        "/model_data/local_data/pp-2020.csv",
        "/model_data/local_data/pp-2019.csv",
        "/model_data/local_data/pp-2018.csv",
        "/model_data/local_data/pp-2017-part1.csv",
        "/model_data/local_data/pp-2017-part2.csv",
    )
]
|
|
|
|
|
|
def handler():
    """Run the property data-gathering pipeline end to end.

    Steps, in order:

    1. Build a ``Property`` for every entry in ``input_data`` and run its
       EPC address search.
    2. Load the OS Open UPRN file for the collected UPRNs and ask each
       property to resolve its coordinates from it.
    3. Download EPC records for every local authority the input properties
       belong to, then clean them with ``EpcClean``.
    4. Read the Land Registry files for the addresses found in the EPC data.
    5. Read the borehole dataset and compute one sample property-to-borehole
       distance.

    Returns nothing; the only visible output is the pretty-printed
    coordinates of the first input property.

    Raises:
        IndexError: if ``input_data`` is empty (the first property is used
            as a sample).
    """
    # Every local data file lives under this file's directory.
    base_dir = os.path.abspath(os.path.dirname(__file__))

    # To begin with, the input data is a list of dictionaries, however we
    # would read this file in.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    input_properties = [
        Property(
            postcode=config['postcode'],
            address1=config['address1'],
            epc_client=epc_client,
        )
        for config in input_data
    ]

    for p in input_properties:
        p.search_address_epc()

    uprns = [p.data['uprn'] for p in input_properties]

    open_uprn_client = OpenUprnClient(
        path=base_dir + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv",
        uprns=uprns,
    )
    open_uprn_client.read()

    # What's going on here?
    # We're using Ordnance Survey Open UPRN data to find the coordinates of
    # each address, which we will then be able to use at a later stage.
    for p in input_properties:
        p.get_coordinates(open_uprn_client)

    pprint(input_properties[0].coordinates)

    # A set, so each local authority is downloaded only once.
    local_authorities = {p.data['local-authority'] for p in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    cleaner = EpcClean(data)
    cleaner.clean()

    # Example cleaned data.
    # Why do we need this stuff?
    # https://docs.google.com/spreadsheets/d/1ek9ItDv7xHwFm_FK6B0PyOBwvi6U4qRPuncBsVlCHUA/edit#gid=0
    # NOTE(review): these frames are built but not consumed yet; kept so the
    # expected keys of `cleaner.cleaned` are still exercised.
    floors = pd.DataFrame(cleaner.cleaned['floor-description'])
    walls = pd.DataFrame(cleaner.cleaned['walls-description'])
    hotwater = pd.DataFrame(cleaner.cleaned['hotwater-description'])
    mainheat = pd.DataFrame(cleaner.cleaned["mainheat-description"])

    # Address parts are upper-cased before being handed to the Land Registry
    # client; presumably to match the casing used in its files (TODO confirm).
    address_meta = [
        {
            "postcode": x["postcode"].upper(),
            "address1": x["address1"].upper(),
            "address2": x["address2"].upper(),
            "address3": x["address3"].upper(),
            "address": x["address"],
            "uprn": x["uprn"],
        }
        for x in data
    ]

    # Land registry
    land_registry_client = LandRegistryClient(
        paths=LAND_REGISTRY_PATHS,
        addresses=address_meta,
    )
    land_registry_client.read()

    # Borehole
    borehole_client = BoreholeClient(
        path=base_dir + "/model_data/local_data/borehole/borehole.dbf"
    )
    borehole_client.read()

    # There are ~1.4 million entries in this dataset and so we firstly want
    # to reduce the number of entries in here if possible before we produce
    # any form of comparison between our properties, to infer the distance
    # from the property to the nearest borehole.

    # Let's take a sample. The local is deliberately not called `property`,
    # which would shadow the builtin.
    sample_borehole = borehole_client.data[0]
    sample_property = input_properties[0]

    # For each property, find the nearest borehole.
    # This is just an example, looking at the distance from a property to a
    # borehole. NOTE(review): the result is not used yet.
    dist_m, dist_km = borehole_client.distance_between_bng_coords(
        x1_bng=sample_property.coordinates["x_coordinate"],
        y1_bng=sample_property.coordinates["y_coordinate"],
        x2_bng=sample_borehole["X"],
        y2_bng=sample_borehole["Y"],
    )
|