mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
tidying up app script
This commit is contained in:
parent
1577d90263
commit
b75fedf3ac
2 changed files with 79 additions and 134 deletions
56
model_data/BoreholeClient.py
Normal file
56
model_data/BoreholeClient.py
Normal file
|
|
@ -0,0 +1,56 @@
|
|||
import math
|
||||
from tqdm import tqdm
|
||||
from dbfread import DBF
|
||||
from utils import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class BoreholeClient:
    """
    Read borehole records from a DBF file and measure distances to them.

    Data dictionary: This description is based on the information presented in the following
    Geological articles:
    https://nora.nerc.ac.uk/id/eprint/509366/1/IR04115.pdf
    https://shop.bgs.ac.uk/Resources/Shop/doc/info/Borehole_Abbreviations.pdf?_ga=2.246788941.895115819.1686912089-542796874.1686912089
    https://core.ac.uk/download/63732.pdf

    QS - Borehole identifier information
    NUMB - Borehole identifier information
    BSUFF - Borehole identifier information
    REGNO
    RT - Borehole identifier information
    GRID_REFER
    EASTING - British National Grid coordinates
    NORTHING - British National Grid coordinates
    X - British National Grid coordinates - same as EASTING but has a float typing
    Y - British National Grid coordinates - same as NORTHING but has a float typing
    CONFIDENTI
    STRTHEIGHT
    NAME
    LENGTH
    BGS_ID
    DATE_KNOWN
    DATE_K_TYP
    DATE_ENTER
    AGS_LOG_UR
    """

    def __init__(self, path: str) -> None:
        """Store the location of the DBF file; nothing is read until ``read()`` is called.

        Args:
            path: Path of the borehole DBF file.
        """
        self.path: str = path
        # Records loaded by read(); stays None until read() has been called.
        self.data = None

    def read(self) -> None:
        """Load every record of the DBF file at ``self.path`` into ``self.data`` as a list."""
        logger.info("Reading in borehole data")
        table = DBF(self.path)

        # The record count is passed to tqdm so the progress bar can show a total.
        self.data = list(tqdm(table, total=len(table)))

    @staticmethod
    def distance_between_bng_coords(x1_bng, y1_bng, x2_bng, y2_bng):
        """Return the straight-line distance between two British National Grid points.

        Args:
            x1_bng: Easting of the first point.
            y1_bng: Northing of the first point.
            x2_bng: Easting of the second point.
            y2_bng: Northing of the second point.

        Returns:
            A ``(distance_m, distance_km)`` tuple: the Euclidean distance in the
            units of the inputs (metres for BNG coordinates) and the same
            distance divided by 1000.
        """
        # Calculate the Euclidean distance between the points
        distance_m = math.hypot(x2_bng - x1_bng, y2_bng - y1_bng)
        distance_km = distance_m / 1000  # convert meters to kilometers
        return distance_m, distance_km
|
||||
|
|
@ -1,6 +1,8 @@
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import os
|
||||
from BoreholeClient import BoreholeClient
|
||||
from model_data.LandRegistryClient import LandRegistryClient
|
||||
|
||||
from model_data.temp_inputs import input_data
|
||||
from model_data.Property import Property
|
||||
|
|
@ -10,6 +12,17 @@ from model_data.downloader import pagenated_epc_download
|
|||
from model_data.EpcClean import EpcClean
|
||||
from model_data.OpenUprnClient import OpenUprnClient
|
||||
|
||||
# Absolute paths of the Land Registry CSV files, resolved relative to the
# directory containing this file.
LAND_REGISTRY_PATHS = [
    os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/" + csv_name
    for csv_name in (
        "pp-monthly-update-new-version.csv",
        "pp-2022 (1).csv",
        "pp-2021.csv",
        "pp-2020.csv",
        "pp-2019.csv",
        "pp-2018.csv",
        "pp-2017-part1.csv",
        "pp-2017-part2.csv",
    )
]
|
||||
|
||||
|
||||
def handler():
|
||||
# To begin with, the input data is a list of dictionaries, however we would read this file in
|
||||
|
|
@ -54,11 +67,6 @@ def handler():
|
|||
|
||||
cleaner.clean()
|
||||
|
||||
import pickle
|
||||
with open(os.path.abspath(os.path.dirname(__file__)) + "/data.pkl", "rb") as f:
|
||||
data = pickle.load(f)
|
||||
|
||||
postcodes = [x["postcode"].upper() for x in data]
|
||||
address_meta = [
|
||||
{
|
||||
"postcode": x["postcode"].upper(),
|
||||
|
|
@ -70,151 +78,32 @@ def handler():
|
|||
} for x in data
|
||||
]
|
||||
|
||||
# For testing:
|
||||
# from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
|
||||
# from collections import Counter
|
||||
# count = Counter([x["main-fuel"] for x in data])
|
||||
# descriptions = {x["hotwater-description"] for x in data}
|
||||
# out = []
|
||||
# for description in descriptions:
|
||||
# res = HotWaterAttributes(description).process()
|
||||
# out.append(
|
||||
# {
|
||||
# "original_description": description,
|
||||
# **res
|
||||
# }
|
||||
# )
|
||||
# df = pd.DataFrame(out)
|
||||
# df = df.sort_values("original_description")
|
||||
# df = df.reset_index(drop=True)
|
||||
#
|
||||
# import numpy as np
|
||||
# idx = 1
|
||||
# record = df[df.index == idx].to_dict("records")[0]
|
||||
# record = {k: v for k, v in record.items() if v not in [None, np.nan]}
|
||||
# from pprint import pprint
|
||||
# pprint(record)
|
||||
#
|
||||
# # Issues:
|
||||
# # 1) '2207 Time and temperature zone control' - we don't pick up any reference to the fact this is a time and
|
||||
# # temperature zone control
|
||||
# # and we only pick up temperature zone control at the moment. Can we capture this too
|
||||
# # 2) 'Charging system linked to use of community heating, programmer and at least two room stats' - what are room
|
||||
# # stats and how should
|
||||
# # we capture this?
|
||||
#
|
||||
# df.to_dict("records")
|
||||
|
||||
# Land registry
|
||||
from model_data.LandRegistryClient import LandRegistryClient
|
||||
|
||||
land_registry_client = LandRegistryClient(
|
||||
paths=[
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-monthly-update-new-version.csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2022 (1).csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2021.csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2020.csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2019.csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2018.csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2017-part1.csv",
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-2017-part2.csv",
|
||||
],
|
||||
paths=LAND_REGISTRY_PATHS,
|
||||
addresses=address_meta
|
||||
)
|
||||
land_registry_client.read()
|
||||
|
||||
from dbfread import DBF
|
||||
|
||||
borehole_file = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.dbf"
|
||||
table = DBF(borehole_file)
|
||||
borehole_data = [x for x in tqdm(table, total=len(table))]
|
||||
# Borehole
|
||||
borehole_client = BoreholeClient(
|
||||
path=os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.dbf"
|
||||
)
|
||||
borehole_client.read()
|
||||
|
||||
# This dataset has ~1.4 million entries, so we first want to reduce the number of entries,
# if possible, before comparing the boreholes against our properties to infer the distance
# from each property to its nearest borehole
|
||||
|
||||
# Let's take a sample
|
||||
borehole_sample = borehole_data[:1000]
|
||||
df = pd.DataFrame(borehole_sample)
|
||||
borehold_compare_to = borehole_client.data[0]
|
||||
property = input_properties[0]
|
||||
|
||||
# for each property, find the nearest borehole
|
||||
|
||||
# Data dictionary: This description is based on the information presented in the following
|
||||
# Geological articles:
|
||||
# https://nora.nerc.ac.uk/id/eprint/509366/1/IR04115.pdf
|
||||
# https://shop.bgs.ac.uk/Resources/Shop/doc/info/Borehole_Abbreviations.pdf?_ga=2.246788941.895115819.1686912089-542796874.1686912089
|
||||
# https://core.ac.uk/download/63732.pdf
|
||||
#
|
||||
#
|
||||
# QS - Borehole identifier information
|
||||
# NUMB - Borehole identifier information
|
||||
# BSUFF - Borehole identifier information
|
||||
# REGNO
|
||||
# RT - Borehole identifier information
|
||||
# GRID_REFER
|
||||
# EASTING - British National Grid coordinates
|
||||
# NORTHING - British National Grid coordinates
|
||||
# X - British National Grid coordinates - same as EASTING but has a float typing
|
||||
# Y - British National Grid coordinates - same as NORTHING but has a float typing
|
||||
# CONFIDENTI
|
||||
# STRTHEIGHT
|
||||
# NAME
|
||||
# LENGTH
|
||||
# BGS_ID
|
||||
# DATE_KNOWN
|
||||
# DATE_K_TYP
|
||||
# DATE_ENTER
|
||||
# AGS_LOG_UR
|
||||
|
||||
from pyproj import Proj, transform, Geod
|
||||
|
||||
def distance_between_coords(longitude, latitude, x_bng, y_bng):
    """Return the distance in metres between a WGS84 point and a British National Grid point.

    The (longitude, latitude) point is projected onto the British National Grid
    and the distance is then measured in that projected plane.

    Args:
        longitude: WGS84 longitude of the first point.
        latitude: WGS84 latitude of the first point.
        x_bng: British National Grid easting of the second point.
        y_bng: British National Grid northing of the second point.

    Returns:
        The planar (Euclidean) distance between the two points in grid units.
    """
    import math

    # Define the projections
    # NOTE(review): Proj(init=...) and pyproj.transform are deprecated in newer
    # pyproj releases in favour of pyproj.Transformer - confirm the pinned version.
    wgs84 = Proj(init='epsg:4326')  # WGS84 (longitude, latitude)
    bng = Proj(init='epsg:27700')  # British National Grid

    # Convert (longitude, latitude) to BNG coordinates
    x, y = transform(wgs84, bng, longitude, latitude)

    # Both points are now projected grid coordinates, so the distance is planar.
    # Geod.inv must not be used here: it expects longitude/latitude in degrees,
    # not projected eastings/northings.
    return math.hypot(x_bng - x, y_bng - y)
|
||||
|
||||
def distance_between_bng_coords(x1_bng, y1_bng, x2_bng, y2_bng):
    """Return the straight-line distance between two British National Grid points.

    Args:
        x1_bng: Easting of the first point.
        y1_bng: Northing of the first point.
        x2_bng: Easting of the second point.
        y2_bng: Northing of the second point.

    Returns:
        The Euclidean distance between the points, in the units of the inputs
        (metres for British National Grid coordinates).
    """
    import math

    # BNG eastings/northings are projected planar coordinates, so the distance
    # is Euclidean. A geodesic inverse (Geod.inv) expects longitude/latitude in
    # degrees and gives meaningless results for projected coordinates.
    return math.hypot(x2_bng - x1_bng, y2_bng - y1_bng)
|
||||
|
||||
import math
|
||||
|
||||
import math
|
||||
|
||||
def distance_between_bng_coords(x1_bng, y1_bng, x2_bng, y2_bng):
    """Return the Euclidean distance between two British National Grid points.

    Args:
        x1_bng: Easting of the first point.
        y1_bng: Northing of the first point.
        x2_bng: Easting of the second point.
        y2_bng: Northing of the second point.

    Returns:
        A ``(distance_m, distance_km)`` tuple: the distance in the units of the
        inputs, and that distance divided by 1000.
    """
    # Offsets along each grid axis
    delta_x = x2_bng - x1_bng
    delta_y = y2_bng - y1_bng

    metres = math.sqrt(delta_x ** 2 + delta_y ** 2)
    kilometres = metres / 1000  # convert meters to kilometers
    return metres, kilometres
|
||||
|
||||
property = input_properties[0]
|
||||
|
||||
borehold_compare_to = borehole_data[0]
|
||||
|
||||
dist_m, dist_km = distance_between_bng_coords(
|
||||
dist_m, dist_km = borehole_client.distance_between_bng_coords(
|
||||
x1_bng=property.coordinates["x_coordinate"],
|
||||
y1_bng=property.coordinates["y_coordinate"],
|
||||
x2_bng=borehold_compare_to["X"],
|
||||
y2_bng=borehold_compare_to["Y"],
|
||||
)
|
||||
# ground source heat pump.
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue