From 0fa3c29253bb89759b38b919df20373df1424f14 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 16 Jun 2023 17:29:48 +0100 Subject: [PATCH] Tidy up app script removing geopandas, geopy --- model_data/app.py | 60 +++++++++++++------------------------ model_data/requirements.txt | 3 +- 2 files changed, 22 insertions(+), 41 deletions(-) diff --git a/model_data/app.py b/model_data/app.py index f03d283d..23fc49fe 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -128,22 +128,34 @@ def handler(): table = DBF(borehole_file) borehole_data = [x for x in tqdm(table, total=len(table))] + # There are ~1.4 million entries in this dataset, so we first want to reduce the number of + # entries where possible before we make any comparison with our properties to infer + # the distance from each property to the nearest borehole + # Let's take a sample borehole_sample = borehole_data[:1000] df = pd.DataFrame(borehole_sample) # for each property, find the nearest borehole - # Data dictionary: - # QS - # NUMB - # BSUFF + + # Data dictionary: This description is based on the information presented in the following + # geological articles: + # https://nora.nerc.ac.uk/id/eprint/509366/1/IR04115.pdf + # https://shop.bgs.ac.uk/Resources/Shop/doc/info/Borehole_Abbreviations.pdf?_ga=2.246788941.895115819.1686912089 + # -542796874.1686912089 + # https://core.ac.uk/download/63732.pdf + # + # + # QS - Borehole identifier information + # NUMB - Borehole identifier information + # BSUFF - Borehole identifier information # REGNO - # RT + # RT - Borehole identifier information # GRID_REFER - # EASTING - # NORTHING - # X - # Y + # EASTING - British National Grid coordinates + # NORTHING - British National Grid coordinates + # X - British National Grid coordinates - same as EASTING but typed as a float + # Y - British National Grid coordinates - same as NORTHING but typed as a float # CONFIDENTI # STRTHEIGHT # NAME @@ -153,33 +165,3 @@ def handler(): # 
DATE_K_TYP # DATE_ENTER # AGS_LOG_UR - - import geopandas as gpd - - borehole_shape_file = os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/borehole/borehole.shp" - boreholes = gpd.read_file(borehole_shape_file) - - from geopy.geocoders import Nominatim - from geopy.extra.rate_limiter import RateLimiter - - geolocator = Nominatim(user_agent="myGeocoder") - - # The rate limiter will ensure you're not making too many requests too quickly - geocode = RateLimiter(geolocator.geocode, min_delay_seconds=0.5) - - for address_config in input_data: - location = geocode(" ".join([address_config["address1"], address_config["postcode"]])) - geocode("28 distillery wharf, regtta lane, hammersmith, london, w6 9bf") - - print((location.latitude, location.longitude)) - - from geopy.geocoders import Nominatim - from geopy.extra.rate_limiter import RateLimiter - - geolocator = Nominatim(user_agent="myGeocoder") - - geocode = RateLimiter(geolocator.geocode, min_delay_seconds=1) - - # Full address - location = geocode(", ".join([p.data["address"], p.data["postcode"]])) - print((location.latitude, location.longitude)) diff --git a/model_data/requirements.txt b/model_data/requirements.txt index 9888d372..a21cdf6e 100644 --- a/model_data/requirements.txt +++ b/model_data/requirements.txt @@ -10,5 +10,4 @@ pytest-mock fuzzywuzzy python-Levenshtein dbfread -geopandas -geopy \ No newline at end of file +pyproj \ No newline at end of file