implementing spatial data

This commit is contained in:
Khalim Conn-Kowlessar 2023-10-05 17:56:28 +01:00
parent b6b4d0dd75
commit 6cc84e95bf
6 changed files with 71 additions and 15 deletions

View file

@ -1,11 +1,19 @@
from datetime import datetime
import re
import os
import pandas as pd
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet
from epc_api.client import EpcClient
from BaseUtility import Definitions
from recommendations.rdsap_tables import england_wales_age_band_lookup
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
logger = setup_logger()
class Property(Definitions):
@ -39,8 +47,9 @@ class Property(Definitions):
self.postcode = postcode
self.address1 = address1
self.data = data
self.uprn = None
self.full_sap_epc = None
self.in_conservation_area = None
self.in_conservation_area, self.is_listed, self.is_heritage = None, None, None
self.year_built = None
self.number_of_rooms = None
@ -88,6 +97,7 @@ class Property(Definitions):
response["rows"] = newest_response
self.data = response["rows"][0]
self.uprn = int(self.data["uprn"])
def set_coordinates(self, coordinates):
"""
@ -279,12 +289,14 @@ class Property(Definitions):
self.age_band = england_wales_age_band_lookup[self.data["construction-age-band"]]
def set_is_in_conservation_area(self, in_conservation_area):
def set_spatial(self, spatial: pd.DataFrame):
"""
Sets whether the property is in a conservation area given the output of the ConservationAreaClient
:param in_conservation_area: string value, indicating whether the property is in a conservation area
:param spatial: Dataframe, containing the spatial data for the property
"""
self.in_conservation_area = in_conservation_area
self.in_conservation_area = spatial["conservation_status"].values[0]
self.is_listed = spatial["is_listed"].values[0]
self.is_heritage = spatial["is_heritage"].values[0]
def set_year_built(self):
"""
@ -463,3 +475,35 @@ class Property(Definitions):
# however this is something we'll need to use Verisk data for
self.floor_area = float(self.data["total-floor-area"])
def get_spatial_data(self):
    """
    Given a property's UPRN, this method will pull the associated spatial data from s3
    and hand the matching row(s) to set_spatial.

    The lookup is done in two steps: "spatial/filename_meta.parquet" is filtered on its
    'lower'/'upper' columns to find the file whose range contains the UPRN, then that
    file is read and filtered on its "UPRN" column.

    :return: None. If no file or no row can be found for the UPRN a warning is logged
        and set_spatial is not called.
    :raises ValueError: if the UPRN has not been set on the property yet
    """
    if self.uprn is None:
        raise ValueError("UPRN is not set, run search_address_epc")

    # We get the filenames
    filenames = read_dataframe_from_s3_parquet(
        bucket_name=DATA_BUCKET, file_key="spatial/filename_meta.parquet"
    )

    # We get the file name for the uprn
    filtered_df = filenames[(filenames['lower'] <= self.uprn) & (filenames['upper'] >= self.uprn)]
    if filtered_df.empty:
        logger.warning("Could not find file containing UPRNS")
        return None

    filename = filtered_df.iloc[0]['filenames']
    spatial_data = read_dataframe_from_s3_parquet(
        bucket_name=DATA_BUCKET, file_key=f"spatial/{filename}"
    )

    spatial = spatial_data[spatial_data["UPRN"] == self.uprn]
    if spatial.empty:
        # set_spatial reads the first value of each column, so an empty frame
        # would fail there with an IndexError; bail out here instead
        logger.warning(f"Could not find spatial data for UPRN {self.uprn}")
        return None

    # Pull out spatial features
    self.set_spatial(spatial)

View file

@ -1,6 +1,8 @@
from backend.app.db.models.materials import Material
from functools import lru_cache
@lru_cache(maxsize=128)
def get_materials(session):
"""
This function will retrieve all materials from the database.

View file

@ -92,18 +92,11 @@ async def trigger_plan(body: PlanTriggerRequest):
if not input_properties:
return Response(status_code=204)
logger.info("Getting EPC, coordinates and conservation area data")
logger.info("Getting EPC, and spatial data")
for p in input_properties:
p.search_address_epc()
p.set_year_built()
coordinate_data = [x for x in open_uprn_data if x['UPRN'] == int(p.data['uprn'])][0]
p.set_coordinates(coordinate_data)
in_conservation_area = [x for x in in_conservation_area_data if x['uprn'] == int(p.data['uprn'])][0].get(
"is_in_conservation_area"
)
p.set_is_in_conservation_area(in_conservation_area)
p.get_spatial_data()
# The materials data could be cached or local so we don't need to make
# consistent requests to the backend for

View file

@ -22,7 +22,7 @@ from recommendations.recommendation_utils import (
get_wall_type
)
DATA_DIRECTORY = Path(__file__).parent / "model_data" / "simulation_system" / "data" / "all-domestic-certificates"
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
def get_cleaned():
@ -400,6 +400,8 @@ def app():
data_processor = DataProcessor(filepath=filepath)
df = data_processor.pre_process()
df[df["WALLS_DESCRIPTION"].str.contains("Cavity")]["WALLS_DESCRIPTION"].unique()
cleaning_averages = data_processor.make_cleaning_averages()
# We have some odd cases with missing constituency so we fill

12
input_property_list.csv Normal file
View file

@ -0,0 +1,12 @@
address,postcode,Notes,,,,
28 Distillery Wharf,W6 9bf,,,,,
Flat 14 Godley V C House,E2 0LP,,,,,
49 Elderfield Road,E5 0LF,,,,,
26 Stanhope Road,N6 5NG,,,,,
Flat 3 Frederick Building,N1 4BD,,,,,
Flat 4 Frederick Building,N1 4BD,,,,,
"Flat 28, 22 Adelina Grove",E1 3BX,,,,,
"Flat 39, 239 Long Lane",SE1 4PT,,,,,
"1, Westview, Someday",LE14 2QH,This property has an unfilled cavity,,,,
"59, Ashdale",CM23 4EB,This property has a partially filled cavity,,,,
88 Cleveland Avenue,DL3 7BE,This property has a filled cavity,,,,
1 address postcode Notes
2 28 Distillery Wharf W6 9bf
3 Flat 14 Godley V C House E2 0LP
4 49 Elderfield Road E5 0LF
5 26 Stanhope Road N6 5NG
6 Flat 3 Frederick Building N1 4BD
7 Flat 4 Frederick Building N1 4BD
8 Flat 28, 22 Adelina Grove E1 3BX
9 Flat 39, 239 Long Lane SE1 4PT
10 1, Westview, Someday LE14 2QH This property has an unfilled cavity
11 59, Ashdale CM23 4EB This property has a partially filled cavity
12 88 Cleveland Avenue DL3 7BE This property has a filled cavity

View file

@ -99,8 +99,11 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
:return: A pandas DataFrame.
"""
if bucket_name is None:
raise ValueError("Bucket name is None when trying to read dataframe from parquet")
if not file_key.endswith(".parquet"):
raise logger.warning("This file doesn't look like a parquet file")
raise ValueError("This file doesn't look like a parquet file")
parquet_buffer = read_io_from_s3(
bucket_name=bucket_name,