mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
implementing spatial data
This commit is contained in:
parent
b6b4d0dd75
commit
6cc84e95bf
6 changed files with 71 additions and 15 deletions
|
|
@ -1,11 +1,19 @@
|
|||
from datetime import datetime
|
||||
import re
|
||||
import os
|
||||
import pandas as pd
|
||||
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_dataframe_from_s3_parquet
|
||||
from epc_api.client import EpcClient
|
||||
from BaseUtility import Definitions
|
||||
from recommendations.rdsap_tables import england_wales_age_band_lookup
|
||||
|
||||
ENVIRONMENT = os.environ.get('ENVIRONMENT', 'dev')
|
||||
EPC_AUTH_TOKEN = os.environ.get('EPC_AUTH_TOKEN')
|
||||
DATA_BUCKET = os.environ.get('DATA_BUCKET', 'retrofit-data-dev' if ENVIRONMENT == 'dev' else None)
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class Property(Definitions):
|
||||
|
|
@ -39,8 +47,9 @@ class Property(Definitions):
|
|||
self.postcode = postcode
|
||||
self.address1 = address1
|
||||
self.data = data
|
||||
self.uprn = None
|
||||
self.full_sap_epc = None
|
||||
self.in_conservation_area = None
|
||||
self.in_conservation_area, self.is_listed, self.is_heritage = None, None, None
|
||||
self.year_built = None
|
||||
self.number_of_rooms = None
|
||||
|
||||
|
|
@ -88,6 +97,7 @@ class Property(Definitions):
|
|||
response["rows"] = newest_response
|
||||
|
||||
self.data = response["rows"][0]
|
||||
self.uprn = int(self.data["uprn"])
|
||||
|
||||
def set_coordinates(self, coordinates):
|
||||
"""
|
||||
|
|
@ -279,12 +289,14 @@ class Property(Definitions):
|
|||
|
||||
self.age_band = england_wales_age_band_lookup[self.data["construction-age-band"]]
|
||||
|
||||
def set_is_in_conservation_area(self, in_conservation_area):
|
||||
def set_spatial(self, spatial: pd.DataFrame):
|
||||
"""
|
||||
Sets whether the property is in a conservation area given the output of the ConservationAreaClient
|
||||
:param in_conservation_area: string value, indicating whether the property is in a conservation area
|
||||
:param spatial: Dataframe, containing the spatial data for the property
|
||||
"""
|
||||
self.in_conservation_area = in_conservation_area
|
||||
self.in_conservation_area = spatial["conservation_status"].values[0]
|
||||
self.is_listed = spatial["is_listed"].values[0]
|
||||
self.is_heritage = spatial["is_heritage"].values[0]
|
||||
|
||||
def set_year_built(self):
|
||||
"""
|
||||
|
|
@ -463,3 +475,35 @@ class Property(Definitions):
|
|||
# however this is something we'll need to use Verisk data for
|
||||
|
||||
self.floor_area = float(self.data["total-floor-area"])
|
||||
|
||||
def get_spatial_data(self):
    """
    Given a property's UPRN, this method will pull the associated spatial data from s3
    and pass the matching row(s) to ``set_spatial``.

    The lookup is two-step: ``spatial/filename_meta.parquet`` maps UPRN ranges
    (``lower``/``upper`` columns) to a file name, and that file is then read and
    filtered on its ``UPRN`` column.

    :return: None. The spatial attributes are left untouched (and a warning is logged)
        when no file covers the UPRN or the file contains no row for it.
    :raises ValueError: if ``self.uprn`` has not been set yet
    """

    if self.uprn is None:
        raise ValueError("UPRN is not set, run search_address_epc")

    # We get the filenames
    filenames = read_dataframe_from_s3_parquet(
        bucket_name=DATA_BUCKET, file_key="spatial/filename_meta.parquet"
    )

    # We get the file name for the uprn
    covers_uprn = (filenames['lower'] <= self.uprn) & (filenames['upper'] >= self.uprn)
    filtered_df = filenames[covers_uprn]
    if filtered_df.empty:
        logger.warning("Could not find file containing UPRNS")
        return None

    filename = filtered_df.iloc[0]['filenames']

    spatial_data = read_dataframe_from_s3_parquet(
        bucket_name=DATA_BUCKET, file_key=f"spatial/{filename}"
    )

    spatial = spatial_data[spatial_data["UPRN"] == self.uprn]

    # set_spatial reads the first row of each column, so an empty match would
    # raise IndexError; treat it like the missing-file case instead.
    if spatial.empty:
        logger.warning("Could not find spatial data for UPRN %s in %s", self.uprn, filename)
        return None

    # Pull out spatial features
    self.set_spatial(spatial)
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
from backend.app.db.models.materials import Material
|
||||
from functools import lru_cache
|
||||
|
||||
|
||||
@lru_cache(maxsize=128)
|
||||
def get_materials(session):
|
||||
"""
|
||||
This function will retrieve all materials from the database.
|
||||
|
|
|
|||
|
|
@ -92,18 +92,11 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
if not input_properties:
|
||||
return Response(status_code=204)
|
||||
|
||||
logger.info("Getting EPC, coordinates and conservation area data")
|
||||
logger.info("Getting EPC, and spatial data")
|
||||
for p in input_properties:
|
||||
p.search_address_epc()
|
||||
p.set_year_built()
|
||||
|
||||
coordinate_data = [x for x in open_uprn_data if x['UPRN'] == int(p.data['uprn'])][0]
|
||||
p.set_coordinates(coordinate_data)
|
||||
|
||||
in_conservation_area = [x for x in in_conservation_area_data if x['uprn'] == int(p.data['uprn'])][0].get(
|
||||
"is_in_conservation_area"
|
||||
)
|
||||
p.set_is_in_conservation_area(in_conservation_area)
|
||||
p.get_spatial_data()
|
||||
|
||||
# The materials data could be cached or local so we don't need to make
|
||||
# consistent requests to the backend for
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from recommendations.recommendation_utils import (
|
|||
get_wall_type
|
||||
)
|
||||
|
||||
DATA_DIRECTORY = Path(__file__).parent / "model_data" / "simulation_system" / "data" / "all-domestic-certificates"
|
||||
DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates"
|
||||
|
||||
|
||||
def get_cleaned():
|
||||
|
|
@ -400,6 +400,8 @@ def app():
|
|||
data_processor = DataProcessor(filepath=filepath)
|
||||
|
||||
df = data_processor.pre_process()
|
||||
df[df["WALLS_DESCRIPTION"].str.contains("Cavity")]["WALLS_DESCRIPTION"].unique()
|
||||
|
||||
cleaning_averages = data_processor.make_cleaning_averages()
|
||||
|
||||
# We have some odd cases with missing constituency so we fill
|
||||
|
|
|
|||
12
input_property_list.csv
Normal file
12
input_property_list.csv
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
address,postcode,Notes,,,,
|
||||
28 Distillery Wharf,W6 9bf,,,,,
|
||||
Flat 14 Godley V C House,E2 0LP,,,,,
|
||||
49 Elderfield Road,E5 0LF,,,,,
|
||||
26 Stanhope Road,N6 5NG,,,,,
|
||||
Flat 3 Frederick Building,N1 4BD,,,,,
|
||||
Flat 4 Frederick Building,N1 4BD,,,,,
|
||||
"Flat 28, 22 Adelina Grove",E1 3BX,,,,,
|
||||
"Flat 39, 239 Long Lane",SE1 4PT,,,,,
|
||||
"1, Westview, Someday",LE14 2QH,This property has an unfilled cavity,,,,
|
||||
"59, Ashdale",CM23 4EB,This property has a partially filled cavity,,,,
|
||||
88 Cleveland Avenue,DL3 7BE,This property has a filled cavity,,,,
|
||||
|
|
|
@ -99,8 +99,11 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key):
|
|||
:return: A pandas DataFrame.
|
||||
"""
|
||||
|
||||
if bucket_name is None:
|
||||
raise ValueError("Bucket name is None when trying to read dataframe from parquet")
|
||||
|
||||
if not file_key.endswith(".parquet"):
|
||||
raise logger.warning("This file doesn't look like a parquet file")
|
||||
raise ValueError("This file doesn't look like a parquet file")
|
||||
|
||||
parquet_buffer = read_io_from_s3(
|
||||
bucket_name=bucket_name,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue