def set_spatial(self, spatial: pd.DataFrame):
    """
    Sets the property's spatial attributes (conservation area status, listed status and
    heritage status) from its spatial data

    Only the first row of ``spatial`` is read, so the frame is expected to have already been
    filtered down to this property (get_spatial_data filters on UPRN before calling this).

    :param spatial: Dataframe, containing the spatial data for the property. It must have the
        columns "conservation_status", "is_listed" and "is_heritage"
    :raises ValueError: if spatial is None or contains no rows
    :raises KeyError: if one of the expected columns is missing
    """
    if spatial is None or spatial.empty:
        # Without this guard the .values[0] lookups below fail with a bare IndexError that
        # gives no hint that the property simply has no spatial record
        raise ValueError("No spatial data was supplied for this property")

    # Read every value before assigning anything, so a missing column cannot leave the
    # property with only some of its spatial attributes updated
    conservation_status = spatial["conservation_status"].values[0]
    is_listed = spatial["is_listed"].values[0]
    is_heritage = spatial["is_heritage"].values[0]

    self.in_conservation_area = conservation_status
    self.is_listed = is_listed
    self.is_heritage = is_heritage
def get_spatial_data(self):
    """
    Given a property's UPRN, this method will pull the associated spatial data from s3 and
    store it on the property via set_spatial

    The lookup is done in two steps: "spatial/filename_meta.parquet" maps a UPRN range (its
    "lower" and "upper" columns) to a file name (its "filenames" column), and that file is
    then filtered down to the rows whose "UPRN" equals this property's UPRN.

    :return: None. If no file covers the UPRN, or the file has no row for it, a warning is
        logged and the property's spatial attributes are left as they were
    :raises ValueError: if the UPRN has not been set on the property yet
    """

    if self.uprn is None:
        raise ValueError("UPRN is not set, run search_address_epc")

    # We get the filenames
    filenames = read_dataframe_from_s3_parquet(
        bucket_name=DATA_BUCKET, file_key="spatial/filename_meta.parquet"
    )

    # We get the file name for the uprn
    in_range = (filenames['lower'] <= self.uprn) & (filenames['upper'] >= self.uprn)
    filtered_df = filenames[in_range]
    if filtered_df.empty:
        logger.warning("Could not find file containing UPRNS")
        return None

    # If several ranges match, the first one wins
    filename = filtered_df.iloc[0]['filenames']

    spatial_data = read_dataframe_from_s3_parquet(
        bucket_name=DATA_BUCKET, file_key=f"spatial/{filename}"
    )

    spatial = spatial_data[spatial_data["UPRN"] == self.uprn]
    if spatial.empty:
        # The UPRN falls inside the file's range but has no record of its own. Treat this
        # like a missing file rather than letting set_spatial index into an empty frame
        logger.warning("Could not find spatial data for UPRN %s in %s", self.uprn, filename)
        return None

    # Pull out spatial features
    self.set_spatial(spatial)
    return None
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index aeb5235a..d2e5b4f3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -92,18 +92,11 @@ async def trigger_plan(body: PlanTriggerRequest): if not input_properties: return Response(status_code=204) - logger.info("Getting EPC, coordinates and conservation area data") + logger.info("Getting EPC, and spatial data") for p in input_properties: p.search_address_epc() p.set_year_built() - - coordinate_data = [x for x in open_uprn_data if x['UPRN'] == int(p.data['uprn'])][0] - p.set_coordinates(coordinate_data) - - in_conservation_area = [x for x in in_conservation_area_data if x['uprn'] == int(p.data['uprn'])][0].get( - "is_in_conservation_area" - ) - p.set_is_in_conservation_area(in_conservation_area) + p.get_spatial_data() # The materials data could be cached or local so we don't need to make # consistent requests to the backend for diff --git a/etl/property_change/app.py b/etl/property_change/app.py index 570de202..605c9a93 100644 --- a/etl/property_change/app.py +++ b/etl/property_change/app.py @@ -22,7 +22,7 @@ from recommendations.recommendation_utils import ( get_wall_type ) -DATA_DIRECTORY = Path(__file__).parent / "model_data" / "simulation_system" / "data" / "all-domestic-certificates" +DATA_DIRECTORY = Path(__file__).parent / "local_data" / "all-domestic-certificates" def get_cleaned(): @@ -400,6 +400,8 @@ def app(): data_processor = DataProcessor(filepath=filepath) df = data_processor.pre_process() + df[df["WALLS_DESCRIPTION"].str.contains("Cavity")]["WALLS_DESCRIPTION"].unique() + cleaning_averages = data_processor.make_cleaning_averages() # We have some odd cases with missing constituency so we fill diff --git a/input_property_list.csv b/input_property_list.csv new file mode 100644 index 00000000..097a6b23 --- /dev/null +++ b/input_property_list.csv @@ -0,0 +1,12 @@ +address,postcode,Notes,,,, +28 Distillery Wharf,W6 9bf,,,,, +Flat 14 Godley V C House,E2 0LP,,,,, 
+49 Elderfield Road,E5 0LF,,,,, +26 Stanhope Road,N6 5NG,,,,, +Flat 3 Frederick Building,N1 4BD,,,,, +Flat 4 Frederick Building,N1 4BD,,,,, +"Flat 28, 22 Adelina Grove",E1 3BX,,,,, +"Flat 39, 239 Long Lane",SE1 4PT,,,,, +"1, Westview, Someday",LE14 2QH,This property has an unfilled cavity,,,, +"59, Ashdale",CM23 4EB,This property has a partially filled cavity,,,, +88 Cleveland Avenue,DL3 7BE,This property has a filled cavity,,,, \ No newline at end of file diff --git a/utils/s3.py b/utils/s3.py index be0aa008..7414da3f 100644 --- a/utils/s3.py +++ b/utils/s3.py @@ -99,8 +99,11 @@ def read_dataframe_from_s3_parquet(bucket_name, file_key): :return: A pandas DataFrame. """ + if bucket_name is None: + raise ValueError("Bucket name is None when trying to read dataframe from parquet") + if not file_key.endswith(".parquet"): - raise logger.warning("This file doesn't look like a parquet file") + raise ValueError("This file doesn't look like a parquet file") parquet_buffer = read_io_from_s3( bucket_name=bucket_name,