diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 02e669a5..9562af86 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
     update_or_create_property_spatial_details
 )
 from backend.app.db.functions.recommendations_functions import (
-    create_plan, create_plan_recommendations, upload_recommendations, create_scenario
+    create_plan, upload_recommendations, create_scenario
 )
 from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
 from backend.app.db.models.portfolio import rating_lookup
@@ -32,7 +32,6 @@ from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
 from backend.ml_models.api import ModelApi
 from backend.Property import Property
 from backend.apis.GoogleSolarApi import GoogleSolarApi
-from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 from recommendations.optimiser.CostOptimiser import CostOptimiser
 from recommendations.optimiser.GainOptimiser import GainOptimiser
 
@@ -42,7 +41,10 @@ from recommendations.Mds import Mds
 from utils.logger import setup_logger
 from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
 from backend.ml_models.Valuation import PropertyValuation
+
 from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
+from etl.spatial.OpenUprnClient import OpenUprnClient
+from etl.solar.SolarPhotoSupply import SolarPhotoSupply
 
 logger = setup_logger()
 
@@ -414,9 +416,6 @@ async def trigger_plan(body: PlanTriggerRequest):
     materials = get_materials(session)
     cleaned = get_cleaned()
 
-    uprn_filenames = read_dataframe_from_s3_parquet(
-        bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
-    )
     solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
 
     dataset_version = "2024-07-08"
@@ -559,33 +558,11 @@ async def trigger_plan(body: PlanTriggerRequest):
         extract_ids=False
     )
 
-    # TODO: Move this/tidy it up
-    uprn_map = {}
-    for uprn in [p.uprn for p in input_properties]:
-        filtered_df = uprn_filenames[
-            (uprn_filenames["lower"] <= int(uprn))
-            & (uprn_filenames["upper"] >= int(uprn))
-        ]
-        if filtered_df["filenames"].values[0] in uprn_map:
-            uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
-        else:
-            uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
-
-    for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
-        # Read in the file
-        spatial_data = read_dataframe_from_s3_parquet(
-            bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
-        )
-
-        spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
-        for p in input_properties:
-            if p.uprn in associated_uprn:
-                p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])
+    # Insert the spatial data
+    input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
 
     logger.info("Getting spatial data")
     for p in tqdm(input_properties):
-        if p.spatial is None:
-            raise Exception("Missed setting of spatial data for a property")
         p.get_components(
             cleaned=cleaned,
             energy_consumption_client=energy_consumption_client,
diff --git a/etl/bill_savings/KwhData.py b/etl/bill_savings/KwhData.py
new file mode 100644
index 00000000..e69de29b
diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py
index 7392c4ac..198f9945 100644
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@@ -3,7 +3,8 @@ from tqdm import tqdm
 import pandas as pd
 import geopandas as gpd
 from utils.logger import setup_logger
-from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet
+from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
+from backend.Property import Property
 
 logger = setup_logger()
 
@@ -116,3 +117,52 @@ class OpenUprnClient:
                 file_key=file_key,
                 bucket_name=bucket_name
             )
+
+    @staticmethod
+    def set_spatial_data(input_properties: list[Property], bucket_name):
+        """
+        Attach spatial data to every property in ``input_properties``.
+
+        The UPRN range index (``spatial/filename_meta.parquet``) is read first so that only the
+        spatial files covering the requested UPRNs are downloaded, each of them exactly once.
+
+        :param input_properties: properties to update; ``set_spatial`` is called on each one
+        :param bucket_name: S3 bucket holding both the range index and the spatial files
+        :return: the list that was passed in
+        :raises IndexError: if no spatial file covers a property's UPRN
+        :raises Exception: if a property still has no spatial data once every file is processed
+        """
+
+        uprn_filenames = read_dataframe_from_s3_parquet(
+            bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
+        )
+
+        # Group the properties by the spatial file whose UPRN range covers them
+        properties_by_file = {}
+        for p in input_properties:
+            uprn = int(p.uprn)
+            covering = uprn_filenames[
+                (uprn_filenames["lower"] <= uprn)
+                & (uprn_filenames["upper"] >= uprn)
+            ]
+            if covering.empty:
+                raise IndexError(f"No spatial file covers UPRN {p.uprn}")
+            properties_by_file.setdefault(covering["filenames"].values[0], []).append(p)
+
+        for filename, file_properties in tqdm(properties_by_file.items(), total=len(properties_by_file)):
+            # Read from the caller's bucket rather than a hard-coded environment
+            spatial_data = read_dataframe_from_s3_parquet(
+                bucket_name=bucket_name, file_key=f"spatial/{filename}"
+            )
+
+            uprns = [int(p.uprn) for p in file_properties]
+            spatial_df = spatial_data[spatial_data["UPRN"].isin(uprns)]
+            for p in file_properties:
+                p.set_spatial(spatial_df[spatial_df["UPRN"] == int(p.uprn)])
+
+        # Perform a final check to ensure that all properties have spatial data
+        for p in input_properties:
+            if p.spatial is None:
+                raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")
+
+        return input_properties