tidied up kwh model data

This commit is contained in:
Khalim Conn-Kowlessar 2024-08-09 10:26:25 +01:00
parent 935cfb24cf
commit 66d2a401e8
3 changed files with 49 additions and 30 deletions

View file

@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, create_plan_recommendations, upload_recommendations, create_scenario
create_plan, upload_recommendations, create_scenario
)
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
@ -32,7 +32,6 @@ from backend.app.utils import epc_to_sap_lower_bound, sap_to_epc
from backend.ml_models.api import ModelApi
from backend.Property import Property
from backend.apis.GoogleSolarApi import GoogleSolarApi
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
from recommendations.optimiser.CostOptimiser import CostOptimiser
from recommendations.optimiser.GainOptimiser import GainOptimiser
@ -42,7 +41,10 @@ from recommendations.Mds import Mds
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, read_csv_from_s3
from backend.ml_models.Valuation import PropertyValuation
from etl.bill_savings.EnergyConsumptionModel import EnergyConsumptionModel
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.solar.SolarPhotoSupply import SolarPhotoSupply
logger = setup_logger()
@ -414,9 +416,6 @@ async def trigger_plan(body: PlanTriggerRequest):
materials = get_materials(session)
cleaned = get_cleaned()
uprn_filenames = read_dataframe_from_s3_parquet(
bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
)
solar_api_client = GoogleSolarApi(api_key=get_settings().GOOGLE_SOLAR_API_KEY)
dataset_version = "2024-07-08"
@ -559,33 +558,11 @@ async def trigger_plan(body: PlanTriggerRequest):
extract_ids=False
)
# TODO: Move this/tidy it up
uprn_map = {}
for uprn in [p.uprn for p in input_properties]:
filtered_df = uprn_filenames[
(uprn_filenames["lower"] <= int(uprn))
& (uprn_filenames["upper"] >= int(uprn))
]
if filtered_df["filenames"].values[0] in uprn_map:
uprn_map[filtered_df["filenames"].values[0]].append(int(uprn))
else:
uprn_map[filtered_df["filenames"].values[0]] = [int(uprn)]
for filename, associated_uprn in tqdm(uprn_map.items(), total=len(uprn_map)):
# Read in the file
spatial_data = read_dataframe_from_s3_parquet(
bucket_name="retrofit-data-dev", file_key=f"spatial/{filename}"
)
spatial_df = spatial_data[spatial_data["UPRN"].isin(associated_uprn)]
for p in input_properties:
if p.uprn in associated_uprn:
p.set_spatial(spatial_df[spatial_df["UPRN"] == p.uprn])
# Insert the spatial data
input_properties = OpenUprnClient.set_spatial_data(input_properties, bucket_name=get_settings().DATA_BUCKET)
logger.info("Getting spatial data")
for p in tqdm(input_properties):
if p.spatial is None:
raise Exception("Missed setting of spatial data for a property")
p.get_components(
cleaned=cleaned,
energy_consumption_client=energy_consumption_client,

View file

View file

@ -3,7 +3,8 @@ from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.Property import Property
logger = setup_logger()
@ -116,3 +117,44 @@ class OpenUprnClient:
file_key=file_key,
bucket_name=bucket_name
)
@staticmethod
def set_spatial_data(input_properties: list[Property], bucket_name):
    """
    Attach spatial data to every property in ``input_properties``.

    The lookup table ``spatial/filename_meta.parquet`` is read from ``bucket_name`` and used to work out which
    spatial file covers each UPRN (a file covers a UPRN when ``lower <= UPRN <= upper``). Properties are grouped
    by file so that each required file is read only once, and every property is then handed its own rows via
    ``set_spatial``.

    :param input_properties: properties to update in place; each one is accessed through ``uprn``, ``spatial``
        and ``set_spatial``
    :param bucket_name: S3 bucket holding both the lookup table and the spatial files under ``spatial/``
    :return: the same list that was passed in, with spatial data set on each property
    :raises Exception: if no spatial file covers a property's UPRN, or if a property still has no spatial data
        once every file has been processed
    """
    uprn_filenames = read_dataframe_from_s3_parquet(
        bucket_name=bucket_name, file_key="spatial/filename_meta.parquet"
    )

    # Group the properties by the spatial file whose UPRN range contains them, so each file is read once
    properties_by_file = {}
    for p in input_properties:
        uprn = int(p.uprn)
        matches = uprn_filenames[
            (uprn_filenames["lower"] <= uprn) & (uprn_filenames["upper"] >= uprn)
        ]
        if matches.empty:
            # Fail with the offending UPRN rather than an opaque IndexError from .values[0]
            raise Exception(f"No spatial file covers UPRN {p.uprn}")
        properties_by_file.setdefault(matches["filenames"].values[0], []).append(p)

    for filename, file_properties in tqdm(properties_by_file.items(), total=len(properties_by_file)):
        # Read from the bucket the caller asked for, the same one the lookup table came from
        spatial_data = read_dataframe_from_s3_parquet(
            bucket_name=bucket_name, file_key=f"spatial/{filename}"
        )
        # Compare as int throughout so the match does not depend on how the property stores its UPRN
        file_uprns = [int(p.uprn) for p in file_properties]
        spatial_df = spatial_data[spatial_data["UPRN"].isin(file_uprns)]
        for p in file_properties:
            p.set_spatial(spatial_df[spatial_df["UPRN"] == int(p.uprn)])

    # Perform a final check to ensure that all properties have spatial data
    for p in input_properties:
        if p.spatial is None:
            raise Exception(f"Property with UPRN {p.uprn} does not have spatial data")

    return input_properties