From 3d28af415349d44a84bb58a740f0890cfc3c6a5b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Mon, 21 Oct 2024 17:04:37 +0100 Subject: [PATCH] updating plan trigger for new pydantic --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/app/config.py | 2 +- backend/app/plan/schemas.py | 140 +++++++-------------- backend/requirements/requirements.txt | 16 +++ etl/bill_savings/EnergyConsumptionModel.py | 56 ++++----- etl/spatial/OpenUprnClient.py | 12 -- etl/spatial/app.py | 13 +- 8 files changed, 103 insertions(+), 140 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index cfc6ba61..df6c4faa 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -7,7 +7,7 @@ - + diff --git a/.idea/misc.xml b/.idea/misc.xml index 4caca8d5..50cad4ca 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -3,7 +3,7 @@ - + diff --git a/backend/app/config.py b/backend/app/config.py index 9aaa0a52..21e8f21c 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -1,5 +1,5 @@ from functools import lru_cache -from pydantic import BaseSettings +from pydantic_settings import BaseSettings class Settings(BaseSettings): diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index e0c5f35d..c1a51769 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -1,60 +1,25 @@ -from pydantic import BaseModel, conlist, validator -from typing import Optional +from pydantic import BaseModel, Field, BeforeValidator, field_validator +from typing import Annotated, List, Optional +# Example constants for validation TYPICAL_MEASURE_TYPES = [ - "wall_insulation", - "roof_insulation", - "ventilation", - "floor_insulation", - "windows", - "fireplace", - "heating", - "hot_water", - "low_energy_lighting", - "secondary_heating", - "solar_pv" + "wall_insulation", "roof_insulation", "ventilation", "floor_insulation", + "windows", "fireplace", "heating", "hot_water", "low_energy_lighting", + "secondary_heating", "solar_pv" ] SPECIFIC_MEASURES = [ - # Specific measures - # Walls - "internal_wall_insulation", - "external_wall_insulation", - "cavity_wall_insulation", - # Roof - "loft_insulation", - "flat_roof_insulation", - "room_roof_insulation", - # Floor - "suspended_floor_insulation", - "solid_floor_insulation", - # Heating - "boiler_upgrade", - "high_heat_retention_storage_heater", - "air_source_heat_pump", - "secondary_heating", - # Solar - "solar_pv", - # Windows Glazing - "double_glazing", - "secondary_glazing", - # Mechanical ventilation - "ventilation", - # Other - "low_energy_lighting", - "fireplace", - "hot_water", + "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation", + "loft_insulation", "flat_roof_insulation", "room_roof_insulation", + "suspended_floor_insulation", "solid_floor_insulation", + "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump", + "secondary_heating", "solar_pv", "double_glazing", "secondary_glazing", + "ventilation", "low_energy_lighting", "fireplace", "hot_water" ] NON_INVASIVE_SPECIFIC_MEASURES = [ - # Specific measures that will typically come from an energy assessment - "trickle_vents", - "draught_proofing", - "mixed_glazing", # This covers partial double glazing and secondary glazing - "cavity_extract_and_refill", - # Indicates that there is one (need to handle the case where there are multiple) - # extension that requires cavity wall insulation - "extension_cavity_wall_insulation", + "trickle_vents", "draught_proofing", "mixed_glazing", "cavity_extract_and_refill", + "extension_cavity_wall_insulation" ] # This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures @@ -70,11 +35,37 @@ MEASURE_MAP = { "heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"] } +VALID_GOALS = ["Increasing EPC"] +VALID_HOUSING_TYPES = ["Social", "Private"] + + +# Define the validation function for inclusions/exclusions +def check_inclusion_or_exclusion(value: str) -> str: + if value not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES: + raise ValueError(f"{value} is not an allowed inclusion") + return value + + +def check_goals(value: str) -> str: + assert value in VALID_GOALS, f"{value} is not a valid goal" + return value + + +def check_housing_type(value: str) -> str: + assert value in VALID_HOUSING_TYPES, f"{value} is not a valid housing type" + return value + + +# Use Annotated with BeforeValidator for each list item validation +InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)] +Goal = Annotated[str, BeforeValidator(check_goals)] +HousingType = Annotated[str, BeforeValidator(check_housing_type)] + class PlanTriggerRequest(BaseModel): budget: Optional[float] = None - goal: str - housing_type: str + goal: Goal + housing_type: HousingType goal_value: str portfolio_id: int trigger_file_path: str @@ -82,53 +73,10 @@ class PlanTriggerRequest(BaseModel): patches_file_path: Optional[str] = None non_invasive_recommendations_file_path: Optional[str] = None valuation_file_path: Optional[str] = None - exclusions: Optional[conlist(str, min_items=1)] = None - inclusions: Optional[conlist(str, min_items=1)] = None + exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) + inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1) scenario_name: Optional[str] = "" - # If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property - # exists in the portfolio, it will be ignored multi_plan: Optional[bool] = False - - # if False, allows optimisation to be switched off optimise: Optional[bool] = True - - # If True, uses default u-values for models default_u_values: Optional[bool] = True - - _allowed_goals = {"Increasing EPC"} - - _allowed_housing_types = {"Social", "Private"} - - # Validator to ensure exclusions are within the pre-defined possibilities - @validator('exclusions', each_item=True) - def check_exclusions(cls, v): - if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES: - raise ValueError(f"{v} is not an allowed exclusion") - return v - - @validator('inclusions', each_item=True) - def check_inclusions(cls, v): - if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES: - raise ValueError(f"{v} is not an allowed inclusion") - return v - - # Validator to ensure that the goal is within the pre-defined possibilities - @validator('goal') - def check_goal(cls, v): - if v not in cls._allowed_goals: - raise ValueError(f"{v} is not a valid goal") - return v - - # Validator to ensure that the housing type is within the pre-defined possibilities - @validator('housing_type') - def check_housing_type(cls, v): - if v not in cls._allowed_housing_types: - raise ValueError(f"{v} is not a valid housing type") - return v - - -class MdsRequest(PlanTriggerRequest): - # When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will - # cause the service to select the optimal package from the list of measures - measures: Optional[conlist(str, min_items=1)] = None diff --git a/backend/requirements/requirements.txt b/backend/requirements/requirements.txt index f1fa45cb..82e44fcf 100644 --- a/backend/requirements/requirements.txt +++ b/backend/requirements/requirements.txt @@ -8,3 +8,19 @@ tqdm==4.66.5 # fastapi fastapi==0.115.2 sqlalchemy==2.0.36 +pydantic-settings==2.6.0 +psycopg2-binary==2.9.10 +python-jose==3.3.0 +cryptography==43.0.3 +# AWS +boto3==1.35.44 +# ML, Data, Data Science +usaddress==0.5.11 +epc-api-python==1.0.2 +fuzzywuzzy==0.18.0 +python-Levenshtein==0.26.0 +textblob==0.18.0.post0 +msgpack==1.1.0 +scikit-learn==1.5.2 +cffi==1.15.1 +mip==1.15.0 diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py index 4daf2b31..153f4ee2 100644 --- a/etl/bill_savings/EnergyConsumptionModel.py +++ b/etl/bill_savings/EnergyConsumptionModel.py @@ -1,6 +1,6 @@ import pandas as pd import numpy as np -from xgboost import XGBRegressor +# from xgboost import XGBRegressor from sklearn.model_selection import train_test_split from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error from sklearn.feature_selection import RFECV @@ -278,33 +278,33 @@ class EnergyConsumptionModel: logger.info(f"Feature selection completed for target {target}") - def init_model(self, feature_selection=False): - - if feature_selection: - # Set up a smaller model to work it - return XGBRegressor( - objective='reg:squarederror', - n_estimators=50, - learning_rate=0.05, - max_depth=6, - subsample=0.8, - colsample_bytree=0.8, - reg_alpha=0.1, - reg_lambda=0.1 - ) - - return XGBRegressor( - objective='reg:squarederror', - n_estimators=1000, - learning_rate=0.05, - max_depth=6, - min_child_weight=3, - subsample=0.8, - colsample_bytree=0.8, - reg_alpha=0.1, - reg_lambda=0.1 - # n_jobs=self.n_jobs - ) + # def init_model(self, feature_selection=False): + # + # if feature_selection: + # # Set up a smaller model to work it + # return XGBRegressor( + # objective='reg:squarederror', + # n_estimators=50, + # learning_rate=0.05, + # max_depth=6, + # subsample=0.8, + # colsample_bytree=0.8, + # reg_alpha=0.1, + # reg_lambda=0.1 + # ) + # + # return XGBRegressor( + # objective='reg:squarederror', + # n_estimators=1000, + # learning_rate=0.05, + # max_depth=6, + # min_child_weight=3, + # subsample=0.8, + # colsample_bytree=0.8, + # reg_alpha=0.1, + # reg_lambda=0.1 + # # n_jobs=self.n_jobs + # ) def fit_model(self, target): """Fits the model to the training data and removes zero-importance features.""" diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py index 5c43347a..c0cd3992 100644 --- a/etl/spatial/OpenUprnClient.py +++ b/etl/spatial/OpenUprnClient.py @@ -1,7 +1,6 @@ import os from tqdm import tqdm import pandas as pd -import geopandas as gpd from utils.logger import setup_logger from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet from backend.Property import Property @@ -86,17 +85,6 @@ class OpenUprnClient: return filename return None - @staticmethod - def convert_bng_data_to_gpd(df): - - gpd_data = gpd.GeoDataFrame( - df, - geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE), - crs="EPSG:27700" # British National Grid - ) - - return gpd_data - def save_filenames_to_s3(self, bucket_name): """ Save the filenames to s3 diff --git a/etl/spatial/app.py b/etl/spatial/app.py index d58509dd..e8055432 100644 --- a/etl/spatial/app.py +++ b/etl/spatial/app.py @@ -6,6 +6,7 @@ our database for querying from other services import os from tqdm import tqdm import pandas as pd +import geopandas as gpd from etl.spatial.ConservationAreaClient import ConservationAreaClient from etl.spatial.OpenUprnClient import OpenUprnClient from etl.spatial.SpecialBuildingsClient import SpecialBuildingsClient @@ -25,6 +26,16 @@ HISTORIC_ENGLAND_HERITAGE_BUILDINGS_PATHNAME = \ logger = setup_logger() +def convert_bng_data_to_gpd(df): + gpd_data = gpd.GeoDataFrame( + df, + geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE), + crs="EPSG:27700" # British National Grid + ) + + return gpd_data + + def app(): """ This application uses the conservation area datasets to determine if a UPRN is @@ -85,7 +96,7 @@ def app(): to_loop_over = open_uprn_client.data.groupby("filename") for filename, uprn_df in tqdm(open_uprn_client.data.groupby("filename"), total=len(to_loop_over)): - uprn_gdf = OpenUprnClient.convert_bng_data_to_gpd(uprn_df) + uprn_gdf = convert_bng_data_to_gpd(uprn_df) uprn_gdf = conservation_area_client.is_in_conservation_area_vectorised(uprn_gdf=uprn_gdf) uprn_gdf = special_buildings_client.is_listed_building_vectorised(uprn_gdf=uprn_gdf)