From 3d28af415349d44a84bb58a740f0890cfc3c6a5b Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Mon, 21 Oct 2024 17:04:37 +0100
Subject: [PATCH] updating plan trigger for new pydantic

---
 .idea/Model.iml                            |   2 +-
 .idea/misc.xml                             |   2 +-
 backend/app/config.py                      |   2 +-
 backend/app/plan/schemas.py                | 140 +++++++--------------
 backend/requirements/requirements.txt      |  16 +++
 etl/bill_savings/EnergyConsumptionModel.py |  56 ++++-----
 etl/spatial/OpenUprnClient.py              |  12 --
 etl/spatial/app.py                         |  13 +-
 8 files changed, 103 insertions(+), 140 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index cfc6ba61..df6c4faa 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Fastapi-Backend" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
   <component name="PyNamespacePackagesService">
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 4caca8d5..50cad4ca 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
   <component name="Black">
     <option name="sdkName" value="Python 3.10 (backend)" />
   </component>
-  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-Backend" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
   <component name="PyCharmProfessionalAdvertiser">
     <option name="shown" value="true" />
   </component>
diff --git a/backend/app/config.py b/backend/app/config.py
index 9aaa0a52..21e8f21c 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -1,5 +1,5 @@
 from functools import lru_cache
-from pydantic import BaseSettings
+from pydantic_settings import BaseSettings
 
 
 class Settings(BaseSettings):
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index e0c5f35d..c1a51769 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -1,60 +1,25 @@
-from pydantic import BaseModel, conlist, validator
-from typing import Optional
+from pydantic import BaseModel, Field, BeforeValidator, field_validator
+from typing import Annotated, List, Optional
 
+# Measure identifiers accepted by the inclusion/exclusion validation below
 TYPICAL_MEASURE_TYPES = [
-    "wall_insulation",
-    "roof_insulation",
-    "ventilation",
-    "floor_insulation",
-    "windows",
-    "fireplace",
-    "heating",
-    "hot_water",
-    "low_energy_lighting",
-    "secondary_heating",
-    "solar_pv"
+    "wall_insulation", "roof_insulation", "ventilation", "floor_insulation",
+    "windows", "fireplace", "heating", "hot_water", "low_energy_lighting",
+    "secondary_heating", "solar_pv"
 ]
 
 SPECIFIC_MEASURES = [
-    # Specific measures
-    # Walls
-    "internal_wall_insulation",
-    "external_wall_insulation",
-    "cavity_wall_insulation",
-    # Roof
-    "loft_insulation",
-    "flat_roof_insulation",
-    "room_roof_insulation",
-    # Floor
-    "suspended_floor_insulation",
-    "solid_floor_insulation",
-    # Heating
-    "boiler_upgrade",
-    "high_heat_retention_storage_heater",
-    "air_source_heat_pump",
-    "secondary_heating",
-    # Solar
-    "solar_pv",
-    # Windows Glazing
-    "double_glazing",
-    "secondary_glazing",
-    # Mechanical ventilation
-    "ventilation",
-    # Other
-    "low_energy_lighting",
-    "fireplace",
-    "hot_water",
+    "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
+    "loft_insulation", "flat_roof_insulation", "room_roof_insulation",
+    "suspended_floor_insulation", "solid_floor_insulation",
+    "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump",
+    "secondary_heating", "solar_pv", "double_glazing", "secondary_glazing",
+    "ventilation", "low_energy_lighting", "fireplace", "hot_water"
 ]
 
 NON_INVASIVE_SPECIFIC_MEASURES = [
-    # Specific measures that will typically come from an energy assessment
-    "trickle_vents",
-    "draught_proofing",
-    "mixed_glazing",  # This covers partial double glazing and secondary glazing
-    "cavity_extract_and_refill",
-    # Indicates that there is one (need to handle the case where there are multiple)
-    # extension that requires cavity wall insulation
-    "extension_cavity_wall_insulation",
+    "trickle_vents", "draught_proofing", "mixed_glazing", "cavity_extract_and_refill",
+    "extension_cavity_wall_insulation"
 ]
 
 # This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
@@ -70,11 +35,37 @@ MEASURE_MAP = {
     "heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"]
 }
 
+VALID_GOALS = ["Increasing EPC"]
+VALID_HOUSING_TYPES = ["Social", "Private"]
+
+
+def check_inclusion_or_exclusion(value: str) -> str:  # shared by inclusions and exclusions
+    if value not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
+        raise ValueError(f"{value} is not an allowed inclusion or exclusion")
+    return value
+
+
+def check_goals(value: str) -> str:
+    if value not in VALID_GOALS:  # explicit raise: a bare assert is skipped under python -O
+        raise ValueError(f"{value} is not a valid goal")
+    return value
+
+
+def check_housing_type(value: str) -> str:
+    if value not in VALID_HOUSING_TYPES:  # explicit raise: a bare assert is skipped under python -O
+        raise ValueError(f"{value} is not a valid housing type")
+    return value
+
+
+InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)]  # per-item check
+Goal = Annotated[str, BeforeValidator(check_goals)]
+HousingType = Annotated[str, BeforeValidator(check_housing_type)]
+
 
 class PlanTriggerRequest(BaseModel):
     budget: Optional[float] = None
-    goal: str
-    housing_type: str
+    goal: Goal
+    housing_type: HousingType
     goal_value: str
     portfolio_id: int
     trigger_file_path: str
@@ -82,53 +73,10 @@ class PlanTriggerRequest(BaseModel):
     patches_file_path: Optional[str] = None
     non_invasive_recommendations_file_path: Optional[str] = None
     valuation_file_path: Optional[str] = None
-    exclusions: Optional[conlist(str, min_items=1)] = None
-    inclusions: Optional[conlist(str, min_items=1)] = None
+    exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
+    inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
 
     scenario_name: Optional[str] = ""
-    # If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
-    # exists in the portfolio, it will be ignored
     multi_plan: Optional[bool] = False
-
-    # if False, allows optimisation to be switched off
     optimise: Optional[bool] = True
-
-    # If True, uses default u-values for models
     default_u_values: Optional[bool] = True
-
-    _allowed_goals = {"Increasing EPC"}
-
-    _allowed_housing_types = {"Social", "Private"}
-
-    # Validator to ensure exclusions are within the pre-defined possibilities
-    @validator('exclusions', each_item=True)
-    def check_exclusions(cls, v):
-        if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
-            raise ValueError(f"{v} is not an allowed exclusion")
-        return v
-
-    @validator('inclusions', each_item=True)
-    def check_inclusions(cls, v):
-        if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
-            raise ValueError(f"{v} is not an allowed inclusion")
-        return v
-
-    # Validator to ensure that the goal is within the pre-defined possibilities
-    @validator('goal')
-    def check_goal(cls, v):
-        if v not in cls._allowed_goals:
-            raise ValueError(f"{v} is not a valid goal")
-        return v
-
-    # Validator to ensure that the housing type is within the pre-defined possibilities
-    @validator('housing_type')
-    def check_housing_type(cls, v):
-        if v not in cls._allowed_housing_types:
-            raise ValueError(f"{v} is not a valid housing type")
-        return v
-
-
-class MdsRequest(PlanTriggerRequest):
-    # When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will
-    # cause the service to select the optimal package from the list of measures
-    measures: Optional[conlist(str, min_items=1)] = None
diff --git a/backend/requirements/requirements.txt b/backend/requirements/requirements.txt
index f1fa45cb..82e44fcf 100644
--- a/backend/requirements/requirements.txt
+++ b/backend/requirements/requirements.txt
@@ -8,3 +8,19 @@ tqdm==4.66.5
 # fastapi
 fastapi==0.115.2
 sqlalchemy==2.0.36
+pydantic-settings==2.6.0
+psycopg2-binary==2.9.10
+python-jose==3.3.0
+cryptography==43.0.3
+# AWS
+boto3==1.35.44
+# ML, Data, Data Science
+usaddress==0.5.11
+epc-api-python==1.0.2
+fuzzywuzzy==0.18.0
+python-Levenshtein==0.26.0
+textblob==0.18.0.post0
+msgpack==1.1.0
+scikit-learn==1.5.2
+cffi==1.15.1
+mip==1.15.0
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 4daf2b31..153f4ee2 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -1,6 +1,6 @@
 import pandas as pd
 import numpy as np
-from xgboost import XGBRegressor
+# from xgboost import XGBRegressor  # NOTE(review): init_model below is commented out too; check remaining callers
 from sklearn.model_selection import train_test_split
 from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
 from sklearn.feature_selection import RFECV
@@ -278,33 +278,33 @@ class EnergyConsumptionModel:
 
         logger.info(f"Feature selection completed for target {target}")
 
-    def init_model(self, feature_selection=False):
-
-        if feature_selection:
-            # Set up a smaller model to work it
-            return XGBRegressor(
-                objective='reg:squarederror',
-                n_estimators=50,
-                learning_rate=0.05,
-                max_depth=6,
-                subsample=0.8,
-                colsample_bytree=0.8,
-                reg_alpha=0.1,
-                reg_lambda=0.1
-            )
-
-        return XGBRegressor(
-            objective='reg:squarederror',
-            n_estimators=1000,
-            learning_rate=0.05,
-            max_depth=6,
-            min_child_weight=3,
-            subsample=0.8,
-            colsample_bytree=0.8,
-            reg_alpha=0.1,
-            reg_lambda=0.1
-            # n_jobs=self.n_jobs
-        )
+    # def init_model(self, feature_selection=False):
+    #
+    #     if feature_selection:
+    #         # Set up a smaller model to work it
+    #         return XGBRegressor(
+    #             objective='reg:squarederror',
+    #             n_estimators=50,
+    #             learning_rate=0.05,
+    #             max_depth=6,
+    #             subsample=0.8,
+    #             colsample_bytree=0.8,
+    #             reg_alpha=0.1,
+    #             reg_lambda=0.1
+    #         )
+    #
+    #     return XGBRegressor(
+    #         objective='reg:squarederror',
+    #         n_estimators=1000,
+    #         learning_rate=0.05,
+    #         max_depth=6,
+    #         min_child_weight=3,
+    #         subsample=0.8,
+    #         colsample_bytree=0.8,
+    #         reg_alpha=0.1,
+    #         reg_lambda=0.1
+    #         # n_jobs=self.n_jobs
+    #     )
 
     def fit_model(self, target):
         """Fits the model to the training data and removes zero-importance features."""
diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py
index 5c43347a..c0cd3992 100644
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@@ -1,7 +1,6 @@
 import os
 from tqdm import tqdm
 import pandas as pd
-import geopandas as gpd
 from utils.logger import setup_logger
 from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
 from backend.Property import Property
@@ -86,17 +85,6 @@ class OpenUprnClient:
                 return filename
         return None
 
-    @staticmethod
-    def convert_bng_data_to_gpd(df):
-
-        gpd_data = gpd.GeoDataFrame(
-            df,
-            geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE),
-            crs="EPSG:27700"  # British National Grid
-        )
-
-        return gpd_data
-
     def save_filenames_to_s3(self, bucket_name):
         """
         Save the filenames to s3
diff --git a/etl/spatial/app.py b/etl/spatial/app.py
index d58509dd..e8055432 100644
--- a/etl/spatial/app.py
+++ b/etl/spatial/app.py
@@ -6,6 +6,7 @@ our database for querying from other services
 import os
 from tqdm import tqdm
 import pandas as pd
+import geopandas as gpd
 from etl.spatial.ConservationAreaClient import ConservationAreaClient
 from etl.spatial.OpenUprnClient import OpenUprnClient
 from etl.spatial.SpecialBuildingsClient import SpecialBuildingsClient
@@ -25,6 +26,16 @@ HISTORIC_ENGLAND_HERITAGE_BUILDINGS_PATHNAME = \
 logger = setup_logger()
 
 
+def convert_bng_data_to_gpd(df):
+    """Return ``df`` as a GeoDataFrame with point geometry built from its X/Y coordinate columns."""
+    bng_points = gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE)
+    return gpd.GeoDataFrame(
+        df,
+        geometry=bng_points,
+        crs="EPSG:27700",  # British National Grid
+    )
+
+
 def app():
     """
     This application uses the conservation area datasets to determine if a UPRN is
@@ -85,7 +96,7 @@ def app():
     to_loop_over = open_uprn_client.data.groupby("filename")
 
     for filename, uprn_df in tqdm(open_uprn_client.data.groupby("filename"), total=len(to_loop_over)):
-        uprn_gdf = OpenUprnClient.convert_bng_data_to_gpd(uprn_df)
+        uprn_gdf = convert_bng_data_to_gpd(uprn_df)
 
         uprn_gdf = conservation_area_client.is_in_conservation_area_vectorised(uprn_gdf=uprn_gdf)
         uprn_gdf = special_buildings_client.is_listed_building_vectorised(uprn_gdf=uprn_gdf)