diff --git a/.idea/Model.iml b/.idea/Model.iml
index cfc6ba61..df6c4faa 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 4caca8d5..50cad4ca 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -3,7 +3,7 @@
-
+
diff --git a/backend/app/config.py b/backend/app/config.py
index 9aaa0a52..21e8f21c 100644
--- a/backend/app/config.py
+++ b/backend/app/config.py
@@ -1,5 +1,5 @@
from functools import lru_cache
-from pydantic import BaseSettings
+from pydantic_settings import BaseSettings
class Settings(BaseSettings):
diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py
index e0c5f35d..c1a51769 100644
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@@ -1,60 +1,25 @@
-from pydantic import BaseModel, conlist, validator
-from typing import Optional
+from pydantic import BaseModel, Field, BeforeValidator, field_validator
+from typing import Annotated, List, Optional
+# Example constants for validation
TYPICAL_MEASURE_TYPES = [
- "wall_insulation",
- "roof_insulation",
- "ventilation",
- "floor_insulation",
- "windows",
- "fireplace",
- "heating",
- "hot_water",
- "low_energy_lighting",
- "secondary_heating",
- "solar_pv"
+ "wall_insulation", "roof_insulation", "ventilation", "floor_insulation",
+ "windows", "fireplace", "heating", "hot_water", "low_energy_lighting",
+ "secondary_heating", "solar_pv"
]
SPECIFIC_MEASURES = [
- # Specific measures
- # Walls
- "internal_wall_insulation",
- "external_wall_insulation",
- "cavity_wall_insulation",
- # Roof
- "loft_insulation",
- "flat_roof_insulation",
- "room_roof_insulation",
- # Floor
- "suspended_floor_insulation",
- "solid_floor_insulation",
- # Heating
- "boiler_upgrade",
- "high_heat_retention_storage_heater",
- "air_source_heat_pump",
- "secondary_heating",
- # Solar
- "solar_pv",
- # Windows Glazing
- "double_glazing",
- "secondary_glazing",
- # Mechanical ventilation
- "ventilation",
- # Other
- "low_energy_lighting",
- "fireplace",
- "hot_water",
+ "internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
+ "loft_insulation", "flat_roof_insulation", "room_roof_insulation",
+ "suspended_floor_insulation", "solid_floor_insulation",
+ "boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump",
+ "secondary_heating", "solar_pv", "double_glazing", "secondary_glazing",
+ "ventilation", "low_energy_lighting", "fireplace", "hot_water"
]
NON_INVASIVE_SPECIFIC_MEASURES = [
- # Specific measures that will typically come from an energy assessment
- "trickle_vents",
- "draught_proofing",
- "mixed_glazing", # This covers partial double glazing and secondary glazing
- "cavity_extract_and_refill",
- # Indicates that there is one (need to handle the case where there are multiple)
- # extension that requires cavity wall insulation
- "extension_cavity_wall_insulation",
+ "trickle_vents", "draught_proofing", "mixed_glazing", "cavity_extract_and_refill",
+ "extension_cavity_wall_insulation"
]
# This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
@@ -70,11 +35,37 @@ MEASURE_MAP = {
"heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"]
}
+VALID_GOALS = ["Increasing EPC"]
+VALID_HOUSING_TYPES = ["Social", "Private"]
+
+
+# Define the validation function for inclusions/exclusions
+def check_inclusion_or_exclusion(value: str) -> str:
+    """Validate a single entry of an ``inclusions`` or ``exclusions`` list.
+
+    :param value: candidate measure name.
+    :return: ``value`` unchanged when it appears in TYPICAL_MEASURE_TYPES,
+        SPECIFIC_MEASURES or NON_INVASIVE_SPECIFIC_MEASURES.
+    :raises ValueError: if ``value`` is in none of those lists.
+    """
+    # Check each list in turn instead of concatenating them on every call.
+    # Plain list membership (not a set) is deliberate: an unhashable input is
+    # then reported as an invalid value rather than raising TypeError.
+    allowed_groups = (TYPICAL_MEASURE_TYPES, SPECIFIC_MEASURES, NON_INVASIVE_SPECIFIC_MEASURES)
+    if not any(value in group for group in allowed_groups):
+        # This validator serves both fields, so the message names both.
+        raise ValueError(f"{value} is not an allowed inclusion or exclusion")
+    return value
+
+
+def check_goals(value: str) -> str:
+    """Validate that ``value`` is one of VALID_GOALS.
+
+    :param value: candidate goal.
+    :return: ``value`` unchanged when it is listed in VALID_GOALS.
+    :raises ValueError: if ``value`` is not listed in VALID_GOALS.
+    """
+    # Raise explicitly instead of using ``assert``: assert statements are
+    # stripped when Python runs with -O, which would silently disable this check.
+    if value not in VALID_GOALS:
+        raise ValueError(f"{value} is not a valid goal")
+    return value
+
+
+def check_housing_type(value: str) -> str:
+    """Validate that ``value`` is one of VALID_HOUSING_TYPES.
+
+    :param value: candidate housing type.
+    :return: ``value`` unchanged when it is listed in VALID_HOUSING_TYPES.
+    :raises ValueError: if ``value`` is not listed in VALID_HOUSING_TYPES.
+    """
+    # Raise explicitly instead of using ``assert``: assert statements are
+    # stripped when Python runs with -O, which would silently disable this check.
+    if value not in VALID_HOUSING_TYPES:
+        raise ValueError(f"{value} is not a valid housing type")
+    return value
+
+
+# Use Annotated with BeforeValidator for each list item validation
+InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)]
+Goal = Annotated[str, BeforeValidator(check_goals)]
+HousingType = Annotated[str, BeforeValidator(check_housing_type)]
+
class PlanTriggerRequest(BaseModel):
budget: Optional[float] = None
- goal: str
- housing_type: str
+ goal: Goal
+ housing_type: HousingType
goal_value: str
portfolio_id: int
trigger_file_path: str
@@ -82,53 +73,10 @@ class PlanTriggerRequest(BaseModel):
patches_file_path: Optional[str] = None
non_invasive_recommendations_file_path: Optional[str] = None
valuation_file_path: Optional[str] = None
- exclusions: Optional[conlist(str, min_items=1)] = None
- inclusions: Optional[conlist(str, min_items=1)] = None
+ exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
+ inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
scenario_name: Optional[str] = ""
- # If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
- # exists in the portfolio, it will be ignored
multi_plan: Optional[bool] = False
-
- # if False, allows optimisation to be switched off
optimise: Optional[bool] = True
-
- # If True, uses default u-values for models
default_u_values: Optional[bool] = True
-
- _allowed_goals = {"Increasing EPC"}
-
- _allowed_housing_types = {"Social", "Private"}
-
- # Validator to ensure exclusions are within the pre-defined possibilities
- @validator('exclusions', each_item=True)
- def check_exclusions(cls, v):
- if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
- raise ValueError(f"{v} is not an allowed exclusion")
- return v
-
- @validator('inclusions', each_item=True)
- def check_inclusions(cls, v):
- if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
- raise ValueError(f"{v} is not an allowed inclusion")
- return v
-
- # Validator to ensure that the goal is within the pre-defined possibilities
- @validator('goal')
- def check_goal(cls, v):
- if v not in cls._allowed_goals:
- raise ValueError(f"{v} is not a valid goal")
- return v
-
- # Validator to ensure that the housing type is within the pre-defined possibilities
- @validator('housing_type')
- def check_housing_type(cls, v):
- if v not in cls._allowed_housing_types:
- raise ValueError(f"{v} is not a valid housing type")
- return v
-
-
-class MdsRequest(PlanTriggerRequest):
- # When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will
- # cause the service to select the optimal package from the list of measures
- measures: Optional[conlist(str, min_items=1)] = None
diff --git a/backend/requirements/requirements.txt b/backend/requirements/requirements.txt
index f1fa45cb..82e44fcf 100644
--- a/backend/requirements/requirements.txt
+++ b/backend/requirements/requirements.txt
@@ -8,3 +8,19 @@ tqdm==4.66.5
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
+pydantic-settings==2.6.0
+psycopg2-binary==2.9.10
+python-jose==3.3.0
+cryptography==43.0.3
+# AWS
+boto3==1.35.44
+# ML, Data, Data Science
+usaddress==0.5.11
+epc-api-python==1.0.2
+fuzzywuzzy==0.18.0
+python-Levenshtein==0.26.0
+textblob==0.18.0.post0
+msgpack==1.1.0
+scikit-learn==1.5.2
+cffi==1.15.1
+mip==1.15.0
diff --git a/etl/bill_savings/EnergyConsumptionModel.py b/etl/bill_savings/EnergyConsumptionModel.py
index 4daf2b31..153f4ee2 100644
--- a/etl/bill_savings/EnergyConsumptionModel.py
+++ b/etl/bill_savings/EnergyConsumptionModel.py
@@ -1,6 +1,6 @@
import pandas as pd
import numpy as np
-from xgboost import XGBRegressor
+# from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.feature_selection import RFECV
@@ -278,33 +278,33 @@ class EnergyConsumptionModel:
logger.info(f"Feature selection completed for target {target}")
- def init_model(self, feature_selection=False):
-
- if feature_selection:
- # Set up a smaller model to work it
- return XGBRegressor(
- objective='reg:squarederror',
- n_estimators=50,
- learning_rate=0.05,
- max_depth=6,
- subsample=0.8,
- colsample_bytree=0.8,
- reg_alpha=0.1,
- reg_lambda=0.1
- )
-
- return XGBRegressor(
- objective='reg:squarederror',
- n_estimators=1000,
- learning_rate=0.05,
- max_depth=6,
- min_child_weight=3,
- subsample=0.8,
- colsample_bytree=0.8,
- reg_alpha=0.1,
- reg_lambda=0.1
- # n_jobs=self.n_jobs
- )
+ # def init_model(self, feature_selection=False):
+ #
+ # if feature_selection:
+ # # Set up a smaller model to work it
+ # return XGBRegressor(
+ # objective='reg:squarederror',
+ # n_estimators=50,
+ # learning_rate=0.05,
+ # max_depth=6,
+ # subsample=0.8,
+ # colsample_bytree=0.8,
+ # reg_alpha=0.1,
+ # reg_lambda=0.1
+ # )
+ #
+ # return XGBRegressor(
+ # objective='reg:squarederror',
+ # n_estimators=1000,
+ # learning_rate=0.05,
+ # max_depth=6,
+ # min_child_weight=3,
+ # subsample=0.8,
+ # colsample_bytree=0.8,
+ # reg_alpha=0.1,
+ # reg_lambda=0.1
+ # # n_jobs=self.n_jobs
+ # )
def fit_model(self, target):
"""Fits the model to the training data and removes zero-importance features."""
diff --git a/etl/spatial/OpenUprnClient.py b/etl/spatial/OpenUprnClient.py
index 5c43347a..c0cd3992 100644
--- a/etl/spatial/OpenUprnClient.py
+++ b/etl/spatial/OpenUprnClient.py
@@ -1,7 +1,6 @@
import os
from tqdm import tqdm
import pandas as pd
-import geopandas as gpd
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.Property import Property
@@ -86,17 +85,6 @@ class OpenUprnClient:
return filename
return None
- @staticmethod
- def convert_bng_data_to_gpd(df):
-
- gpd_data = gpd.GeoDataFrame(
- df,
- geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE),
- crs="EPSG:27700" # British National Grid
- )
-
- return gpd_data
-
def save_filenames_to_s3(self, bucket_name):
"""
Save the filenames to s3
diff --git a/etl/spatial/app.py b/etl/spatial/app.py
index d58509dd..e8055432 100644
--- a/etl/spatial/app.py
+++ b/etl/spatial/app.py
@@ -6,6 +6,7 @@ our database for querying from other services
import os
from tqdm import tqdm
import pandas as pd
+import geopandas as gpd
from etl.spatial.ConservationAreaClient import ConservationAreaClient
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.spatial.SpecialBuildingsClient import SpecialBuildingsClient
@@ -25,6 +26,16 @@ HISTORIC_ENGLAND_HERITAGE_BUILDINGS_PATHNAME = \
logger = setup_logger()
+def convert_bng_data_to_gpd(df):
+    """
+    Wrap ``df`` in a GeoDataFrame whose point geometry is built from its
+    X_COORDINATE and Y_COORDINATE columns.
+
+    :param df: frame exposing X_COORDINATE and Y_COORDINATE columns
+    :return: GeoDataFrame over ``df`` with CRS EPSG:27700
+    """
+    bng_points = gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE)
+    # EPSG:27700 is the British National Grid
+    return gpd.GeoDataFrame(df, geometry=bng_points, crs="EPSG:27700")
+
+
def app():
"""
This application uses the conservation area datasets to determine if a UPRN is
@@ -85,7 +96,7 @@ def app():
to_loop_over = open_uprn_client.data.groupby("filename")
for filename, uprn_df in tqdm(open_uprn_client.data.groupby("filename"), total=len(to_loop_over)):
- uprn_gdf = OpenUprnClient.convert_bng_data_to_gpd(uprn_df)
+ uprn_gdf = convert_bng_data_to_gpd(uprn_df)
uprn_gdf = conservation_area_client.is_in_conservation_area_vectorised(uprn_gdf=uprn_gdf)
uprn_gdf = special_buildings_client.is_listed_building_vectorised(uprn_gdf=uprn_gdf)