Update plan trigger request validation for Pydantic v2

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-21 17:04:37 +01:00
parent ed8cbac431
commit 3d28af4153
8 changed files with 103 additions and 140 deletions

2
.idea/Model.iml generated
View file

@ -7,7 +7,7 @@
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/recommendations" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Fastapi-Backend" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Fastapi-backend" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
<component name="PyNamespacePackagesService">

2
.idea/misc.xml generated
View file

@ -3,7 +3,7 @@
<component name="Black">
<option name="sdkName" value="Python 3.10 (backend)" />
</component>
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-Backend" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Fastapi-backend" project-jdk-type="Python SDK" />
<component name="PyCharmProfessionalAdvertiser">
<option name="shown" value="true" />
</component>

View file

@ -1,5 +1,5 @@
from functools import lru_cache
from pydantic import BaseSettings
from pydantic_settings import BaseSettings
class Settings(BaseSettings):

View file

@ -1,60 +1,25 @@
from pydantic import BaseModel, conlist, validator
from typing import Optional
from pydantic import BaseModel, Field, BeforeValidator, field_validator
from typing import Annotated, List, Optional
# Example constants for validation
TYPICAL_MEASURE_TYPES = [
"wall_insulation",
"roof_insulation",
"ventilation",
"floor_insulation",
"windows",
"fireplace",
"heating",
"hot_water",
"low_energy_lighting",
"secondary_heating",
"solar_pv"
"wall_insulation", "roof_insulation", "ventilation", "floor_insulation",
"windows", "fireplace", "heating", "hot_water", "low_energy_lighting",
"secondary_heating", "solar_pv"
]
SPECIFIC_MEASURES = [
# Specific measures
# Walls
"internal_wall_insulation",
"external_wall_insulation",
"cavity_wall_insulation",
# Roof
"loft_insulation",
"flat_roof_insulation",
"room_roof_insulation",
# Floor
"suspended_floor_insulation",
"solid_floor_insulation",
# Heating
"boiler_upgrade",
"high_heat_retention_storage_heater",
"air_source_heat_pump",
"secondary_heating",
# Solar
"solar_pv",
# Windows Glazing
"double_glazing",
"secondary_glazing",
# Mechanical ventilation
"ventilation",
# Other
"low_energy_lighting",
"fireplace",
"hot_water",
"internal_wall_insulation", "external_wall_insulation", "cavity_wall_insulation",
"loft_insulation", "flat_roof_insulation", "room_roof_insulation",
"suspended_floor_insulation", "solid_floor_insulation",
"boiler_upgrade", "high_heat_retention_storage_heater", "air_source_heat_pump",
"secondary_heating", "solar_pv", "double_glazing", "secondary_glazing",
"ventilation", "low_energy_lighting", "fireplace", "hot_water"
]
NON_INVASIVE_SPECIFIC_MEASURES = [
# Specific measures that will typically come from an energy assessment
"trickle_vents",
"draught_proofing",
"mixed_glazing", # This covers partial double glazing and secondary glazing
"cavity_extract_and_refill",
# Indicates that there is one (need to handle the case where there are multiple)
# extension that requires cavity wall insulation
"extension_cavity_wall_insulation",
"trickle_vents", "draught_proofing", "mixed_glazing", "cavity_extract_and_refill",
"extension_cavity_wall_insulation"
]
# This allows us to extend high level categories for measures such as "wall_insulation" to the specific measures
@ -70,11 +35,37 @@ MEASURE_MAP = {
"heating_controls": ["roomstat_programmer_trvs", "time_temperature_zone_control"]
}
# Goal strings accepted by check_goals; any other value is rejected
VALID_GOALS = ["Increasing EPC"]
# Housing type strings accepted by check_housing_type; any other value is rejected
VALID_HOUSING_TYPES = ["Social", "Private"]
# Validation function shared by the inclusions and exclusions list items
def check_inclusion_or_exclusion(value: str) -> str:
    """Check that ``value`` names a known measure and return it unchanged.

    A value is accepted when it appears in any of TYPICAL_MEASURE_TYPES,
    SPECIFIC_MEASURES or NON_INVASIVE_SPECIFIC_MEASURES.

    Args:
        value: A single item taken from an inclusions or exclusions list.

    Returns:
        The same ``value`` when it is an allowed measure.

    Raises:
        ValueError: If ``value`` is not present in any of the measure lists.
    """
    # Probe each list in turn rather than concatenating all three on every call.
    allowed_groups = (
        TYPICAL_MEASURE_TYPES,
        SPECIFIC_MEASURES,
        NON_INVASIVE_SPECIFIC_MEASURES,
    )
    if not any(value in group for group in allowed_groups):
        # The same validator serves both fields, so the message names both.
        raise ValueError(f"{value} is not an allowed inclusion or exclusion")
    return value
def check_goals(value: str) -> str:
    """Check that ``value`` is one of VALID_GOALS and return it unchanged.

    Args:
        value: The goal string to validate.

    Returns:
        The same ``value`` when it is a valid goal.

    Raises:
        ValueError: If ``value`` is not listed in VALID_GOALS.
    """
    # Raise explicitly rather than assert: assert statements are removed when
    # Python runs with -O, which would silently switch this validation off.
    if value not in VALID_GOALS:
        raise ValueError(f"{value} is not a valid goal")
    return value
def check_housing_type(value: str) -> str:
    """Check that ``value`` is one of VALID_HOUSING_TYPES and return it unchanged.

    Args:
        value: The housing type string to validate.

    Returns:
        The same ``value`` when it is a valid housing type.

    Raises:
        ValueError: If ``value`` is not listed in VALID_HOUSING_TYPES.
    """
    # Raise explicitly rather than assert: assert statements are removed when
    # Python runs with -O, which would silently switch this validation off.
    if value not in VALID_HOUSING_TYPES:
        raise ValueError(f"{value} is not a valid housing type")
    return value
# Annotated string aliases that attach the check functions as BeforeValidator hooks.
# InclusionOrExclusionItem is meant to be used as a list element type, so the check
# is applied to each item individually rather than to the list as a whole.
InclusionOrExclusionItem = Annotated[str, BeforeValidator(check_inclusion_or_exclusion)]
# String restricted to the values accepted by check_goals
Goal = Annotated[str, BeforeValidator(check_goals)]
# String restricted to the values accepted by check_housing_type
HousingType = Annotated[str, BeforeValidator(check_housing_type)]
class PlanTriggerRequest(BaseModel):
budget: Optional[float] = None
goal: str
housing_type: str
goal: Goal
housing_type: HousingType
goal_value: str
portfolio_id: int
trigger_file_path: str
@ -82,53 +73,10 @@ class PlanTriggerRequest(BaseModel):
patches_file_path: Optional[str] = None
non_invasive_recommendations_file_path: Optional[str] = None
valuation_file_path: Optional[str] = None
exclusions: Optional[conlist(str, min_items=1)] = None
inclusions: Optional[conlist(str, min_items=1)] = None
exclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
inclusions: Optional[List[InclusionOrExclusionItem]] = Field(default=None, min_length=1)
scenario_name: Optional[str] = ""
# If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
# exists in the portfolio, it will be ignored
multi_plan: Optional[bool] = False
# if False, allows optimisation to be switched off
optimise: Optional[bool] = True
# If True, uses default u-values for models
default_u_values: Optional[bool] = True
_allowed_goals = {"Increasing EPC"}
_allowed_housing_types = {"Social", "Private"}
# Validator to ensure exclusions are within the pre-defined possibilities
@validator('exclusions', each_item=True)
def check_exclusions(cls, v):
if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
raise ValueError(f"{v} is not an allowed exclusion")
return v
@validator('inclusions', each_item=True)
def check_inclusions(cls, v):
if v not in TYPICAL_MEASURE_TYPES + SPECIFIC_MEASURES + NON_INVASIVE_SPECIFIC_MEASURES:
raise ValueError(f"{v} is not an allowed inclusion")
return v
# Validator to ensure that the goal is within the pre-defined possibilities
@validator('goal')
def check_goal(cls, v):
if v not in cls._allowed_goals:
raise ValueError(f"{v} is not a valid goal")
return v
# Validator to ensure that the housing type is within the pre-defined possibilities
@validator('housing_type')
def check_housing_type(cls, v):
if v not in cls._allowed_housing_types:
raise ValueError(f"{v} is not a valid housing type")
return v
class MdsRequest(PlanTriggerRequest):
# When creating the mds report, we allow an optional list of measures to select from. If this is passed, it will
# cause the service to select the optimal package from the list of measures
measures: Optional[conlist(str, min_items=1)] = None

View file

@ -8,3 +8,19 @@ tqdm==4.66.5
# fastapi
fastapi==0.115.2
sqlalchemy==2.0.36
pydantic-settings==2.6.0
psycopg2-binary==2.9.10
python-jose==3.3.0
cryptography==43.0.3
# AWS
boto3==1.35.44
# ML, Data, Data Science
usaddress==0.5.11
epc-api-python==1.0.2
fuzzywuzzy==0.18.0
python-Levenshtein==0.26.0
textblob==0.18.0.post0
msgpack==1.1.0
scikit-learn==1.5.2
cffi==1.15.1
mip==1.15.0

View file

@ -1,6 +1,6 @@
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
# from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_percentage_error
from sklearn.feature_selection import RFECV
@ -278,33 +278,33 @@ class EnergyConsumptionModel:
logger.info(f"Feature selection completed for target {target}")
def init_model(self, feature_selection=False):
if feature_selection:
# Set up a smaller model to work it
return XGBRegressor(
objective='reg:squarederror',
n_estimators=50,
learning_rate=0.05,
max_depth=6,
subsample=0.8,
colsample_bytree=0.8,
reg_alpha=0.1,
reg_lambda=0.1
)
return XGBRegressor(
objective='reg:squarederror',
n_estimators=1000,
learning_rate=0.05,
max_depth=6,
min_child_weight=3,
subsample=0.8,
colsample_bytree=0.8,
reg_alpha=0.1,
reg_lambda=0.1
# n_jobs=self.n_jobs
)
# def init_model(self, feature_selection=False):
#
# if feature_selection:
# # Set up a smaller model to work it
# return XGBRegressor(
# objective='reg:squarederror',
# n_estimators=50,
# learning_rate=0.05,
# max_depth=6,
# subsample=0.8,
# colsample_bytree=0.8,
# reg_alpha=0.1,
# reg_lambda=0.1
# )
#
# return XGBRegressor(
# objective='reg:squarederror',
# n_estimators=1000,
# learning_rate=0.05,
# max_depth=6,
# min_child_weight=3,
# subsample=0.8,
# colsample_bytree=0.8,
# reg_alpha=0.1,
# reg_lambda=0.1
# # n_jobs=self.n_jobs
# )
def fit_model(self, target):
"""Fits the model to the training data and removes zero-importance features."""

View file

@ -1,7 +1,6 @@
import os
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from utils.logger import setup_logger
from utils.s3 import read_io_from_s3, save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from backend.Property import Property
@ -86,17 +85,6 @@ class OpenUprnClient:
return filename
return None
@staticmethod
def convert_bng_data_to_gpd(df):
gpd_data = gpd.GeoDataFrame(
df,
geometry=gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE),
crs="EPSG:27700" # British National Grid
)
return gpd_data
def save_filenames_to_s3(self, bucket_name):
"""
Save the filenames to s3

View file

@ -6,6 +6,7 @@ our database for querying from other services
import os
from tqdm import tqdm
import pandas as pd
import geopandas as gpd
from etl.spatial.ConservationAreaClient import ConservationAreaClient
from etl.spatial.OpenUprnClient import OpenUprnClient
from etl.spatial.SpecialBuildingsClient import SpecialBuildingsClient
@ -25,6 +26,16 @@ HISTORIC_ENGLAND_HERITAGE_BUILDINGS_PATHNAME = \
logger = setup_logger()
def convert_bng_data_to_gpd(df):
    """Wrap ``df`` in a GeoDataFrame with point geometry from its coordinate columns.

    Points are built from the ``X_COORDINATE`` and ``Y_COORDINATE`` columns of
    ``df`` and tagged with the EPSG:27700 (British National Grid) CRS.

    :param df: DataFrame exposing ``X_COORDINATE`` and ``Y_COORDINATE`` columns
    :return: GeoDataFrame built from ``df`` with a point ``geometry`` column
    """
    # British National Grid
    bng_crs = "EPSG:27700"
    point_geometry = gpd.points_from_xy(df.X_COORDINATE, df.Y_COORDINATE)
    return gpd.GeoDataFrame(df, geometry=point_geometry, crs=bng_crs)
def app():
"""
This application uses the conservation area datasets to determine if a UPRN is
@ -85,7 +96,7 @@ def app():
to_loop_over = open_uprn_client.data.groupby("filename")
for filename, uprn_df in tqdm(open_uprn_client.data.groupby("filename"), total=len(to_loop_over)):
uprn_gdf = OpenUprnClient.convert_bng_data_to_gpd(uprn_df)
uprn_gdf = convert_bng_data_to_gpd(uprn_df)
uprn_gdf = conservation_area_client.is_in_conservation_area_vectorised(uprn_gdf=uprn_gdf)
uprn_gdf = special_buildings_client.is_listed_building_vectorised(uprn_gdf=uprn_gdf)