Adding scenario structure to backend

This commit is contained in:
Khalim Conn-Kowlessar 2024-07-30 17:42:36 +01:00
parent 996c71c189
commit cadbd4f48a
7 changed files with 268 additions and 112 deletions

View file

@ -77,12 +77,14 @@ class Property:
non_invasive_recommendations=None,
measures=None,
energy_assessment=None,
is_new=True,
**kwargs
):
self.epc_record = epc_record
self.id = id
self.is_new = is_new
self.address = address
self.postcode = postcode

View file

@ -30,7 +30,7 @@ def aggregate_portfolio_recommendations(
**aggregated_data
}
# Get the portfolio and update the fields
# Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio
portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one()
# Update the data
for key, value in aggregates_dict.items():

View file

@ -1,8 +1,11 @@
from sqlalchemy import insert, delete
from sqlalchemy.orm import Session
from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations
from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \
PropertyDetailsEpcModel
from backend.app.db.models.recommendations import (
Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
)
from backend.app.db.models.portfolio import (
PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel
)
def create_plan(session: Session, plan):
@ -19,6 +22,20 @@ def create_plan(session: Session, plan):
return new_plan.id
def create_scenario(session: Session, scenario):
"""
This function will create a record for the scenario in the database if it does not exist.
:param session: The database session
:param scenario: dictionary of data representing a scenario to be created
"""
new_scenario = Scenario(**scenario)
session.add(new_scenario)
session.flush()
return new_scenario.id
def create_recommendation(session: Session, recommendation):
"""
This function will create a record for the recommendation in the database if it does not exist.
@ -148,6 +165,9 @@ def clear_portfolio(session: Session, portfolio_id: int):
# Delete all Plans associated with the portfolio
session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id))
# Delete all Scenarios associated with the portfolio
session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
# Delete all Recommendations associated with the properties
session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids)))

View file

@ -50,6 +50,7 @@ class Plan(Base):
__tablename__ = 'plan'
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=True, default="")
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
@ -65,3 +66,21 @@ class PlanRecommendations(Base):
id = Column(BigInteger, primary_key=True, autoincrement=True)
plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
class Scenario(Base):
__tablename__ = 'scenario'
id = Column(BigInteger, primary_key=True, autoincrement=True)
name = Column(String, nullable=False)
created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
budget = Column(Float)
portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
housing_type = Column(String, nullable=False)
goal = Column(String, nullable=False)
trigger_file_path = Column(String, nullable=False)
already_installed_file_path = Column(String)
patches_file_path = Column(String)
non_invasive_recommendations_file_path = Column(String)
exclusions = Column(String)
multi_plan = Column(Boolean, default=False)

View file

@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
update_or_create_property_spatial_details
)
from backend.app.db.functions.recommendations_functions import (
create_plan, create_plan_recommendations, upload_recommendations
create_plan, create_plan_recommendations, upload_recommendations, create_scenario
)
from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
from backend.app.db.models.portfolio import rating_lookup
@ -354,16 +354,17 @@ async def trigger_plan(body: PlanTriggerRequest):
property_id, is_new = create_property(
session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
)
if not is_new:
if not is_new and not body.multi_plan:
continue
create_property_targets(
session,
property_id=property_id,
portfolio_id=body.portfolio_id,
epc_target=body.goal_value,
heat_demand_target=None
)
if is_new:
create_property_targets(
session,
property_id=property_id,
portfolio_id=body.portfolio_id,
epc_target=body.goal_value,
heat_demand_target=None
)
# If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
# Otherwise, we use the newest EPC
@ -396,6 +397,7 @@ async def trigger_plan(body: PlanTriggerRequest):
input_properties.append(
Property(
id=property_id,
is_new=is_new,
address=epc_searcher.address_clean,
postcode=epc_searcher.postcode_clean,
epc_record=prepared_epc,
@ -409,6 +411,25 @@ async def trigger_plan(body: PlanTriggerRequest):
if not input_properties:
return Response(status_code=204)
# If we have any work to do, we create a new scenario
scenario = create_scenario(
session=session,
scenario={
"name": body.scenario_name,
"created_at": created_at,
"budget": body.budget,
"portfolio_id": body.portfolio_id,
"housing_type": body.housing_type,
"goal": body.goal,
"trigger_file_path": body.trigger_file_path,
"already_installed_file_path": body.already_installed_file_path,
"patches_file_path": body.patches_file_path,
"non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
"exclusions": body.exclusions,
"multi_plan": body.multi_plan
}
)
# The materials data could be cached or local so we don't need to make
# consistent requests to the backend for
# the same data
@ -734,18 +755,19 @@ async def trigger_plan(body: PlanTriggerRequest):
valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
property_value_increase_ranges[p.id] = valuations
property_details_epc = p.get_property_details_epc(
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
)
create_property_details_epc(session, property_details_epc)
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
if p.is_new:
property_details_epc = p.get_property_details_epc(
portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
)
create_property_details_epc(session, property_details_epc)
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
)
update_or_create_property_spatial_details(session, p.uprn, p.spatial)
property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
update_property_data(
session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
)
if not recommendations_to_upload:
continue
@ -753,7 +775,8 @@ async def trigger_plan(body: PlanTriggerRequest):
new_plan_id = create_plan(session, {
"portfolio_id": body.portfolio_id,
"property_id": p.id,
"is_default": True,
"is_default": True if p.is_new else False,
"name": body.scenario_name,
"valuation_increase_lower_bound": (
valuations["lower_bound_increased_value"] - valuations["current_value"]
),
@ -807,6 +830,8 @@ async def trigger_plan(body: PlanTriggerRequest):
aggregate_portfolio_recommendations(
session,
portfolio_id=body.portfolio_id,
multi_plan=body.multi_plan,
total_valuation_increase=total_valuation_increase,
labour_days=labour_days,
aggregated_data=aggregated_data
@ -941,6 +966,7 @@ async def build_mds(body: MdsRequest):
# already_installed=property_already_installed,
# non_invasive_recommendations=property_non_invasive_recommendations,
measures=measures,
is_new=is_new,
**Property.extract_kwargs(config)
)
)

View file

@ -13,6 +13,10 @@ class PlanTriggerRequest(BaseModel):
patches_file_path: Optional[str] = None
non_invasive_recommendations_file_path: Optional[str] = None
exclusions: Optional[conlist(str, min_items=1)] = None
scenario_name: Optional[str] = ""
# If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
# exists in the portfolio, it will be ignored
multi_plan: Optional[bool] = False
# Pre-defined list of possibilities for exclusions
_allowed_exclusions = {

View file

@ -10,11 +10,97 @@ from io import BytesIO
logger = setup_logger()
SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
PROJECT_CODE = "VDE001"
BUCKET = "retrofit-energy-assessments-dev"
PORTFOLIO_ID = 86
USER_ID = 8
SCENARIOS = {
86: {
"project_code": "VDE001",
"surveyor": "JAFFERSONS ENERGY CONSULTANTS",
"bodies": [
# Scenario A: Cavity wall insulation
{
"portfolio_id": str(86),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating"],
"budget": None,
"scenario_name": "Low Hanging Fruit",
"multi_plan": True,
},
# Scenario B: CWI, Solar PV, AHSP
{
"portfolio_id": str(86),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"exclusions": ["floor_insulation", "fireplace"],
"budget": None,
"Scenario Name": "Deep Retrofit",
"multi_plan": True,
},
# Scenario C, CWI, floor insulation, PV, AHSP
{
"portfolio_id": str(86),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"exclusions": ["fireplace"],
"budget": None,
"Scenario Name": "Whole House Retrofit",
"multi_plan": True,
}
]
},
87: {
"project_code": "VDE002",
"surveyor": "JAFFERSONS ENERGY CONSULTANTS",
"bodies": [
# Scenario A: Solar PV, AHSP
{
"portfolio_id": str(87),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"exclusions": ["floor_insulation", "fireplace"],
"budget": None,
"Scenario Name": "Deep Retrofit",
"multi_plan": True,
},
# Scenario B, floor insulation, PV, AHSP
{
"portfolio_id": str(87),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": "",
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"exclusions": ["fireplace"],
"budget": None,
"Scenario Name": "Whole House Retrofit",
"multi_plan": True,
}
]
}
}
def main():
@ -34,103 +120,102 @@ def main():
# TODO: Store the project code in the database
#
energy_assessments = list_files_and_subfolders_in_s3_folder(
bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"
)
logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}")
assessments_map = {}
for assessment in energy_assessments:
uploaded_xmls = list_xmls_in_s3_folder(
bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
for scenario_config in SCENARIOS.values():
energy_assessments = list_files_and_subfolders_in_s3_folder(
bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/"
)
uprn = int(assessment.rstrip("/").split("/")[-1])
assessments_map[uprn] = uploaded_xmls
logger.info(f"Exatracted XMLS for the energy assessments")
# TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to
# the database at onece
# TODO: We now have detailed information about primary and secondary walls, so we should use this information
# in our recommendations when we have it
# For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where
# the physical dimensions and the fabric of each building is constructed in a way as if each building is
# separate. We should use this information to make recommendations that are specific to each building
# part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc
# figures span across the entire property.
# Idea: We can collect all of this information by building part and store it separately in the database
# against the uprn. We can have key data for the EPC, but then also additional data for each building
# part. We can then use this data to make recommendations that are specific to each building part
# We should probably re-think this data model, so we break up the data in a more considered fasion and produce
# the underlying EPC data as a summary of the building parts. Not only do we have data against the main
# dwelling and extensions, but we also have multiple windows with individiaul pieces of information that
# we can use to make recommendations. We should store this data in a way that we can easily access it and
# use it to make recommendations (e.g. we should have a Windows table)
# For each property, we download the xmls and extract the data
database_data = []
for uprn, xmls in assessments_map.items():
extracted_data = {}
for xml in xmls:
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
xml_data_io = BytesIO(xml_data)
xml_parser = XmlParser(
file=xml_data_io,
filekey=os.path.join(f"s3://{BUCKET}", xml),
uprn=uprn,
surveyor_company=SURVEYORS,
logger.info(
f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and "
f"{scenario_config['project_code']}"
)
assessments_map = {}
for assessment in energy_assessments:
uploaded_xmls = list_xmls_in_s3_folder(
bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
)
xml_parser.run()
if xml_parser.is_lig:
logger.info(f"Extracted data from {xml}")
extracted_epc = xml_parser.epc
extracted_additional_data = xml_parser.additional_data
uprn = int(assessment.rstrip("/").split("/")[-1])
assessments_map[uprn] = uploaded_xmls
data_to_update = {
**extracted_epc, **extracted_additional_data
}
logger.info(f"Exatracted XMLS for the energy assessments")
# We need to update the keys to match the database schema - i.e. we should replace all hyphens with
# underscores
data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
# TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to
# the database at onece
extracted_data.update(data_to_update)
# TODO: We now have detailed information about primary and secondary walls, so we should use this information
# in our recommendations when we have it
# For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions,
# where
# the physical dimensions and the fabric of each building is constructed in a way as if each building is
# separate. We should use this information to make recommendations that are specific to each building
# part, though the problem here is that while the fabric and dimensions are separate, the actual SAP,
# CO2, etc
# figures span across the entire property.
# Idea: We can collect all of this information by building part and store it separately in the database
# against the uprn. We can have key data for the EPC, but then also additional data for each
# building
# part. We can then use this data to make recommendations that are specific to each building part
# We should probably re-think this data model, so we break up the data in a more considered fasion and
# produce
# the underlying EPC data as a summary of the building parts. Not only do we have data against the main
# dwelling and extensions, but we also have multiple windows with individiaul pieces of information that
# we can use to make recommendations. We should store this data in a way that we can easily access it and
# use it to make recommendations (e.g. we should have a Windows table)
database_data.append(extracted_data)
# For each property, we download the xmls and extract the data
database_data = []
for uprn, xmls in assessments_map.items():
extracted_data = {}
for xml in xmls:
xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
xml_data_io = BytesIO(xml_data)
xml_parser = XmlParser(
file=xml_data_io,
filekey=os.path.join(f"s3://{BUCKET}", xml),
uprn=uprn,
surveyor_company=scenario_config["surveyor"],
)
xml_parser.run()
if xml_parser.is_lig:
logger.info(f"Extracted data from {xml}")
extracted_epc = xml_parser.epc
extracted_additional_data = xml_parser.additional_data
logger.info("Uploading data to the database")
session = sessionmaker(bind=db_engine)()
bulk_insert_energy_assessments(session, database_data)
session.close()
data_to_update = {
**extracted_epc, **extracted_additional_data
}
# Create the asset list
asset_list = [
{"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
]
asset_list = pd.DataFrame(asset_list)
# We need to update the keys to match the database schema - i.e. we should replace all hyphens with
# underscores
data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
# Store the asset list in s3
filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
extracted_data.update(data_to_update)
body = {
"portfolio_id": str(PORTFOLIO_ID),
"housing_type": "Private",
"goal": "Increase EPC",
"goal_value": "A",
"trigger_file_path": filename,
"already_installed_file_path": "",
"patches_file_path": "",
"non_invasive_recommendations_file_path": "",
"exclusions": ["floor_insulation", "fireplace"],
"budget": None,
}
print(body)
database_data.append(extracted_data)
logger.info("Uploading data to the database")
session = sessionmaker(bind=db_engine)()
bulk_insert_energy_assessments(session, database_data)
session.close()
# Create the asset list
asset_list = [
{"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
]
asset_list = pd.DataFrame(asset_list)
# Store the asset list in s3
filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv"
save_csv_to_s3(
dataframe=asset_list,
bucket_name="retrofit-plan-inputs-dev",
file_name=filename
)
for body in scenario_config["bodies"]:
body["trigger_file_path"] = filename
print(body)
# TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
# can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat