Adding scenario structure to backend

2026-07-27 23:35:01 +00:00 · 2024-07-30 17:42:36 +01:00 · 2024-07-30 17:42:36 +01:00 · cadbd4f48a
commit cadbd4f48a
parent 996c71c189
7 changed files with 268 additions and 112 deletions
--- a/backend/Property.py
+++ b/backend/Property.py
@ -77,12 +77,14 @@ class Property:
        non_invasive_recommendations=None,
        measures=None,
        energy_assessment=None,
+        is_new=True,
        **kwargs
    ):

        self.epc_record = epc_record

        self.id = id
+        self.is_new = is_new

        self.address = address
        self.postcode = postcode
--- a/backend/app/db/functions/portfolio_functions.py
+++ b/backend/app/db/functions/portfolio_functions.py
@ -30,7 +30,7 @@ def aggregate_portfolio_recommendations(
        **aggregated_data
    }

-    # Get the portfolio and update the fields
+    # Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio
    portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one()
    # Update the data
    for key, value in aggregates_dict.items():
--- a/backend/app/db/functions/recommendations_functions.py
+++ b/backend/app/db/functions/recommendations_functions.py
@ -1,8 +1,11 @@
 from sqlalchemy import insert, delete
 from sqlalchemy.orm import Session
-from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations
-from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \
-    PropertyDetailsEpcModel
+from backend.app.db.models.recommendations import (
+    Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario
+)
+from backend.app.db.models.portfolio import (
+    PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel
+)


 def create_plan(session: Session, plan):
@ -19,6 +22,20 @@ def create_plan(session: Session, plan):
    return new_plan.id


+def create_scenario(session: Session, scenario):
+    """
+    This function will create a record for the scenario in the database if it does not exist.
+    :param session: The database session
+    :param scenario: dictionary of data representing a scenario to be created
+    """
+
+    new_scenario = Scenario(**scenario)
+    session.add(new_scenario)
+    session.flush()
+
+    return new_scenario.id
+
+
 def create_recommendation(session: Session, recommendation):
    """
    This function will create a record for the recommendation in the database if it does not exist.
@ -148,6 +165,9 @@ def clear_portfolio(session: Session, portfolio_id: int):
    # Delete all Plans associated with the portfolio
    session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id))

+    # Delete all Scenarios associated with the portfolio
+    session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id))
+
    # Delete all Recommendations associated with the properties
    session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids)))

--- a/backend/app/db/models/recommendations.py
+++ b/backend/app/db/models/recommendations.py
@ -50,6 +50,7 @@ class Plan(Base):
    __tablename__ = 'plan'

    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=True, default="")
    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
    property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False)
    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
@ -65,3 +66,21 @@ class PlanRecommendations(Base):
    id = Column(BigInteger, primary_key=True, autoincrement=True)
    plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False)
    recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False)
+
+
+class Scenario(Base):
+    __tablename__ = 'scenario'
+
+    id = Column(BigInteger, primary_key=True, autoincrement=True)
+    name = Column(String, nullable=False)
+    created_at = Column(TIMESTAMP, nullable=False, server_default=func.now())
+    budget = Column(Float)
+    portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False)
+    housing_type = Column(String, nullable=False)
+    goal = Column(String, nullable=False)
+    trigger_file_path = Column(String, nullable=False)
+    already_installed_file_path = Column(String)
+    patches_file_path = Column(String)
+    non_invasive_recommendations_file_path = Column(String)
+    exclusions = Column(String)
+    multi_plan = Column(Boolean, default=False)
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import (
    update_or_create_property_spatial_details
 )
 from backend.app.db.functions.recommendations_functions import (
-    create_plan, create_plan_recommendations, upload_recommendations
+    create_plan, create_plan_recommendations, upload_recommendations, create_scenario
 )
 from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn
 from backend.app.db.models.portfolio import rating_lookup
@ -354,16 +354,17 @@ async def trigger_plan(body: PlanTriggerRequest):
            property_id, is_new = create_property(
                session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn
            )
-            if not is_new:
+            if not is_new and not body.multi_plan:
                continue

-            create_property_targets(
-                session,
-                property_id=property_id,
-                portfolio_id=body.portfolio_id,
-                epc_target=body.goal_value,
-                heat_demand_target=None
-            )
+            if is_new:
+                create_property_targets(
+                    session,
+                    property_id=property_id,
+                    portfolio_id=body.portfolio_id,
+                    epc_target=body.goal_value,
+                    heat_demand_target=None
+                )

            # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that.
            # Otherwise, we use the newest EPC
@ -396,6 +397,7 @@ async def trigger_plan(body: PlanTriggerRequest):
            input_properties.append(
                Property(
                    id=property_id,
+                    is_new=is_new,
                    address=epc_searcher.address_clean,
                    postcode=epc_searcher.postcode_clean,
                    epc_record=prepared_epc,
@ -409,6 +411,25 @@ async def trigger_plan(body: PlanTriggerRequest):
        if not input_properties:
            return Response(status_code=204)

+        # If we have any work to do, we create a new scenario
+        scenario = create_scenario(
+            session=session,
+            scenario={
+                "name": body.scenario_name,
+                "created_at": created_at,
+                "budget": body.budget,
+                "portfolio_id": body.portfolio_id,
+                "housing_type": body.housing_type,
+                "goal": body.goal,
+                "trigger_file_path": body.trigger_file_path,
+                "already_installed_file_path": body.already_installed_file_path,
+                "patches_file_path": body.patches_file_path,
+                "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path,
+                "exclusions": body.exclusions,
+                "multi_plan": body.multi_plan
+            }
+        )
+
        # The materials data could be cached or local so we don't need to make
        # consistent requests to the backend for
        # the same data
@ -734,18 +755,19 @@ async def trigger_plan(body: PlanTriggerRequest):

                    valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc)
                    property_value_increase_ranges[p.id] = valuations
-                    
-                    property_details_epc = p.get_property_details_epc(
-                        portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
-                    )
-                    create_property_details_epc(session, property_details_epc)

-                    update_or_create_property_spatial_details(session, p.uprn, p.spatial)
+                    if p.is_new:
+                        property_details_epc = p.get_property_details_epc(
+                            portfolio_id=body.portfolio_id, rating_lookup=rating_lookup,
+                        )
+                        create_property_details_epc(session, property_details_epc)

-                    property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
-                    update_property_data(
-                        session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
-                    )
+                        update_or_create_property_spatial_details(session, p.uprn, p.spatial)
+
+                        property_data = p.get_full_property_data(current_valuation=valuations["current_value"])
+                        update_property_data(
+                            session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data
+                        )

                    if not recommendations_to_upload:
                        continue
@ -753,7 +775,8 @@ async def trigger_plan(body: PlanTriggerRequest):
                    new_plan_id = create_plan(session, {
                        "portfolio_id": body.portfolio_id,
                        "property_id": p.id,
-                        "is_default": True,
+                        "is_default": True if p.is_new else False,
+                        "name": body.scenario_name,
                        "valuation_increase_lower_bound": (
                            valuations["lower_bound_increased_value"] - valuations["current_value"]
                        ),
@ -807,6 +830,8 @@ async def trigger_plan(body: PlanTriggerRequest):
        aggregate_portfolio_recommendations(
            session,
            portfolio_id=body.portfolio_id,
+            multi_plan=body.multi_plan,
+
            total_valuation_increase=total_valuation_increase,
            labour_days=labour_days,
            aggregated_data=aggregated_data
@ -941,6 +966,7 @@ async def build_mds(body: MdsRequest):
                    # already_installed=property_already_installed,
                    # non_invasive_recommendations=property_non_invasive_recommendations,
                    measures=measures,
+                    is_new=is_new,
                    **Property.extract_kwargs(config)
                )
            )
--- a/backend/app/plan/schemas.py
+++ b/backend/app/plan/schemas.py
@ -13,6 +13,10 @@ class PlanTriggerRequest(BaseModel):
    patches_file_path: Optional[str] = None
    non_invasive_recommendations_file_path: Optional[str] = None
    exclusions: Optional[conlist(str, min_items=1)] = None
+    scenario_name: Optional[str] = ""
+    # If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property
+    # exists in the portfolio, it will be ignored
+    multi_plan: Optional[bool] = False

    # Pre-defined list of possibilities for exclusions
    _allowed_exclusions = {
--- a/etl/xml_survey_extraction/app.py
+++ b/etl/xml_survey_extraction/app.py
@ -10,11 +10,97 @@ from io import BytesIO

 logger = setup_logger()

-SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS"
-PROJECT_CODE = "VDE001"
 BUCKET = "retrofit-energy-assessments-dev"
-PORTFOLIO_ID = 86
 USER_ID = 8
+SCENARIOS = {
+    86: {
+        "project_code": "VDE001",
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "bodies": [
+            # Scenario A: Cavity wall insulation
+            {
+                "portfolio_id": str(86),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating"],
+                "budget": None,
+                "scenario_name": "Low Hanging Fruit",
+                "multi_plan": True,
+            },
+            # Scenario B: CWI, Solar PV, AHSP
+            {
+                "portfolio_id": str(86),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["floor_insulation", "fireplace"],
+                "budget": None,
+                "Scenario Name": "Deep Retrofit",
+                "multi_plan": True,
+            },
+            # Scenario C, CWI, floor insulation, PV, AHSP
+            {
+                "portfolio_id": str(86),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["fireplace"],
+                "budget": None,
+                "Scenario Name": "Whole House Retrofit",
+                "multi_plan": True,
+            }
+        ]
+    },
+    87: {
+        "project_code": "VDE002",
+        "surveyor": "JAFFERSONS ENERGY CONSULTANTS",
+        "bodies": [
+            # Scenario A: Solar PV, AHSP
+            {
+                "portfolio_id": str(87),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["floor_insulation", "fireplace"],
+                "budget": None,
+                "Scenario Name": "Deep Retrofit",
+                "multi_plan": True,
+            },
+            # Scenario B, floor insulation, PV, AHSP
+            {
+                "portfolio_id": str(87),
+                "housing_type": "Private",
+                "goal": "Increase EPC",
+                "goal_value": "A",
+                "trigger_file_path": "",
+                "already_installed_file_path": "",
+                "patches_file_path": "",
+                "non_invasive_recommendations_file_path": "",
+                "exclusions": ["fireplace"],
+                "budget": None,
+                "Scenario Name": "Whole House Retrofit",
+                "multi_plan": True,
+            }
+        ]
+    }
+}


 def main():
@ -34,103 +120,102 @@ def main():

    # TODO: Store the project code in the database
    #
-    energy_assessments = list_files_and_subfolders_in_s3_folder(
-        bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"
-    )

-    logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}")
-    assessments_map = {}
-    for assessment in energy_assessments:
-        uploaded_xmls = list_xmls_in_s3_folder(
-            bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
+    for scenario_config in SCENARIOS.values():
+        energy_assessments = list_files_and_subfolders_in_s3_folder(
+            bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/"
        )
-        uprn = int(assessment.rstrip("/").split("/")[-1])
-        assessments_map[uprn] = uploaded_xmls

-    logger.info(f"Exatracted XMLS for the energy assessments")
-
-    # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to
-    #       the database at onece
-
-    # TODO: We now have detailed information about primary and secondary walls, so we should use this information
-    #       in our recommendations when we have it
-    #       For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where
-    #       the physical dimensions and the fabric of each building is constructed in a way as if each building is
-    #       separate. We should use this information to make recommendations that are specific to each building
-    #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc
-    #       figures span across the entire property.
-    #       Idea: We can collect all of this information by building part and store it separately in the database
-    #             against the uprn. We can have key data for the EPC, but then also additional data for each building
-    #             part. We can then use this data to make recommendations that are specific to each building part
-    #       We should probably re-think this data model, so we break up the data in a more considered fasion and produce
-    #       the underlying EPC data as a summary of the building parts. Not only do we have data against the main
-    #       dwelling and extensions, but we also have multiple windows with individiaul pieces of information that
-    #       we can use to make recommendations. We should store this data in a way that we can easily access it and
-    #       use it to make recommendations (e.g. we should have a Windows table)
-
-    # For each property, we download the xmls and extract the data
-    database_data = []
-    for uprn, xmls in assessments_map.items():
-        extracted_data = {}
-        for xml in xmls:
-            xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
-            xml_data_io = BytesIO(xml_data)
-            xml_parser = XmlParser(
-                file=xml_data_io,
-                filekey=os.path.join(f"s3://{BUCKET}", xml),
-                uprn=uprn,
-                surveyor_company=SURVEYORS,
+        logger.info(
+            f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and "
+            f"{scenario_config['project_code']}"
+        )
+        assessments_map = {}
+        for assessment in energy_assessments:
+            uploaded_xmls = list_xmls_in_s3_folder(
+                bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans")
            )
-            xml_parser.run()
-            if xml_parser.is_lig:
-                logger.info(f"Extracted data from {xml}")
-            extracted_epc = xml_parser.epc
-            extracted_additional_data = xml_parser.additional_data
+            uprn = int(assessment.rstrip("/").split("/")[-1])
+            assessments_map[uprn] = uploaded_xmls

-            data_to_update = {
-                **extracted_epc, **extracted_additional_data
-            }
+        logger.info(f"Exatracted XMLS for the energy assessments")

-            # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
-            # underscores
-            data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}
+        # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to
+        #       the database at onece

-            extracted_data.update(data_to_update)
+        # TODO: We now have detailed information about primary and secondary walls, so we should use this information
+        #       in our recommendations when we have it
+        #       For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions,
+        #       where
+        #       the physical dimensions and the fabric of each building is constructed in a way as if each building is
+        #       separate. We should use this information to make recommendations that are specific to each building
+        #       part, though the problem here is that while the fabric and dimensions are separate, the actual SAP,
+        #       CO2, etc
+        #       figures span across the entire property.
+        #       Idea: We can collect all of this information by building part and store it separately in the database
+        #             against the uprn. We can have key data for the EPC, but then also additional data for each
+        #             building
+        #             part. We can then use this data to make recommendations that are specific to each building part
+        #       We should probably re-think this data model, so we break up the data in a more considered fasion and
+        #       produce
+        #       the underlying EPC data as a summary of the building parts. Not only do we have data against the main
+        #       dwelling and extensions, but we also have multiple windows with individiaul pieces of information that
+        #       we can use to make recommendations. We should store this data in a way that we can easily access it and
+        #       use it to make recommendations (e.g. we should have a Windows table)

-        database_data.append(extracted_data)
+        # For each property, we download the xmls and extract the data
+        database_data = []
+        for uprn, xmls in assessments_map.items():
+            extracted_data = {}
+            for xml in xmls:
+                xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
+                xml_data_io = BytesIO(xml_data)
+                xml_parser = XmlParser(
+                    file=xml_data_io,
+                    filekey=os.path.join(f"s3://{BUCKET}", xml),
+                    uprn=uprn,
+                    surveyor_company=scenario_config["surveyor"],
+                )
+                xml_parser.run()
+                if xml_parser.is_lig:
+                    logger.info(f"Extracted data from {xml}")
+                extracted_epc = xml_parser.epc
+                extracted_additional_data = xml_parser.additional_data

-    logger.info("Uploading data to the database")
-    session = sessionmaker(bind=db_engine)()
-    bulk_insert_energy_assessments(session, database_data)
-    session.close()
+                data_to_update = {
+                    **extracted_epc, **extracted_additional_data
+                }

-    # Create the asset list
-    asset_list = [
-        {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
-    ]
-    asset_list = pd.DataFrame(asset_list)
+                # We need to update the keys to match the database schema - i.e. we should replace all hyphens with
+                # underscores
+                data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()}

-    # Store the asset list in s3
-    filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv"
-    save_csv_to_s3(
-        dataframe=asset_list,
-        bucket_name="retrofit-plan-inputs-dev",
-        file_name=filename
-    )
+                extracted_data.update(data_to_update)

-    body = {
-        "portfolio_id": str(PORTFOLIO_ID),
-        "housing_type": "Private",
-        "goal": "Increase EPC",
-        "goal_value": "A",
-        "trigger_file_path": filename,
-        "already_installed_file_path": "",
-        "patches_file_path": "",
-        "non_invasive_recommendations_file_path": "",
-        "exclusions": ["floor_insulation", "fireplace"],
-        "budget": None,
-    }
-    print(body)
+            database_data.append(extracted_data)
+
+        logger.info("Uploading data to the database")
+        session = sessionmaker(bind=db_engine)()
+        bulk_insert_energy_assessments(session, database_data)
+        session.close()
+
+        # Create the asset list
+        asset_list = [
+            {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data
+        ]
+        asset_list = pd.DataFrame(asset_list)
+
+        # Store the asset list in s3
+        filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv"
+        save_csv_to_s3(
+            dataframe=asset_list,
+            bucket_name="retrofit-plan-inputs-dev",
+            file_name=filename
+        )
+
+        for body in scenario_config["bodies"]:
+            body["trigger_file_path"] = filename
+            print(body)

    # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which
    #       can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat