diff --git a/backend/Property.py b/backend/Property.py index f82c03a7..a1bfe265 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -77,12 +77,14 @@ class Property: non_invasive_recommendations=None, measures=None, energy_assessment=None, + is_new=True, **kwargs ): self.epc_record = epc_record self.id = id + self.is_new = is_new self.address = address self.postcode = postcode diff --git a/backend/app/db/functions/portfolio_functions.py b/backend/app/db/functions/portfolio_functions.py index 402675e8..008c4b8b 100644 --- a/backend/app/db/functions/portfolio_functions.py +++ b/backend/app/db/functions/portfolio_functions.py @@ -30,7 +30,7 @@ def aggregate_portfolio_recommendations( **aggregated_data } - # Get the portfolio and update the fields + # Get the portfolio and update the fields. This data needs to be stored against the plan, not the portfolio portfolio = session.query(Portfolio).filter_by(id=portfolio_id).one() # Update the data for key, value in aggregates_dict.items(): diff --git a/backend/app/db/functions/recommendations_functions.py b/backend/app/db/functions/recommendations_functions.py index 365829e4..cfb3d570 100644 --- a/backend/app/db/functions/recommendations_functions.py +++ b/backend/app/db/functions/recommendations_functions.py @@ -1,8 +1,11 @@ from sqlalchemy import insert, delete from sqlalchemy.orm import Session -from backend.app.db.models.recommendations import Plan, Recommendation, RecommendationMaterials, PlanRecommendations -from backend.app.db.models.portfolio import PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, \ - PropertyDetailsEpcModel +from backend.app.db.models.recommendations import ( + Plan, Recommendation, RecommendationMaterials, PlanRecommendations, Scenario +) +from backend.app.db.models.portfolio import ( + PropertyModel, PropertyTargetsModel, PropertyDetailsMeter, PropertyDetailsEpcModel +) def create_plan(session: Session, plan): @@ -19,6 +22,20 @@ def create_plan(session: Session, plan): return new_plan.id +def create_scenario(session: Session, scenario): + """ + This function will create a record for the scenario in the database if it does not exist. + :param session: The database session + :param scenario: dictionary of data representing a scenario to be created + """ + + new_scenario = Scenario(**scenario) + session.add(new_scenario) + session.flush() + + return new_scenario.id + + def create_recommendation(session: Session, recommendation): """ This function will create a record for the recommendation in the database if it does not exist. @@ -148,6 +165,9 @@ def clear_portfolio(session: Session, portfolio_id: int): # Delete all Plans associated with the portfolio session.execute(delete(Plan).where(Plan.portfolio_id == portfolio_id)) + # Delete all Scenarios associated with the portfolio + session.execute(delete(Scenario).where(Scenario.portfolio_id == portfolio_id)) + # Delete all Recommendations associated with the properties session.execute(delete(Recommendation).where(Recommendation.property_id.in_(property_ids))) diff --git a/backend/app/db/models/recommendations.py b/backend/app/db/models/recommendations.py index 8ab7908f..6eddae1f 100644 --- a/backend/app/db/models/recommendations.py +++ b/backend/app/db/models/recommendations.py @@ -50,6 +50,7 @@ class Plan(Base): __tablename__ = 'plan' id = Column(BigInteger, primary_key=True, autoincrement=True) + name = Column(String, nullable=True, default="") portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) property_id = Column(BigInteger, ForeignKey(PropertyModel.id), nullable=False) created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) @@ -65,3 +66,21 @@ class PlanRecommendations(Base): id = Column(BigInteger, primary_key=True, autoincrement=True) plan_id = Column(BigInteger, ForeignKey('plan.id'), nullable=False) recommendation_id = Column(BigInteger, ForeignKey('recommendation.id'), nullable=False) + + +class Scenario(Base): + __tablename__ = 'scenario' + + id = Column(BigInteger, primary_key=True, autoincrement=True) + name = Column(String, nullable=False) + created_at = Column(TIMESTAMP, nullable=False, server_default=func.now()) + budget = Column(Float) + portfolio_id = Column(BigInteger, ForeignKey(Portfolio.id), nullable=False) + housing_type = Column(String, nullable=False) + goal = Column(String, nullable=False) + trigger_file_path = Column(String, nullable=False) + already_installed_file_path = Column(String) + patches_file_path = Column(String) + non_invasive_recommendations_file_path = Column(String) + exclusions = Column(String) + multi_plan = Column(Boolean, default=False) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 099d0827..1340bae3 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -20,7 +20,7 @@ from backend.app.db.functions.property_functions import ( update_or_create_property_spatial_details ) from backend.app.db.functions.recommendations_functions import ( - create_plan, create_plan_recommendations, upload_recommendations + create_plan, create_plan_recommendations, upload_recommendations, create_scenario ) from backend.app.db.functions.energy_assessment_functions import get_latest_assessment_by_uprn from backend.app.db.models.portfolio import rating_lookup @@ -354,16 +354,17 @@ async def trigger_plan(body: PlanTriggerRequest): property_id, is_new = create_property( session, body.portfolio_id, epc_searcher.address_clean, epc_searcher.postcode_clean, epc_searcher.uprn ) - if not is_new: + if not is_new and not body.multi_plan: continue - create_property_targets( - session, - property_id=property_id, - portfolio_id=body.portfolio_id, - epc_target=body.goal_value, - heat_demand_target=None - ) + if is_new: + create_property_targets( + session, + property_id=property_id, + portfolio_id=body.portfolio_id, + epc_target=body.goal_value, + heat_demand_target=None + ) # If we have an energy assessment in place, that is newer than all of the previous EPCs, we use that. # Otherwise, we use the newest EPC @@ -396,6 +397,7 @@ async def trigger_plan(body: PlanTriggerRequest): input_properties.append( Property( id=property_id, + is_new=is_new, address=epc_searcher.address_clean, postcode=epc_searcher.postcode_clean, epc_record=prepared_epc, @@ -409,6 +411,25 @@ async def trigger_plan(body: PlanTriggerRequest): if not input_properties: return Response(status_code=204) + # If we have any work to do, we create a new scenario + scenario = create_scenario( + session=session, + scenario={ + "name": body.scenario_name, + "created_at": created_at, + "budget": body.budget, + "portfolio_id": body.portfolio_id, + "housing_type": body.housing_type, + "goal": body.goal, + "trigger_file_path": body.trigger_file_path, + "already_installed_file_path": body.already_installed_file_path, + "patches_file_path": body.patches_file_path, + "non_invasive_recommendations_file_path": body.non_invasive_recommendations_file_path, + "exclusions": body.exclusions, + "multi_plan": body.multi_plan + } + ) + # The materials data could be cached or local so we don't need to make # consistent requests to the backend for # the same data @@ -734,18 +755,19 @@ async def trigger_plan(body: PlanTriggerRequest): valuations = PropertyValuation.estimate(property_instance=p, target_epc=new_epc) property_value_increase_ranges[p.id] = valuations - - property_details_epc = p.get_property_details_epc( - portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, - ) - create_property_details_epc(session, property_details_epc) - update_or_create_property_spatial_details(session, p.uprn, p.spatial) + if p.is_new: + property_details_epc = p.get_property_details_epc( + portfolio_id=body.portfolio_id, rating_lookup=rating_lookup, + ) + create_property_details_epc(session, property_details_epc) - property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) - update_property_data( - session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data - ) + update_or_create_property_spatial_details(session, p.uprn, p.spatial) + + property_data = p.get_full_property_data(current_valuation=valuations["current_value"]) + update_property_data( + session, property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data + ) if not recommendations_to_upload: continue @@ -753,7 +775,8 @@ async def trigger_plan(body: PlanTriggerRequest): new_plan_id = create_plan(session, { "portfolio_id": body.portfolio_id, "property_id": p.id, - "is_default": True, + "is_default": True if p.is_new else False, + "name": body.scenario_name, "valuation_increase_lower_bound": ( valuations["lower_bound_increased_value"] - valuations["current_value"] ), @@ -807,6 +830,8 @@ async def trigger_plan(body: PlanTriggerRequest): aggregate_portfolio_recommendations( session, portfolio_id=body.portfolio_id, + multi_plan=body.multi_plan, + total_valuation_increase=total_valuation_increase, labour_days=labour_days, aggregated_data=aggregated_data @@ -941,6 +966,7 @@ async def build_mds(body: MdsRequest): # already_installed=property_already_installed, # non_invasive_recommendations=property_non_invasive_recommendations, measures=measures, + is_new=is_new, **Property.extract_kwargs(config) ) ) diff --git a/backend/app/plan/schemas.py b/backend/app/plan/schemas.py index 77ac4217..263115af 100644 --- a/backend/app/plan/schemas.py +++ b/backend/app/plan/schemas.py @@ -13,6 +13,10 @@ class PlanTriggerRequest(BaseModel): patches_file_path: Optional[str] = None non_invasive_recommendations_file_path: Optional[str] = None exclusions: Optional[conlist(str, min_items=1)] = None + scenario_name: Optional[str] = "" + # If true, will allow us to create multiple plans for the same portfolio, whereas if this is false, if this property + # exists in the portfolio, it will be ignored + multi_plan: Optional[bool] = False # Pre-defined list of possibilities for exclusions _allowed_exclusions = { diff --git a/etl/xml_survey_extraction/app.py b/etl/xml_survey_extraction/app.py index ab9eae2d..aeaf8abe 100644 --- a/etl/xml_survey_extraction/app.py +++ b/etl/xml_survey_extraction/app.py @@ -10,11 +10,97 @@ from io import BytesIO logger = setup_logger() -SURVEYORS = "JAFFERSONS ENERGY CONSULTANTS" -PROJECT_CODE = "VDE001" BUCKET = "retrofit-energy-assessments-dev" -PORTFOLIO_ID = 86 USER_ID = 8 +SCENARIOS = { + 86: { + "project_code": "VDE001", + "surveyor": "JAFFERSONS ENERGY CONSULTANTS", + "bodies": [ + # Scenario A: Cavity wall insulation + { + "portfolio_id": str(86), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["floor_insulation", "fireplace", "solar_pv", "heating"], + "budget": None, + "scenario_name": "Low Hanging Fruit", + "multi_plan": True, + }, + # Scenario B: CWI, Solar PV, AHSP + { + "portfolio_id": str(86), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["floor_insulation", "fireplace"], + "budget": None, + "Scenario Name": "Deep Retrofit", + "multi_plan": True, + }, + # Scenario C, CWI, floor insulation, PV, AHSP + { + "portfolio_id": str(86), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["fireplace"], + "budget": None, + "Scenario Name": "Whole House Retrofit", + "multi_plan": True, + } + ] + }, + 87: { + "project_code": "VDE002", + "surveyor": "JAFFERSONS ENERGY CONSULTANTS", + "bodies": [ + # Scenario A: Solar PV, AHSP + { + "portfolio_id": str(87), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["floor_insulation", "fireplace"], + "budget": None, + "Scenario Name": "Deep Retrofit", + "multi_plan": True, + }, + # Scenario B, floor insulation, PV, AHSP + { + "portfolio_id": str(87), + "housing_type": "Private", + "goal": "Increase EPC", + "goal_value": "A", + "trigger_file_path": "", + "already_installed_file_path": "", + "patches_file_path": "", + "non_invasive_recommendations_file_path": "", + "exclusions": ["fireplace"], + "budget": None, + "Scenario Name": "Whole House Retrofit", + "multi_plan": True, + } + ] + } +} def main(): @@ -34,103 +120,102 @@ def main(): # TODO: Store the project code in the database # - energy_assessments = list_files_and_subfolders_in_s3_folder( - bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/" - ) - logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}") - assessments_map = {} - for assessment in energy_assessments: - uploaded_xmls = list_xmls_in_s3_folder( - bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans") + for scenario_config in SCENARIOS.values(): + energy_assessments = list_files_and_subfolders_in_s3_folder( + bucket_name=BUCKET, folder_name=f"{scenario_config['surveyor']}/{scenario_config['project_code']}/" ) - uprn = int(assessment.rstrip("/").split("/")[-1]) - assessments_map[uprn] = uploaded_xmls - logger.info(f"Exatracted XMLS for the energy assessments") - - # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to - # the database at onece - - # TODO: We now have detailed information about primary and secondary walls, so we should use this information - # in our recommendations when we have it - # For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, where - # the physical dimensions and the fabric of each building is constructed in a way as if each building is - # separate. We should use this information to make recommendations that are specific to each building - # part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, CO2, etc - # figures span across the entire property. - # Idea: We can collect all of this information by building part and store it separately in the database - # against the uprn. We can have key data for the EPC, but then also additional data for each building - # part. We can then use this data to make recommendations that are specific to each building part - # We should probably re-think this data model, so we break up the data in a more considered fasion and produce - # the underlying EPC data as a summary of the building parts. Not only do we have data against the main - # dwelling and extensions, but we also have multiple windows with individiaul pieces of information that - # we can use to make recommendations. We should store this data in a way that we can easily access it and - # use it to make recommendations (e.g. we should have a Windows table) - - # For each property, we download the xmls and extract the data - database_data = [] - for uprn, xmls in assessments_map.items(): - extracted_data = {} - for xml in xmls: - xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) - xml_data_io = BytesIO(xml_data) - xml_parser = XmlParser( - file=xml_data_io, - filekey=os.path.join(f"s3://{BUCKET}", xml), - uprn=uprn, - surveyor_company=SURVEYORS, + logger.info( + f"Found {len(energy_assessments)} energy assessments for {scenario_config['surveyor']} and " + f"{scenario_config['project_code']}" + ) + assessments_map = {} + for assessment in energy_assessments: + uploaded_xmls = list_xmls_in_s3_folder( + bucket_name=BUCKET, folder_name=os.path.join(assessment, "docs & plans") ) - xml_parser.run() - if xml_parser.is_lig: - logger.info(f"Extracted data from {xml}") - extracted_epc = xml_parser.epc - extracted_additional_data = xml_parser.additional_data + uprn = int(assessment.rstrip("/").split("/")[-1]) + assessments_map[uprn] = uploaded_xmls - data_to_update = { - **extracted_epc, **extracted_additional_data - } + logger.info(f"Exatracted XMLS for the energy assessments") - # We need to update the keys to match the database schema - i.e. we should replace all hyphens with - # underscores - data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()} + # TODO: IF we have many uploads, we can do them in a batch so we don't try and upload huge amounts of data to + # the database at onece - extracted_data.update(data_to_update) + # TODO: We now have detailed information about primary and secondary walls, so we should use this information + # in our recommendations when we have it + # For example, for 77 Peryn Road, W3 7LT, the energy assessment has a main dwelling and two extensions, + # where + # the physical dimensions and the fabric of each building is constructed in a way as if each building is + # separate. We should use this information to make recommendations that are specific to each building + # part, though the problem here is that while the fabric and dimensions are separate, the actual SAP, + # CO2, etc + # figures span across the entire property. + # Idea: We can collect all of this information by building part and store it separately in the database + # against the uprn. We can have key data for the EPC, but then also additional data for each + # building + # part. We can then use this data to make recommendations that are specific to each building part + # We should probably re-think this data model, so we break up the data in a more considered fasion and + # produce + # the underlying EPC data as a summary of the building parts. Not only do we have data against the main + # dwelling and extensions, but we also have multiple windows with individiaul pieces of information that + # we can use to make recommendations. We should store this data in a way that we can easily access it and + # use it to make recommendations (e.g. we should have a Windows table) - database_data.append(extracted_data) + # For each property, we download the xmls and extract the data + database_data = [] + for uprn, xmls in assessments_map.items(): + extracted_data = {} + for xml in xmls: + xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml) + xml_data_io = BytesIO(xml_data) + xml_parser = XmlParser( + file=xml_data_io, + filekey=os.path.join(f"s3://{BUCKET}", xml), + uprn=uprn, + surveyor_company=scenario_config["surveyor"], + ) + xml_parser.run() + if xml_parser.is_lig: + logger.info(f"Extracted data from {xml}") + extracted_epc = xml_parser.epc + extracted_additional_data = xml_parser.additional_data - logger.info("Uploading data to the database") - session = sessionmaker(bind=db_engine)() - bulk_insert_energy_assessments(session, database_data) - session.close() + data_to_update = { + **extracted_epc, **extracted_additional_data + } - # Create the asset list - asset_list = [ - {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data - ] - asset_list = pd.DataFrame(asset_list) + # We need to update the keys to match the database schema - i.e. we should replace all hyphens with + # underscores + data_to_update = {k.replace("-", "_"): v for k, v in data_to_update.items()} - # Store the asset list in s3 - filename = f"{USER_ID}/{PORTFOLIO_ID}/non_intrusives.csv" - save_csv_to_s3( - dataframe=asset_list, - bucket_name="retrofit-plan-inputs-dev", - file_name=filename - ) + extracted_data.update(data_to_update) - body = { - "portfolio_id": str(PORTFOLIO_ID), - "housing_type": "Private", - "goal": "Increase EPC", - "goal_value": "A", - "trigger_file_path": filename, - "already_installed_file_path": "", - "patches_file_path": "", - "non_invasive_recommendations_file_path": "", - "exclusions": ["floor_insulation", "fireplace"], - "budget": None, - } - print(body) + database_data.append(extracted_data) + + logger.info("Uploading data to the database") + session = sessionmaker(bind=db_engine)() + bulk_insert_energy_assessments(session, database_data) + session.close() + + # Create the asset list + asset_list = [ + {"uprn": x["uprn"], "address": x["address1"], "postcode": x["postcode"]} for x in database_data + ] + asset_list = pd.DataFrame(asset_list) + + # Store the asset list in s3 + filename = f"{USER_ID}/{scenario_config['bodies'][0]['portfolio_id']}/non_intrusives.csv" + save_csv_to_s3( + dataframe=asset_list, + bucket_name="retrofit-plan-inputs-dev", + file_name=filename + ) + + for body in scenario_config["bodies"]: + body["trigger_file_path"] = filename + print(body) # TODO: In order to get the full data associated to the heating system, we need to download and parse the pcdb which # can be found here: https://www.ncm-pcdb.org.uk/pcdb/pcdb10.dat