From c7bca626d8b7ad42daff6031d7582285c1d0ea5f Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Mon, 19 Feb 2024 15:19:59 +0000 Subject: [PATCH 1/4] Removed the dropping of cavity filled ending columns and is at rafter ending column --- etl/epc/Dataset.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/etl/epc/Dataset.py b/etl/epc/Dataset.py index dac829e2..7f989633 100644 --- a/etl/epc/Dataset.py +++ b/etl/epc/Dataset.py @@ -563,7 +563,6 @@ class TrainingDataset(BaseDataset): "original_description_ending", "thermal_transmittance_unit_ending", "is_cavity_wall_ending", - "is_filled_cavity_ending", "is_solid_brick_ending", "is_system_built_ending", "is_timber_frame_ending", @@ -607,7 +606,6 @@ class TrainingDataset(BaseDataset): "is_loft_ending", "is_flat_ending", "is_thatched_ending", - "is_at_rafters_ending", "has_dwelling_above_ending", "is_assumed_ending", "is_valid_ending", From c53e4ce8496f695fca3ee20134a73262b6d0736d Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Tue, 20 Feb 2024 09:44:51 +0000 Subject: [PATCH 2/4] change scenario data --- backend/Property.py | 20 +++++++++++++------- etl/epc/Pipeline.py | 3 ++- etl/epc/generate_scenarios_data.py | 6 +++--- 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/backend/Property.py b/backend/Property.py index 418a35a1..c0f19ab5 100644 --- a/backend/Property.py +++ b/backend/Property.py @@ -128,16 +128,19 @@ class Property: It will be the same starting and ending EPC, as we don't have the expected EPC yet """ - difference_record = self.epc_record - self.epc_record + # difference_record = self.epc_record - self.epc_record # TODO: change these lower and replace in the settings file + print("CHANGE THE LATEST FIELD TO REMOVE NUMBER HABITABLE ROOMS IF WE WANT TO USE STARTING/ENDING") fixed_data_col_names = MANDATORY_FIXED_FEATURES + LATEST_FIELD print("NEED TO CHANGE THE DASH TO LOWER CASE") fixed_data_col_names = [x.lower().replace("_", "-") for x in fixed_data_col_names] fixed_data = 
{k.replace("-", "_"): v for k, v in self.data.items() if k in fixed_data_col_names} - difference_record.append_fixed_data(fixed_data) + # difference_record.append_fixed_data(fixed_data) + + difference_record = self.epc_record.create_EPCDifferenceRecord(self.epc_record, fixed_data) self.base_difference_record = TrainingDataset(datasets=[difference_record], cleaned_lookup=cleaned_lookup) @@ -238,12 +241,15 @@ class Property: # Note: often when the wall is insulatied, the internal/external insulation is not noted so we should # test the impact of using these booleans if recommendation["type"] == "external_wall_insulation": - output["external_insulation"] = True - output["internal_insulation"] = False + output["external_insulation_ending"] = True + output["internal_insulation_ending"] = False if recommendation["type"] == "internal_wall_insulation": - output["external_insulation"] = False - output["internal_insulation"] = True + output["external_insulation_ending"] = False + output["internal_insulation_ending"] = True + + if recommendation["type"] == "cavity_wall_insulation": + output['is_filled_cavity_ending'] = True # TODO: perhaps detrimental # When making a recommendation for the wall, we will also update the ventilation @@ -314,7 +320,7 @@ class Property: if recommendation["type"] == "low_energy_lighting": output["low_energy_lighting_ending"] = 100 - output["lighting_energy_eff_starting"] = "Very Good" + output["lighting_energy_eff_ending"] = "Very Good" if recommendation["type"] == "windows_glazing": output["multi_glaze_proportion_ending"] = 100 diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index 0943b206..ba228d89 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -24,7 +24,8 @@ from etl.epc.settings import ( # TODO: change in setting file MANDATORY_FIXED_FEATURES = [x.lower() for x in MANDATORY_FIXED_FEATURES] -LATEST_FIELD = [x.lower() for x in LATEST_FIELD if x.lower() not in ROOM_FEATURES] +# LATEST_FIELD = [x.lower() for x in LATEST_FIELD 
if x.lower() not in ROOM_FEATURES] +LATEST_FIELD = [x.lower() for x in LATEST_FIELD] COMPONENT_FEATURES = [x.lower() for x in COMPONENT_FEATURES] RDSAP_RESPONSE = RDSAP_RESPONSE.lower() HEAT_DEMAND_RESPONSE = HEAT_DEMAND_RESPONSE.lower() diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py index afe9ab98..172e8a27 100644 --- a/etl/epc/generate_scenarios_data.py +++ b/etl/epc/generate_scenarios_data.py @@ -183,7 +183,7 @@ for scenario_property in scenario_properties: if "windows" in measure: for rec in windows_recommendations: - if rec["type"] == "windows": + if rec["type"] == "windows_glazing": windows_recs.append(rec) combi_list = [wall_recs, loft_recs, solar_recs, windows_recs] @@ -240,8 +240,8 @@ all_predictions = model_api.predict_all( prediction_buckets={ "sap_change_predictions": get_settings().SAP_PREDICTIONS_BUCKET, "heat_demand_predictions": get_settings().HEAT_PREDICTIONS_BUCKET, - "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET - } + "carbon_change_predictions": get_settings().CARBON_PREDICTIONS_BUCKET, + }, ) save_dataframe_to_s3_parquet( From 0ea8a40143e09ccb1b6b7c1061dba732e666318f Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Fri, 22 Mar 2024 19:21:21 +0000 Subject: [PATCH 3/4] add requirements for pyarrow, add timestamp information to dataset for loose version control --- etl/epc/Pipeline.py | 16 ++++++++++------ etl/epc/generate_scenarios_data.py | 8 ++++++-- etl/epc/requirements.txt | 3 ++- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/etl/epc/Pipeline.py b/etl/epc/Pipeline.py index c678c830..6abf05bd 100644 --- a/etl/epc/Pipeline.py +++ b/etl/epc/Pipeline.py @@ -1,5 +1,6 @@ import msgpack import pandas as pd +from datetime import datetime from typing import List from pathlib import Path @@ -82,9 +83,9 @@ class EPCPipeline: run_mode="training", epc_local_file="certificates.csv", epc_bucket_name="retrofit-data-dev", - 
epc_cleaning_dataset_key="sap_change_model/cleaning_dataset_rooms.parquet", - epc_all_equal_rows_key="sap_change_model/all_equal_rows_rooms.parquet", - epc_compiled_dataset_key="sap_change_model/dataset_rooms.parquet", + epc_cleaning_dataset_key="sap_change_model/{}/cleaning_dataset_rooms.parquet", + epc_all_equal_rows_key="sap_change_model/{}/all_equal_rows_rooms.parquet", + epc_compiled_dataset_key="sap_change_model/{}/dataset_rooms.parquet", use_parallel=False, ): """ @@ -107,10 +108,13 @@ class EPCPipeline: self.run_mode = run_mode self.epc_local_file = epc_local_file self.epc_bucket_name = epc_bucket_name - self.epc_cleaning_dataset_key = epc_cleaning_dataset_key - self.epc_all_equal_rows_key = epc_all_equal_rows_key - self.epc_compiled_dataset_key = epc_compiled_dataset_key + self.use_parallel = use_parallel + self.timeprefix = datetime.now().strftime("%Y-%m-%d-%H-%M-%S") + + self.epc_cleaning_dataset_key = epc_cleaning_dataset_key.format(self.timeprefix) + self.epc_all_equal_rows_key = epc_all_equal_rows_key.format(self.timeprefix) + self.epc_compiled_dataset_key = epc_compiled_dataset_key.format(self.timeprefix) def run(self): """ diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py index 172e8a27..d5bece8b 100644 --- a/etl/epc/generate_scenarios_data.py +++ b/etl/epc/generate_scenarios_data.py @@ -20,6 +20,10 @@ from recommendations.Recommendations import Recommendations from utils.logger import setup_logger from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet +from datetime import datetime + +now = datetime.now().strftime("%d-%m-%Y-%H-%M-%S") + logger = setup_logger() logger.info("Connecting to db") @@ -132,7 +136,7 @@ for scenario_property in scenario_properties: p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds) recommender = Recommendations(property_instance=p, materials=materials) - property_recommendations = recommender.recommend() + property_recommendations = 
recommender.recommend("0") wall_recommendations = recommender.wall_recomender.recommendations loft_recommendations = recommender.roof_recommender.recommendations @@ -247,5 +251,5 @@ all_predictions = model_api.predict_all( save_dataframe_to_s3_parquet( recommendations_scoring_data, "retrofit-data-dev", - "scenario_data/recommendations_scoring_data.parquet", + f"scenario_data/{now}/recommendations_scoring_data.parquet", ) diff --git a/etl/epc/requirements.txt b/etl/epc/requirements.txt index 9f972bde..87148180 100644 --- a/etl/epc/requirements.txt +++ b/etl/epc/requirements.txt @@ -1,4 +1,5 @@ pandas==2.1.3 tqdm==4.66.1 msgpack==1.0.7 -boto3==1.29.6 \ No newline at end of file +boto3==1.29.6 +pyarrow==15.0.2 \ No newline at end of file From ebb28236617abff1e3a5f91dd6b06b66a001a4d7 Mon Sep 17 00:00:00 2001 From: Michael Duong Date: Wed, 27 Mar 2024 11:39:51 +0000 Subject: [PATCH 4/4] override scenario data to have average insulation thickness, change impact values --- etl/epc/generate_scenarios_data.py | 48 +++++++++++++++++++++++++----- 1 file changed, 41 insertions(+), 7 deletions(-) diff --git a/etl/epc/generate_scenarios_data.py b/etl/epc/generate_scenarios_data.py index d5bece8b..f9f66034 100644 --- a/etl/epc/generate_scenarios_data.py +++ b/etl/epc/generate_scenarios_data.py @@ -54,9 +54,19 @@ scenario_properties = [ "postcode": "NN1 5JY", "lmk-key": "1459796789102016070507274146560098", "measures": [ - [["internal_wall_insulation"], "11", None, [0]], - [["external_wall_insulation"], "10", None, [0]], - [["solar", "windows"], "12-15", {"photo_supply_ending": 50}, [0, 1]], + [ + ["internal_wall_insulation"], + "11", + {"walls_insulation_thickness_ending": "average"}, + [0], + ], + [ + ["external_wall_insulation"], + "10", + {"walls_insulation_thickness_ending": "average"}, + [0], + ], + [["solar", "windows"], "15", {"photo_supply_ending": 50}, [0, 1]], ], }, { @@ -64,7 +74,12 @@ scenario_properties = [ "postcode": "HP1 2HA", "lmk-key": 
"c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, { @@ -72,7 +87,12 @@ scenario_properties = [ "postcode": "HP1 2HE", "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, { @@ -80,7 +100,12 @@ scenario_properties = [ "postcode": "HP1 2AN", "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, { @@ -88,11 +113,17 @@ scenario_properties = [ "postcode": "HP1 2HX", "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a", "measures": [ - [["cavity_wall_insulation", "loft_insulation"], "15", None, [0, 1]], + [ + ["cavity_wall_insulation", "loft_insulation"], + "15", + {"walls_insulation_thickness_ending": "average"}, + [0, 1], + ], ], }, ] + recommendations_scoring_data = [] for scenario_property in scenario_properties: @@ -217,6 +248,9 @@ for scenario_property in scenario_properties: recommendations_scoring_data.extend(scoring_list) recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data) +recommendations_scoring_data["impact"] = recommendations_scoring_data["impact"].astype( + int +) recommendations_scoring_data = recommendations_scoring_data.drop( columns=[ "rdsap_change",