"""Generate scoring inputs for a fixed set of retrofit scenarios.

Script form of ``etl/epc/generate_scenarios_data.py``. For every entry in
``scenario_properties`` it searches for the property's EPCs, picks the one
with the configured LMK key, builds the property's recommendations and emits
one scoring record per combination of the recommendations selected by each
scenario measure. The collected records are saved to S3 as a parquet file.
"""
import itertools
from datetime import datetime

import pandas as pd
from sqlalchemy.orm import sessionmaker

from etl.epc.Record import EPCRecord
from backend.SearchEpc import SearchEpc

from backend.app.config import get_settings
from backend.app.db.connection import db_engine
from backend.app.db.functions.materials_functions import get_materials

from backend.app.plan.utils import get_cleaned

from backend.Property import Property
from etl.solar.SolarPhotoSupply import SolarPhotoSupply

from recommendations.Recommendations import Recommendations
from utils.logger import setup_logger
from utils.s3 import read_dataframe_from_s3_parquet, save_dataframe_to_s3_parquet

logger = setup_logger()

# Scenario measure names served by the wall recommender. The order fixes the
# order in which matching wall recommendations are collected, and therefore
# the combination index that ends up in each record id.
_WALL_MEASURES = (
    "internal_wall_insulation",
    "external_wall_insulation",
    "cavity_wall_insulation",
)

# Columns removed from the scoring data before it is saved.
_DROPPED_COLUMNS = [
    "rdsap_change",
    "heat_demand_change",
    "carbon_change",
    "sap_ending",
    "heat_demand_ending",
    "carbon_ending",
]


def _recommendations_of_type(recommendations, rec_type):
    """Return the entries of *recommendations* whose ``"type"`` is *rec_type*."""
    return [rec for rec in recommendations if rec["type"] == rec_type]


def _find_epc_by_lmk_key(epcs, lmk_key):
    """Return the first EPC in *epcs* whose ``"lmk-key"`` equals *lmk_key*.

    ``None`` entries are skipped so a missing EPC in the candidate list does
    not abort the search.

    Raises:
        IndexError: if no candidate carries *lmk_key*. The type matches what
            the previous ``[...][0]`` lookup raised; the message now names
            the key that could not be found.
    """
    for epc in epcs:
        if epc is not None and epc.get("lmk-key") == lmk_key:
            return epc
    raise IndexError(f"No EPC found with lmk-key {lmk_key!r}")


def _select_measure_recommendations(measure, wall, loft, solar, windows):
    """Collect the recommendations requested by *measure*, grouped by component.

    A source collection is only iterated when *measure* names it. The result
    holds the non-empty groups in the fixed order wall, loft, solar, windows,
    ready to be passed to ``itertools.product``. It is empty when nothing
    matched.
    """
    wall_recs = [
        rec
        for wall_type in _WALL_MEASURES
        if wall_type in measure
        for rec in _recommendations_of_type(wall, wall_type)
    ]
    loft_recs = (
        _recommendations_of_type(loft, "loft_insulation")
        if "loft_insulation" in measure
        else []
    )
    solar_recs = (
        _recommendations_of_type(solar, "solar_pv") if "solar" in measure else []
    )
    windows_recs = (
        _recommendations_of_type(windows, "windows") if "windows" in measure else []
    )
    # Empty groups are left out: a single empty iterable would make the
    # product of all groups empty.
    groups = (wall_recs, loft_recs, solar_recs, windows_recs)
    return [group for group in groups if group]


def _build_scoring_record(prop, combination, override):
    """Build one scoring record for *prop* with *combination* applied.

    Starts from a copy of the first row of ``prop.base_difference_record.df``,
    passes it through ``prop.create_recommendation_scoring_data`` once per
    recommendation in *combination*, then writes every key/value of
    *override* (when it is not ``None``) onto the result.
    """
    record = prop.base_difference_record.df.to_dict("records")[0].copy()
    for rec in combination:
        record = prop.create_recommendation_scoring_data(
            property_id=rec["type"],
            recommendation_record=record,
            recommendation=rec,
        )
    if override is not None:
        for key, value in override.items():
            record[key] = value
    return record


# Each entry names a property, the LMK key of the EPC to start from, and a
# list of ``[measure names, impact, override]`` triples. ``override`` is either
# ``None`` or a dict of values written onto every record of that measure.
scenario_properties = [
    {
        "address": "2 South Terrace",
        "postcode": "NN1 5JY",
        "lmk-key": "1459796789102016070507274146560098",
        "measures": [
            [["internal_wall_insulation"], "11", None],
            [["external_wall_insulation"], "10", None],
            [["solar", "windows"], "12-15", {"photo_supply_ending": 50}],
        ],
    },
    {
        "address": "8 Lindlings",
        "postcode": "HP1 2HA",
        "lmk-key": "c14029235739827d5f627dc8aa9bb567d026b267e851e0db0001db24638667b1",
        "measures": [
            [["cavity_wall_insulation", "loft_insulation"], "15", None],
        ],
    },
    {
        "address": "44 Lindlings",
        "postcode": "HP1 2HE",
        "lmk-key": "99296a6dda21314fef3a61cda59e441e9a2aacf115eb96f4a0fa85696bf7b117",
        "measures": [
            [["cavity_wall_insulation", "loft_insulation"], "15", None],
        ],
    },
    {
        "address": "46 Chaulden Terrace",
        "postcode": "HP1 2AN",
        "lmk-key": "d1e0534be3a44c33003323b21d0e322e3daddc65b5ee71936f89c59ddab96b50",
        "measures": [
            [["cavity_wall_insulation", "loft_insulation"], "15", None],
        ],
    },
    {
        "address": "73 Long Chaulden",
        "postcode": "HP1 2HX",
        "lmk-key": "1eae354db522a95188018d9cd0502ed8c609910b6c88f8797d3a25f59b11770a",
        "measures": [
            [["cavity_wall_insulation", "loft_insulation"], "15", None],
        ],
    },
]

logger.info("Connecting to db")
session = sessionmaker(bind=db_engine)()
# NOTE(review): not referenced anywhere else in this script; kept in case it
# is meant to tag the output.
created_at = datetime.now().isoformat()

recommendations_scoring_data = []

try:
    session.begin()
    logger.info("Getting the inputs")

    cleaning_data = read_dataframe_from_s3_parquet(
        bucket_name=get_settings().DATA_BUCKET,
        file_key="sap_change_model/cleaning_dataset.parquet",
    )

    materials = get_materials(session)
    cleaned = get_cleaned()

    uprn_filenames = read_dataframe_from_s3_parquet(
        bucket_name=get_settings().DATA_BUCKET, file_key="spatial/filename_meta.parquet"
    )
    photo_supply_lookup, floor_area_decile_thresholds = SolarPhotoSupply.load(
        bucket=get_settings().DATA_BUCKET
    )

    for scenario_property in scenario_properties:
        # Search for the property's EPCs by address and postcode.
        epc_searcher = SearchEpc(
            address1=scenario_property["address"],
            postcode=scenario_property["postcode"],
            auth_token=get_settings().EPC_AUTH_TOKEN,
            os_api_key=get_settings().ORDNANCE_SURVEY_API_KEY,
        )
        epc_searcher.find_property()

        # Find the epc with the same LMK key
        all_epcs = epc_searcher.older_epcs.copy()
        all_epcs.extend([epc_searcher.newest_epc, epc_searcher.full_sap_epc])
        original_epc = _find_epc_by_lmk_key(all_epcs, scenario_property["lmk-key"])

        epc_records = {
            "original_epc": original_epc,
            "full_sap_epc": {},
            "old_data": [],
        }

        prepared_epc = EPCRecord(
            epc_records=epc_records, run_mode="newdata", cleaning_data=cleaning_data
        )

        p = Property(
            id=prepared_epc.uprn,
            address=epc_searcher.address_clean,
            postcode=epc_searcher.postcode_clean,
            epc_record=prepared_epc,
        )

        p.get_spatial_data(uprn_filenames)
        p.get_components(cleaned, photo_supply_lookup, floor_area_decile_thresholds)

        recommender = Recommendations(property_instance=p, materials=materials)
        # The return value is not needed here. Presumably recommend() fills
        # the per-component recommenders read below -- TODO confirm.
        recommender.recommend()

        wall_recommendations = recommender.wall_recomender.recommendations
        loft_recommendations = recommender.roof_recommender.recommendations
        solar_recommendations = recommender.solar_recommender.recommendation
        windows_recommendations = recommender.windows_recommender.recommendation

        p.create_base_difference_epc_record(cleaned_lookup=cleaned)

        # Create the record for each of the different measures
        for measure, impact, override in scenario_property["measures"]:
            combi_list = _select_measure_recommendations(
                measure,
                wall_recommendations,
                loft_recommendations,
                solar_recommendations,
                windows_recommendations,
            )

            if not combi_list:
                # itertools.product() with no iterables yields a single empty
                # tuple, which would produce a record with no recommendation
                # applied but still labelled with this measure's impact.
                logger.warning(
                    f"No recommendations matched {measure} for "
                    f"{scenario_property['address']}; skipping this measure"
                )
                continue

            for i, combi in enumerate(itertools.product(*combi_list)):
                recommendation_record = _build_scoring_record(p, combi, override)
                # NOTE(review): the id is built from the measure names and the
                # combination index only, so properties that share measures
                # produce identical ids -- confirm this is intended.
                recommendation_record["id"] = "+".join(measure) + "+" + str(i)
                recommendation_record["impact"] = impact
                recommendations_scoring_data.append(recommendation_record)
finally:
    # The session is only needed while the inputs and recommendations are
    # being built; release it even if a property fails.
    session.close()

if not recommendations_scoring_data:
    # Without this guard the column drop below fails with a KeyError that
    # does not say why there is nothing to save.
    raise RuntimeError("No scoring records were generated for any scenario property")

recommendations_scoring_data = pd.DataFrame(recommendations_scoring_data)
recommendations_scoring_data = recommendations_scoring_data.drop(
    columns=_DROPPED_COLUMNS
)

# Move "impact" and then "id" to the front, so the saved frame starts with
# the columns "id", "impact".
impact_col = recommendations_scoring_data.pop("impact")
recommendations_scoring_data.insert(0, "impact", impact_col)

id_col = recommendations_scoring_data.pop("id")
recommendations_scoring_data.insert(0, "id", id_col)

# NOTE(review): the output bucket is hard-coded while every read above uses
# get_settings().DATA_BUCKET -- confirm this is intentional.
save_dataframe_to_s3_parquet(
    recommendations_scoring_data,
    "retrofit-data-dev",
    "scenario_data/recommendations_scoring_data.parquet",
)