diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 06d1aadf..ce5577bb 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -618,3 +618,12 @@ async def trigger_plan(body: PlanTriggerRequest):
     session.close()
 
     return Response(status_code=200)
+
+
+@router.post("/mds")
+async def build_mds(body: PlanTriggerRequest):
+    # TODO: This is a placeholder location for the MDS endpoint, which is still being assembled
+
+    logger.info("Connecting to db")
+    session = sessionmaker(bind=db_engine)()
+    created_at = datetime.now().isoformat()
diff --git a/etl/customers/eon/pilot_asset_list.py b/etl/customers/eon/pilot_asset_list.py
new file mode 100644
index 00000000..8401fde5
--- /dev/null
+++ b/etl/customers/eon/pilot_asset_list.py
@@ -0,0 +1,176 @@
+"""Build the initial asset list for the E.ON pilot portfolio."""
+
+import os
+import time
+
+import pandas as pd
+from dotenv import load_dotenv
+from tqdm import tqdm
+
+from backend.SearchEpc import SearchEpc
+from utils.s3 import read_excel_from_s3, save_csv_to_s3
+
+# Read in the .env file in backend
+load_dotenv(dotenv_path="backend/.env")
+EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
+# Read the key from the environment (e.g. backend/.env) rather than pasting it into the source
+ORDNANCE_SURVEY_API_KEY = os.getenv("ORDNANCE_SURVEY_API_KEY", "")
+
+PORTFOLIO_ID = 80
+USER_ID = 8
+
+# (measure key, MDS template column) pairs, in the order the columns are checked.
+# Several template columns can map onto the same measure key (e.g. both EWI variants).
+MDS_MEASURE_COLUMNS = (
+    ("external_wall_insulation", "EWI (Trad Const)"),
+    ("external_wall_insulation", "EWI (Non Trad Const)"),
+    ("cavity_wall_insulation", "CWI"),
+    ("loft_insulation", "LI"),
+    ("party_wall_insulation", "Party Wall Insu"),
+    ("internal_wall_insulation", "IWI (POA - Prov Sum Only)"),
+    ("suspended_floor_insulation", "U/F Insu (Manual install)"),
+    ("suspended_floor_insulation", "U/F insu (Qbot)"),
+    ("solid_floor_insulation", "Solid floor insl (Out of scope - Prov sum only)"),
+    ("air_source_heat_pump", "ASHP Htg"),
+    ("ground_source_heat_pump", "GSHP Htg"),
+    ("shared_ground_loops", "Shared ground loops"),
+    ("communal_heat_networks", "Communal heat networks"),
+    ("district_heating_networks", "District heating networks"),
+    ("electric_storage_heaters", "Elec Storage Htrs (Out of scope -Prov sum only)"),
+    ("low_energy_lighting", "Low Energy Bulbs"),
+    ("cylinder_insulation", "Cyl Insulation"),
+    ("smart_controls", "Smart controls"),
+    ("zone_controls", "Zone controls"),
+    ("trvs", "Upgrade TRV's"),
+    ("solar_pv", "Solar PV"),
+    ("solar_thermal", "Solar Thermal"),
+    ("double_glazing", "Double Glazing (POA - Prov sum only)"),
+    ("draught_proofing", "Draught Proofing"),
+    ("mechanical_ventilation", "Ventilation upgrade"),
+    ("gas_boiler", "Gas Boiler Replacement"),
+    ("flat_roof_insulation", "Flat roof (Out of scope - prov sum only)"),
+    ("room_in_roof_insulation", "RIR (POA - Prov sum only)"),
+    ("ev_charging", "EV Charging"),
+    ("battery", "Battery"),
+)
+
+
+def extract_mds_measures(config):
+    """
+    List the measures flagged for one property in the MDS template.
+
+    :param config: One row of the MDS template, indexable by template column name.
+    :return: A list of single-entry dicts ``{measure_key: template_column}``, one for each
+        column in MDS_MEASURE_COLUMNS whose cell is not null.
+    """
+    return [
+        {measure: column}
+        for measure, column in MDS_MEASURE_COLUMNS
+        if not pd.isnull(config[column])
+    ]
+
+
+def app():
+    """
+    Create the initial asset list for the E.ON pilot.
+
+    Reads the MDS template from S3, looks up the UPRN of every property through the
+    Ordnance Survey client on SearchEpc, attaches the measures flagged in the template,
+    stores the result as a CSV in S3 and prints the portfolio payload.
+
+    :return: None
+    :raises ValueError: If a template row does not match exactly one looked-up property.
+    """
+
+    raw_asset_list = read_excel_from_s3(
+        bucket_name="retrofit-datalake-dev",
+        file_key="customers/E.ON/sample SHDF Information MDS Template Vr3.0.xlsx",
+        header_row=11,
+        drop_all_na=False
+    )
+
+    # Keep just the columns we need
+    raw_asset_list_base = raw_asset_list[
+        ["Address", "Postcode", "No Bedrooms"]
+    ].copy().rename(
+        columns={
+            "Address": "address",
+            "Postcode": "postcode",
+            "No Bedrooms": "n_bedrooms"
+        }
+    )
+
+    # For each property, retrieve the UPRN from the Ordnance Survey API. As a temporary solution
+    # this uses a free-trial Ordnance Survey key created on a personal account. Every row is
+    # looked up because the matching step below requires exactly one UPRN per template row.
+    asset_list_with_uprn = []
+    for _, property_meta in tqdm(raw_asset_list_base.iterrows(), total=raw_asset_list_base.shape[0]):
+        # Space out consecutive lookups (presumably to respect the API rate limit)
+        time.sleep(1.1)
+        searcher = SearchEpc(
+            address1=property_meta["address"],
+            postcode=property_meta["postcode"],
+            auth_token=EPC_AUTH_TOKEN,
+            os_api_key=ORDNANCE_SURVEY_API_KEY,
+            full_address=", ".join([property_meta["address"], property_meta["postcode"]])
+        )
+
+        # Let's just find the UPRN
+        searcher.ordnance_survey_client.get_places_api()
+        uprn = searcher.ordnance_survey_client.most_relevant_result["UPRN"]
+
+        asset_list_with_uprn.append(
+            {
+                **property_meta,
+                "uprn": uprn,
+            }
+        )
+
+    # Store the asset list and create the portfolio payload
+    asset_list_with_uprn_df = pd.DataFrame(asset_list_with_uprn)
+    asset_list_with_uprn_df["uprn"] = asset_list_with_uprn_df["uprn"].astype(str).astype(int)
+
+    # We now determine which measures we need for each property
+    finalised_asset_list = []
+    for _, config in raw_asset_list.iterrows():
+        asset_config = asset_list_with_uprn_df[
+            (asset_list_with_uprn_df["address"] == config["Address"]) &
+            (asset_list_with_uprn_df["postcode"] == config["Postcode"])
+        ]
+        if asset_config.shape[0] != 1:
+            raise ValueError("Could not find a unique match for the property")
+
+        measures = extract_mds_measures(config)
+
+        finalised_asset_list.append(
+            {
+                "address": config["Address"],
+                "postcode": config["Postcode"],
+                "uprn": asset_config["uprn"].values[0],
+                "n_bedrooms": config["No Bedrooms"],
+                "measures": measures
+            }
+        )
+    finalised_asset_list = pd.DataFrame(finalised_asset_list)
+
+    # Store the asset list in s3
+    filename = f"{USER_ID}/{PORTFOLIO_ID}/pilot.csv"
+    save_csv_to_s3(
+        dataframe=finalised_asset_list,
+        bucket_name="retrofit-plan-inputs-dev",
+        file_name=filename
+    )
+
+    # EPC C portfolio
+    body = {
+        "portfolio_id": str(PORTFOLIO_ID),
+        "housing_type": "Social",
+        "goal": "Increase EPC",
+        "goal_value": "C",
+        "trigger_file_path": filename,
+        "already_installed_file_path": "",
+        "patches_file_path": "",
+        "non_invasive_recommendations_file_path": "",
+        "budget": None,
+    }
+    print(body)
diff --git a/utils/s3.py b/utils/s3.py
index fd5992ce..05482271 100644
--- a/utils/s3.py
+++ b/utils/s3.py
@@ -198,13 +198,14 @@ def read_pickle_from_s3(bucket_name, s3_file_name):
     return data
 
 
-def read_excel_from_s3(bucket_name, file_key, header_row):
+def read_excel_from_s3(bucket_name, file_key, header_row, drop_all_na=True):
     """
     Read an Excel file from an S3 bucket and return it as a pandas DataFrame.
 
     :param bucket_name: Name of the S3 bucket.
     :param file_key: Key of the file (including directory path within the bucket).
     :param header_row: The row number to use as the header (0-indexed).
+    :param drop_all_na: Whether to drop columns where all values are NaN.
     :return: A pandas DataFrame containing the data from the Excel file.
     """
 
@@ -219,7 +220,8 @@ def read_excel_from_s3(bucket_name, file_key, header_row):
     df = pd.read_excel(excel_buffer, header=header_row)
 
     # Drop columns where all values are NaN
-    df.dropna(axis=1, how='all', inplace=True)
+    if drop_all_na:
+        df.dropna(axis=1, how='all', inplace=True)
 
     # Reset index if the first column is just an index or entirely NaN
     df.reset_index(drop=True, inplace=True)