# Model/sfr/principal_pitch/1_prepare_data.py

"""
This script prepares the data for the principal pitch modelling
"""
import os
import pandas as pd
from dotenv import load_dotenv
from utils.s3 import save_csv_to_s3
from etl.find_my_epc.AssetListEpcData import AssetListEpcData

# Load environment variables from the backend .env file. The path is relative,
# so it is resolved against the current working directory.
load_dotenv(dotenv_path="backend/.env")
EPC_AUTH_TOKEN = os.getenv("EPC_AUTH_TOKEN")
if not EPC_AUTH_TOKEN:
    # os.getenv returns None when the variable is absent. Fail fast here rather
    # than handing a missing token to the EPC client further down the script.
    raise RuntimeError(
        "EPC_AUTH_TOKEN is not set; define it in the environment or in backend/.env"
    )

# Identifiers used to build the S3 object keys and the request body below
PORTFOLIO_ID = 206
USER_ID = 8
# Target EPC band for this run
EPC_TARGET = "C"

# Load the candidate properties from the source spreadsheet
# NOTE(review): this is an absolute path on one developer's machine - confirm
# whether it should be made configurable.
properties = pd.read_excel(
    "/Users/khalimconn-kowlessar/Documents/hestia/sfr/Spring JV/Birmingham_price_top300.xlsx"
)

# Build one combined row filter:
#   - current EPC rating of D or worse
#   - not a flat according to either property-type column
rated_d_or_below = properties["current_energy_rating"].isin(["D", "E", "F", "G"])
not_flat_std = properties["property_type_std"] != "Flat"
not_flat_raw = properties["property_type"] != "flat"
properties = properties[rated_d_or_below & not_flat_std & not_flat_raw].copy()

# Map the spreadsheet's column names onto the names used downstream
column_aliases = {
    "address1": "address",
    "number_of_bathrooms": "n_bathrooms",
    "num_beds": "n_bedrooms",
}
properties = properties.rename(columns=column_aliases)

# Flag every remaining property with patch=True
properties["patch"] = True

# Pull the non-invasive recommendations
# Build the EPC client from the filtered asset list and the auth token.
asset_list_epc_client = AssetListEpcData(
    asset_list=properties,
    epc_auth_token=EPC_AUTH_TOKEN
)
# The three calls below run in this order; the rest of the script reads
# `extracted_data`, `epc_data`, `non_invasive_recommendations` and `patches`
# from the client afterwards.
# NOTE(review): presumably these calls populate those attributes and depend on
# each other's results - confirm against AssetListEpcData before reordering.
asset_list_epc_client.get_data()
asset_list_epc_client.get_non_invasive_recommendations()
asset_list_epc_client.get_patch()

# Wrap the client's results in DataFrames for the comparison and merge steps below.
extracted_df = pd.DataFrame(asset_list_epc_client.extracted_data)
epc_df = pd.DataFrame(asset_list_epc_client.epc_data)

# Find examples where patches are different to the api.
# The columns are declared up front so that an empty `patches` list still
# produces a frame with the expected columns; without them the `diff` filter
# below raises KeyError on a column-less frame.
compare_columns = ["uprn", "address", "postcode", "api_epc", "fme_epc"]
compare_rows = []
for patch in asset_list_epc_client.patches:
    uprn = patch["uprn"]
    # Take the first matching row explicitly. `.squeeze()` only yields a row
    # when exactly one record matches; `.iloc[0]` also copes with repeated
    # UPRNs, and raises a clear IndexError when a UPRN has no match at all.
    extracted = extracted_df[extracted_df["uprn"] == uprn].iloc[0]
    epc = epc_df[epc_df["uprn"] == uprn].iloc[0]
    compare_rows.append(
        {
            "uprn": extracted["uprn"],
            "address": extracted["address"],
            "postcode": extracted["postcode"],
            "api_epc": int(extracted["current_epc_efficiency"]),
            "fme_epc": int(epc["current-energy-efficiency"]),
        }
    )
compare_epc = pd.DataFrame(compare_rows, columns=compare_columns)

# Rows where the two efficiency scores disagree (kept for manual inspection)
diff = compare_epc[compare_epc["api_epc"] != compare_epc["fme_epc"]]

# Compare matched addresses to make sure they are the same: put the extracted
# address/postcode next to the EPC record's address/postcode for each UPRN.
compare_addresses = extracted_df[["address", "postcode", "uprn"]].merge(
    epc_df[["uprn", "address1", "postcode"]].rename(
        columns={"address1": "epc_address1", "postcode": "epc_postcode"}
    ),
    how="left",
    on=["uprn"],
)

# Add on uprn.
# De-duplicate the lookup on the join keys first: a left merge emits one output
# row per matching lookup row, so a repeated (address, postcode) pair in
# extracted_df would otherwise duplicate properties in the asset list that is
# stored below. drop_duplicates keeps the first occurrence of each pair.
uprn_lookup = extracted_df[["address", "postcode", "uprn"]].drop_duplicates(
    subset=["address", "postcode"]
)
properties = properties.merge(
    uprn_lookup,
    how="left",
    on=["address", "postcode"],
)

# Upload the modelling inputs to S3. Every object goes to the same bucket under
# a "<user id>/<portfolio id>/" key prefix.
inputs_bucket = "retrofit-plan-inputs-dev"
key_prefix = f"{USER_ID}/{PORTFOLIO_ID}"

# Asset list
filename = f"{key_prefix}/asset_list.csv"
save_csv_to_s3(dataframe=properties, bucket_name=inputs_bucket, file_name=filename)

# Non-invasive recommendations
non_invasive_recommendations_filename = f"{key_prefix}/non_invasive_recommendations.csv"
non_invasive_df = pd.DataFrame(asset_list_epc_client.non_invasive_recommendations)
save_csv_to_s3(
    dataframe=non_invasive_df,
    bucket_name=inputs_bucket,
    file_name=non_invasive_recommendations_filename,
)

# Patches are uploaded only when there are any; otherwise the path stays empty
has_patches = bool(asset_list_epc_client.patches)
patches_filename = f"{key_prefix}/patches.csv" if has_patches else ""
if has_patches:
    save_csv_to_s3(
        dataframe=pd.DataFrame(asset_list_epc_client.patches),
        bucket_name=inputs_bucket,
        file_name=patches_filename,
    )

# Request body describing the plan to run against the files uploaded above.
# The EPC goal and the scenario name are both derived from EPC_TARGET so the
# target band is defined in exactly one place.
body = {
    "portfolio_id": str(PORTFOLIO_ID),
    "housing_type": "Private",
    "goal": "Increasing EPC",
    "goal_value": EPC_TARGET,
    "trigger_file_path": filename,
    "already_installed_file_path": "",
    # Empty string when no patches were uploaded
    "patches_file_path": patches_filename,
    "non_invasive_recommendations_file_path": non_invasive_recommendations_filename,
    "valuation_file_path": "",
    "scenario_name": f"EPC {EPC_TARGET}",
    "multi_plan": True,
    "budget": None,
    "ashp_cop": 3.5,
    # This is new - when optimising, we drop scores by a few points to account for SAP 10
    "simulate_sap_10": True,
    "exclusions": ["external_wall_insulation"],
    "required_measures": ["cavity_wall_insulation", "loft_insulation"],
}
# Echo the payload so it can be inspected or copied
print(body)