import pandas as pd
from tqdm import tqdm
from utils.s3 import save_dataframe_to_s3_parquet, read_dataframe_from_s3_parquet
from utils.logger import setup_logger
from etl.epc.settings import EARLIEST_EPC_DATE

# Module-level logger, created once at import time via the project's setup_logger helper.
logger = setup_logger()


class AirSourceHeatPumpEfficiency:
    """
    Summarise the main-heating efficiency ratings recorded for air source heat pumps in EPC data.

    ``certificates.csv`` is read from every supplied directory, the rows for the heating systems
    of interest are kept, and the number of certificates per cleaned construction age band and
    main-heating efficiency rating is counted.
    """

    # Exact description strings matched against the certificate columns.
    _ASHP_MAINHEAT = "Air source heat pump, radiators, electric"
    _ASHP_CONTROL = "Time and temperature zone control"
    _HHR_STORAGE_CONTROL = "Controls for high heat retention storage heaters"

    # Rows missing any of these columns are discarded.
    _REQUIRED_COLUMNS = ["PROPERTY_TYPE", "BUILT_FORM", "CONSTRUCTION_AGE_BAND", "TOTAL_FLOOR_AREA"]

    # Placeholder age-band values that are excluded from the efficiency analysis.
    _INVALID_AGE_BANDS = ["NO DATA!", "INVALID!"]

    # Only certificates lodged on or after this date feed the efficiency analysis.
    _ASHP_EARLIEST_LODGEMENT = "2019-01-01"

    # Columns that define one "property mix" combination.
    _PROPERTY_MIX_COLUMNS = [
        "PROPERTY_TYPE",
        "BUILT_FORM",
        "MAINHEAT_DESCRIPTION",
        "MAINHEAT_ENERGY_EFF",
        "MAINHEATCONT_DESCRIPTION",
        "MAINHEATC_ENERGY_EFF",
        "MAIN_FUEL",
        "HOTWATER_DESCRIPTION",
        "HOT_WATER_ENERGY_EFF",
        "MAINS_GAS_FLAG",
    ]

    def __init__(self, file_directories, cleaned_lookup):
        """
        :param file_directories: A list of directories where files are stored. Each entry must
            support ``entry / "certificates.csv"`` (e.g. a ``pathlib.Path``).
        :param cleaned_lookup: A dictionary containing cleaned lookup data. It is stored on the
            instance but not read by any code in this class.
        """
        self.file_directories = file_directories
        self.cleaned_lookup = cleaned_lookup

        self.results = []

    def create_dataset(self):
        """
        Build the air source heat pump efficiency counts from the EPC certificate files.

        Exploratory breakdowns (rating frequencies, property type and built form mix) are written
        to the log rather than silently discarded.

        :return: DataFrame with one row per (CONSTRUCTION_AGE_BAND_CLEAN, MAINHEAT_ENERGY_EFF)
            pair; the LMK_KEY column holds the number of matching certificates.
        :raises ValueError: If ``file_directories`` yields no directories.
        """
        logger.info("Creating air source heat pump efficiency dataset")

        heating_frames = []
        for directory in tqdm(self.file_directories):
            certificates = pd.read_csv(directory / "certificates.csv", low_memory=False)
            # Keep only certificates lodged after EARLIEST_EPC_DATE (original note: entries after SAP12)
            certificates["LODGEMENT_DATE"] = pd.to_datetime(certificates["LODGEMENT_DATE"])
            certificates = certificates[certificates["LODGEMENT_DATE"] > EARLIEST_EPC_DATE]
            heating_frames.append(self._select_heating_rows(certificates))

        # pd.concat raises an opaque "No objects to concatenate" on an empty list; fail with a clear message.
        if not heating_frames:
            raise ValueError("No file directories were supplied, so there are no certificates to process")

        heating_df = pd.concat(heating_frames)

        # Clean construction age band.
        # NOTE(review): imported locally as in the original - presumably to avoid a circular import; confirm.
        from etl.epc.DataProcessor import EPCDataProcessor
        heating_df["CONSTRUCTION_AGE_BAND_CLEAN"] = heating_df["CONSTRUCTION_AGE_BAND"].apply(
            EPCDataProcessor.clean_construction_age_band
        )

        ashp_df = self._select_recent_ashp_rows(heating_df)
        ashp_efficiencies = self._count_efficiency_ratings(ashp_df)

        # NOTE(review): the original aggregated an undefined ``all_counts`` frame here. The counts are
        # rebuilt from the air source heat pump rows, which matches the surrounding comments - confirm
        # that this is the intended source.
        property_mix = self._count_property_mix(ashp_df)

        self._log_exploratory_summaries(ashp_df, ashp_efficiencies, property_mix)

        # TODO: Research options for mid and end-terrace houses
        # TODO: Research the options for flats - we see them appear in flats, but practically speaking, how does the
        #       install process work?
        return ashp_efficiencies

    @classmethod
    def _select_heating_rows(cls, certificates):
        """Keep ASHP (zone-controlled) and high heat retention storage rows that have all required columns."""
        is_air_source_heat_pump = (
            (certificates["MAINHEAT_DESCRIPTION"] == cls._ASHP_MAINHEAT) &
            (certificates["MAINHEATCONT_DESCRIPTION"] == cls._ASHP_CONTROL)
        )
        is_high_heat_retention_storage = certificates["MAINHEATCONT_DESCRIPTION"] == cls._HHR_STORAGE_CONTROL
        selected = certificates[is_air_source_heat_pump | is_high_heat_retention_storage]

        # Drop rows that have a missing PROPERTY_TYPE, BUILT_FORM, CONSTRUCTION_AGE_BAND, TOTAL_FLOOR_AREA
        return selected.dropna(subset=cls._REQUIRED_COLUMNS)

    @classmethod
    def _select_recent_ashp_rows(cls, heating_df):
        """Keep ASHP rows with a usable construction age band lodged on or after the 2019 cut-off."""
        return heating_df[
            (heating_df["MAINHEAT_DESCRIPTION"] == cls._ASHP_MAINHEAT) &
            (~heating_df["CONSTRUCTION_AGE_BAND"].isin(cls._INVALID_AGE_BANDS)) &
            (heating_df["LODGEMENT_DATE"] >= pd.to_datetime(cls._ASHP_EARLIEST_LODGEMENT))
        ]

    @staticmethod
    def _count_efficiency_ratings(ashp_df):
        """Count certificates (non-null LMK_KEY) per cleaned age band and main-heating efficiency rating."""
        return (
            ashp_df.groupby(["CONSTRUCTION_AGE_BAND_CLEAN", "MAINHEAT_ENERGY_EFF"])["LMK_KEY"]
            .count()
            .reset_index()
        )

    @classmethod
    def _count_property_mix(cls, ashp_df):
        """Count rows per distinct combination of the property-mix columns, in a ``count`` column."""
        # groupby drops rows whose key columns contain missing values (pandas default dropna=True).
        return ashp_df.groupby(cls._PROPERTY_MIX_COLUMNS).size().reset_index(name="count")

    @staticmethod
    def _log_exploratory_summaries(ashp_df, ashp_efficiencies, property_mix):
        """Log the exploratory breakdowns that the original computed as bare, discarded expressions."""
        logger.info(f"ASHP main heating efficiency ratings:\n{ashp_df['MAINHEAT_ENERGY_EFF'].value_counts()}")
        logger.info(
            "Efficiency rating groups per construction age band:\n"
            f"{ashp_efficiencies['CONSTRUCTION_AGE_BAND_CLEAN'].value_counts()}"
        )
        logger.info(f"ASHP certificates per property type:\n{property_mix.groupby('PROPERTY_TYPE')['count'].sum()}")

        # Original notes from an earlier run:
        # In houses, 68% of the cases where we see air source heat pumps are in detached and semi-detached houses
        # In Bungalows, 74% of cases where we see air source heat pumps are in detached and semi-detached houses
        # NOTE(review): as in the original, value_counts tallies distinct combinations (rows of the
        # property mix), not certificates weighted by ``count`` - confirm which one is intended.
        for property_type, as_share in (("House", True), ("Flat", False), ("Bungalow", True)):
            built_forms = property_mix.loc[property_mix["PROPERTY_TYPE"] == property_type, "BUILT_FORM"]
            logger.info(
                f"Built form mix for property type {property_type}:\n"
                f"{built_forms.value_counts(normalize=as_share)}"
            )