from tqdm import tqdm
import os

from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from model_data.analysis.UvalueEstimations import UvalueEstimations
from model_data.analysis.SapModel import SapModel

# Absolute directory containing this module; every land registry file path is anchored to it.
_MODULE_DIR = os.path.abspath(os.path.dirname(__file__))

# Land registry price-paid CSV files, listed from the monthly update file back to 2017.
# Each entry is the module directory followed by a fixed "/"-separated suffix.
LAND_REGISTRY_PATHS = [
    _MODULE_DIR + suffix
    for suffix in (
        "/model_data/local_data/pp-monthly-update-new-version.csv",
        "/model_data/local_data/pp-2022 (1).csv",
        "/model_data/local_data/pp-2021.csv",
        "/model_data/local_data/pp-2020.csv",
        "/model_data/local_data/pp-2019.csv",
        "/model_data/local_data/pp-2018.csv",
        "/model_data/local_data/pp-2017-part1.csv",
        "/model_data/local_data/pp-2017-part2.csv",
    )
]


def app():
    """
    Download EPC records from the API for a fixed set of constituencies, property types and
    floor-area bands, then run the cleaning, u-value estimation and SAP model steps over the
    combined records.

    The cleaned fields are meant to let us quickly sanitise description data when new properties
    arrive. Nothing is persisted yet; see the TODO markers for the outstanding storage work.
    :return: None
    """

    client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    constituency_codes = {'E14000555', 'E14000726', 'E14000720', 'E14000721', 'E14000553', 'E14000752'}
    dwelling_types = ["bungalow", "flat", "house", "maisonette", "park home"]
    floor_area_bands = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]

    # One paginated download is issued per constituency / property type / floor-area combination,
    # and all results are pooled into a single list.
    # Older EPC records are inconsistent, so only certificates from August 2014 onwards are
    # requested: SAP09 was introduced in 2009 and SAP12 followed in England and Wales from
    # 31 July 2014.
    # NOTE(review): the per-query cap depends on how pagenated_epc_download treats
    # page_size / n_pages - confirm against that helper.
    records = []
    for constituency in tqdm(constituency_codes):
        for dwelling_type in dwelling_types:
            for band in floor_area_bands:
                query = {
                    "constituency": constituency,
                    "property-type": dwelling_type,
                    "from-month": 8,
                    "from-year": 2014,
                    "floor-area": band,
                }
                downloaded = pagenated_epc_download(
                    client=client,
                    params=query,
                    page_size=5000,
                    n_pages=10,
                )
                records.extend(downloaded)

    # Disabled snippet: produces sample address data for the land registry work
    # address_meta = [
    #     {
    #         "postcode": x["postcode"].upper(),
    #         "address1": x["address1"].upper(),
    #         "address2": x["address2"].upper(),
    #         "address3": x["address3"].upper(),
    #         "address": x["address"],
    #         "uprn": x["uprn"]
    #     } for x in data
    # ]
    #
    # import pickle
    # with open("sample_addresses.pkl", "wb") as f:
    #     pickle.dump(address_meta, f)

    # Feed the downloaded records into the cleaner
    epc_cleaner = EpcClean(records)
    # Read before clean() is called; the value is not used yet (see TODO)
    lighting_averages = epc_cleaner.lighting_averages
    # TODO: lighting_averages needs to be stored to a db
    #       The averages should also be broken down by more variables (property type, age band,
    #       constituency, etc)
    epc_cleaner.clean()
    # TODO: store the cleaner's cleaned datasets to a db

    # TODO: Add property age band into this
    uvalue_estimator = UvalueEstimations(data=records)
    uvalue_estimator.get_estimates(cleaner=epc_cleaner)
    # TODO: Store these estimates to a db

    model = SapModel(data=records, cleaner=epc_cleaner)
    model.run()
    # TODO: Store the model outputs to a db