import pandas as pd
from tqdm import tqdm
import os

from model_data.temp_inputs import input_data
from model_data.Property import Property
from model_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from model_data.downloader import pagenated_epc_download
from model_data.EpcClean import EpcClean
from model_data.OpenUprnClient import OpenUprnClient


def handler():
    """Run the property data-gathering pipeline.

    Steps, in order:

    1. Build a ``Property`` for every entry in ``input_data`` and call
       ``search_address_epc()`` on each.
    2. Read the Open UPRN CSV for the properties' UPRNs and call
       ``get_coordinates()`` on each property with that client.
    3. Download EPC records for every distinct local authority of the input
       properties (``pagenated_epc_download``) and run ``EpcClean`` over them.
    4. Load ``data.pkl`` from this file's directory and build upper-cased
       address metadata from it.
    5. Construct a ``LandRegistryClient`` over the local land-registry CSVs.
    6. Read the borehole DBF table and put its first 1000 records in a DataFrame.

    All local data files are resolved relative to the directory containing
    this file.
    """
    # For now the input data is a hard-coded list of dictionaries; eventually it should be
    # read in from a file.

    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Resolve this file's directory once; every local data file below lives underneath it.
    base_dir = os.path.abspath(os.path.dirname(__file__))
    local_data_dir = os.path.join(base_dir, "model_data", "local_data")

    input_properties = [
        Property(postcode=config['postcode'], address1=config['address1'], epc_client=epc_client)
        for config in input_data
    ]

    for p in input_properties:
        p.search_address_epc()

    uprns = [p.data['uprn'] for p in input_properties]

    # NOTE(review): the directory is stamped 202306 but the file inside it is stamped 202305 -
    # confirm this matches the extracted archive.
    open_uprn_client = OpenUprnClient(
        path=os.path.join(local_data_dir, "osopenuprn_202306_csv", "osopenuprn_202305.csv"),
        uprns=uprns
    )
    open_uprn_client.read()

    for p in input_properties:
        p.get_coordinates(open_uprn_client)

    # A set, so each local authority is downloaded only once.
    local_authorities = {p.data['local-authority'] for p in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    cleaner = EpcClean(data)

    cleaner.clean()

    # NOTE(review): this rebinds `data`, so the list downloaded above is no longer referenced by
    # that name and `cleaner` is not used again in the visible part of this function. Presumably
    # a development shortcut - confirm whether the pickle or the fresh download should win.
    # pickle.load can execute arbitrary code, so data.pkl must only ever come from a trusted source.
    import pickle
    with open(os.path.join(base_dir, "data.pkl"), "rb") as f:
        data = pickle.load(f)

    # NOTE(review): `postcodes` is not used in the visible part of this function.
    postcodes = [x["postcode"].upper() for x in data]
    address_meta = [
        {
            "postcode": x["postcode"].upper(),
            "address1": x["address1"].upper(),
            "address2": x["address2"].upper(),
            "address3": x["address3"].upper(),
            "address": x["address"],
            "uprn": x["uprn"]
        } for x in data
    ]

    # For testing:
    # from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
    # from collections import Counter
    # count = Counter([x["main-fuel"] for x in data])
    # descriptions = {x["hotwater-description"] for x in data}
    # out = []
    # for description in descriptions:
    #     res = HotWaterAttributes(description).process()
    #     out.append(
    #         {
    #             "original_description": description,
    #             **res
    #         }
    #     )
    # df = pd.DataFrame(out)
    # df = df.sort_values("original_description")
    # df = df.reset_index(drop=True)
    #
    # import numpy as np
    # idx = 1
    # record = df[df.index == idx].to_dict("records")[0]
    # record = {k: v for k, v in record.items() if v not in [None, np.nan]}
    # from pprint import pprint
    # pprint(record)
    #
    # # Issues:
    # # 1) '2207 Time and temperature zone control' - we don't pick up any reference to the fact
    # #    that this is a time and temperature zone control; we only pick up temperature zone
    # #    control at the moment. Can we capture this too?
    # # 2) 'Charging system linked to use of community heating, programmer and at least two room
    # #    stats' - what are room stats and how should we capture this?
    #
    # df.to_dict("records")

    # Land registry
    from model_data.LandRegistryClient import LandRegistryClient

    # File names are listed in the order the paths are handed to the client.
    land_registry_files = [
        "pp-monthly-update-new-version.csv",
        "pp-2022 (1).csv",
        "pp-2021.csv",
        "pp-2020.csv",
        "pp-2019.csv",
        "pp-2018.csv",
        "pp-2017-part1.csv",
        "pp-2017-part2.csv",
    ]
    land_registry_client = LandRegistryClient(
        paths=[os.path.join(local_data_dir, file_name) for file_name in land_registry_files],
        addresses=address_meta
    )

    from dbfread import DBF

    borehole_file = os.path.join(local_data_dir, "borehole", "borehole.dbf")
    table = DBF(borehole_file)
    # Materialise every record, with a progress bar sized from the table's record count.
    borehole_data = list(tqdm(table, total=len(table)))

    # There are ~1.4 million entries in this dataset, so we first want to reduce the number of
    # entries if possible before comparing them against our properties to infer the distance
    # from each property to its nearest borehole.

    # Let's take a sample
    borehole_sample = borehole_data[:1000]
    df = pd.DataFrame(borehole_sample)

    # For each property, find the nearest borehole

    # Data dictionary: this description is based on the information presented in the following
    # geological articles:
    # https://nora.nerc.ac.uk/id/eprint/509366/1/IR04115.pdf
    # https://shop.bgs.ac.uk/Resources/Shop/doc/info/Borehole_Abbreviations.pdf?_ga=2.246788941.895115819.1686912089-542796874.1686912089
    # https://core.ac.uk/download/63732.pdf
    #
    #
    # QS - Borehole identifier information
    # NUMB - Borehole identifier information
    # BSUFF - Borehole identifier information
    # REGNO
    # RT - Borehole identifier information
    # GRID_REFER
    # EASTING - British National Grid coordinates
    # NORTHING - British National Grid coordinates
    # X - British National Grid coordinates - same as EASTING but has a float typing
    # Y - British National Grid coordinates - same as NORTHING but has a float typing
    # CONFIDENTI
    # STRTHEIGHT
    # NAME
    # LENGTH
    # BGS_ID
    # DATE_KNOWN
    # DATE_K_TYP
    # DATE_ENTER
    # AGS_LOG_UR