import pandas as pd
import numpy as np
from recommendations.Costs import MCS_SOLAR_PV_COST_DATA
from backend.ml_models.AnnualBillSavings import AnnualBillSavings
import requests
from functools import lru_cache
import time
from backend.app.db.functions.solar_functions import get_solar_data, store_batch_data
from utils.logger import setup_logger
from sklearn.preprocessing import MinMaxScaler
from recommendations.Costs import Costs
from math import sin, cos, sqrt, atan2, radians

# Module-level logger created by the project's setup_logger helper (imported from utils.logger).
logger = setup_logger()


class GoogleSolarApi:
    NORTH_FACING_AZIMUTH_RANGE = (-30, 30)

    # These are variables, described in the documentation for cost analysis for non-us locations, seen here
    # https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
    # We use the default figures that the API uses for US locations

    # The factor by which the cost of electricity increases annually. The Solar API uses 1.022 (2.2% annual increase)
    # for US locations.
    cost_increase_factor = 1.022

    # The efficiency at which an inverter converts the DC electricity that is produced by the solar panels to the AC
    # electricity that is used in a household. The Solar API uses 85% for US locations. We use 0.955 (95.5%), which is
    # the middle value of the 93-98% range cited by Sunsave:
    # https://www.sunsave.energy/solar-panels-advice/system-size/inverters
    dc_to_ac_rate = 0.955

    # The Solar API uses 1.04 (4% annual increase) for US locations
    discount_rate = 1.04

    # How much the efficiency of the solar panels declines each year. The Solar API uses 0.995 (0.5% annual decrease)
    # for US locations
    efficiency_depreciation_factor = 0.995

    # The expected lifespan of the solar installation. The Solar API uses 20 years. Adjust this value as needed for
    # your area
    installation_life_span = 20

    MIN_UNIT_PANELS = 4  # Minimum number of panels we allow for a domestic building
    MIN_BUILDING_PANELS = 10  # Minimum number of panels we allow for a block of flats

    def __init__(self, api_key, max_retries=5):
        """
        Set up the client configuration and blank out all per-property state.

        :param api_key: The API key sent with every request to the Google Solar API.
        :param max_retries: How many times an API request is attempted before giving up (default is 5).
        """
        # Connection settings
        self.base_url = "https://solar.googleapis.com/v1"
        self.api_key = api_key
        self.max_retries = max_retries

        # Raw API payload and the roof segments extracted from it
        self.insights_data = None
        self.roof_segments = []

        # Per-property values; all unknown until the insights data has been processed
        self.floor_area = self.roof_area = None
        self.roof_segment_indexes = None
        self.panel_area = self.panel_wattage = None
        self.panel_performance = None

        # True when the insights data still has to be written to the db
        self.need_to_store = False

        # True when the data appears to cover both units of a semi-detached property
        self.double_property = False

    def get_building_insights(self, longitude, latitude, required_quality="MEDIUM", max_retries=None):
        """
        Request the building insights closest to the given location, retrying failed requests with exponential
        backoff.

        :param longitude: The longitude of the location.
        :param latitude: The latitude of the location.
        :param required_quality: The required quality of the data (default is "MEDIUM").
        :param max_retries: The maximum number of attempts for the API request (default is None, which uses the
        instance's max_retries).
        :return: The JSON response containing the building insights data. If max_retries is smaller than 1 no
        request is made and None is returned.
        :raises requests.exceptions.RequestException: If the final attempt fails.
        """
        if max_retries is None:
            max_retries = self.max_retries

        insights_url = f"{self.base_url}/buildingInsights:findClosest"
        params = {
            'location.latitude': f'{latitude:.5f}',
            'location.longitude': f'{longitude:.5f}',
            'requiredQuality': required_quality,
            'key': self.api_key
        }

        for attempt in range(1, max_retries + 1):
            try:
                # Without a timeout a stalled connection would block forever; a timeout is raised as a
                # RequestException and is therefore retried like any other failure.
                response = requests.get(insights_url, params=params, timeout=30)
                response.raise_for_status()  # Raise an error for bad status codes
                return response.json()
            except requests.exceptions.RequestException as e:
                # The exception text can contain the full request URL, which includes the API key
                error_text = str(e)
                if self.api_key:
                    error_text = error_text.replace(str(self.api_key), "***")
                logger.warning(f"Attempt {attempt} failed: {error_text}")

                if attempt >= max_retries:
                    # Out of attempts: re-raise immediately rather than sleeping first
                    raise
                time.sleep(2 ** attempt)  # Exponential backoff

    def get(
        self,
        longitude,
        latitude,
        energy_consumption,
        property_instance=None,
        required_quality="MEDIUM",
        is_building=False,
        session=None,
        uprn=None,
    ):
        """
        Load the building insights for a location (from the database when possible, otherwise from the API),
        extract the roof data and size the solar panel configurations against the given energy consumption.

        The results are published on the instance (insights_data, roof_segments, roof_area, floor_area, panel_area,
        panel_wattage, roof_segment_indexes, panel_performance, need_to_store, double_property). Because the method
        works through these side effects and returns nothing, it is deliberately not wrapped in a cache: a cache hit
        would skip the work and leave the instance holding whatever an earlier call stored.

        :param longitude: The longitude of the location.
        :param latitude: The latitude of the location.
        :param energy_consumption: The energy consumption of the building/unit associated to the longitude and latitude,
                                    that we wish to size the solar panels up against
        :param property_instance: The property instance associated to the longitude and latitude. When None, the
                                    semi-detached duplicate check is skipped.
        :param required_quality: The required quality of the data (default is "MEDIUM").
        :param is_building: Whether the energy consumption is for a building or a unit.
        :param session: The database session to use for the query (default is None).
        :param uprn: The unique property reference number (default is None).
        :return: None
        :raises NotImplementedError: If the API reports a panel capacity other than 400W.
        """
        # These flags describe the current call only, so they must not carry over from a previous one
        self.need_to_store = False
        self.double_property = False

        is_outdated = False
        if session is not None:
            # Check if the data is already in the database
            self.insights_data, _, is_outdated = get_solar_data(
                session, longitude=longitude, latitude=latitude, uprn=uprn
            )

        # If we have no data, or the stored data is flagged as outdated, fetch it from the API
        if self.insights_data is None or is_outdated:
            self.insights_data = self.get_building_insights(longitude, latitude, required_quality)
            self.need_to_store = True

        solar_potential = self.insights_data["solarPotential"]

        # Extract key data from the insights response
        self.roof_segments = solar_potential.get('roofSegmentStats', [])
        # Automatically exclude north-facing segments
        self.exclude_north_facing_segments(property_instance=property_instance)
        # If a property is semi-detached, it's possible for us to include segments from an attached unit
        if (
            property_instance is not None
            and property_instance.data["built-form"] == "Semi-Detached"
            and property_instance.data["extension-count"] == 0
        ):
            self.exclude_likely_duplicate_surfaces()

        self.roof_area = solar_potential["wholeRoofStats"]['areaMeters2']
        self.floor_area = solar_potential["wholeRoofStats"]['groundAreaMeters2']
        self.panel_area = solar_potential["panelHeightMeters"] * solar_potential["panelWidthMeters"]
        self.panel_wattage = solar_potential["panelCapacityWatts"]
        if self.panel_wattage != 400:
            # In the API documentation, it claims that the default output is 250W, however we've only seen 400W, so if
            # we get anything other than 400W, we'll need to adjust the calculations in the output. For this, we should
            # refer to https://developers.google.com/maps/documentation/solar/calculate-costs-non-us
            # Where the documentation explains how to adjust the yearlyEnergyDcKwh figures.
            # It should be straightforward, but I'd rather see an actual instance of this happening
            raise NotImplementedError("Panel wattage is not 400W - implement me")

        self.roof_segment_indexes = [segment['segmentIndex'] for segment in self.roof_segments]

        # We now start finding the solar panel configurations
        self.optimise_solar_configuration(
            energy_consumption=energy_consumption, is_building=is_building, property_instance=property_instance
        )

        # Finally, if we have a double property, we halve the roof and floor areas we stored
        if self.double_property:
            self.roof_area = self.roof_area / 2
            self.floor_area = self.floor_area / 2

    def save_to_db(self, session, uprns_to_location, scenario_type):
        """
        Persist the API payload together with the first row of ``self.panel_performance`` as the default scenario.

        Nothing is written when ``self.need_to_store`` is False.

        :param session: The database session handed to store_batch_data.
        :param uprns_to_location: Passed through unchanged to store_batch_data.
        :param scenario_type: Either "unit" or "building".
        :raises ValueError: If no API data has been loaded.
        :raises Exception: If scenario_type is not one of the two allowed values.
        """
        if self.insights_data is None:
            raise ValueError("No api data to store")

        if scenario_type not in ("unit", "building"):
            raise Exception("Invalid scenario type. Must be either 'unit' or 'building'")

        if self.need_to_store:
            stored_columns = [
                "n_panels",
                "yearly_dc_energy",
                "total_cost",
                "panneled_roof_area",
                "array_wattage",
                "initial_ac_kwh_per_year",
                "lifetime_ac_kwh",
                "roi",
                "expected_payback_years",
                "lifetime_dc_kwh"
            ]
            # Names used by the storage layer for the columns that are called differently here
            storage_names = {
                "n_panels": "number_panels",
                "yearly_dc_energy": "yearly_dc_kwh",
                "total_cost": "cost",
                "panneled_roof_area": "panelled_roof_area",
                "array_wattage": "array_kwhp",
                "initial_ac_kwh_per_year": "yearly_ac_kwh",
            }

            first_scenario = self.panel_performance.head(1)[stored_columns].rename(columns=storage_names)
            first_scenario["is_default"] = True
            first_scenario["scenario_type"] = scenario_type

            store_batch_data(
                session=session,
                api_data=self.insights_data,
                uprns_to_location=uprns_to_location,
                scenarios_data=first_scenario.to_dict(orient="records")
            )

    @staticmethod
    def lifetime_production_kwh(
        row,
        efficiency_depreciation_factor,
        installation_life_span,
        column_name="initial_ac_kwh_per_year"
    ):
        """
        Mimics the function described in the Google Solar API documentation, presenting the lifetime production
        AC KWH as a geometric sum
        """

        return (
            row[column_name] *
            (1 - pow(
                efficiency_depreciation_factor,
                installation_life_span)) /
            (1 - efficiency_depreciation_factor))

    def optimise_solar_configuration(self, energy_consumption, is_building=False, property_instance=None):
        """
        Score every solar panel configuration in the insights data and store the ranked table on
        ``self.panel_performance`` (best combined score first).

        Steps:
        1) For each configuration, keep the roof segments listed in ``self.roof_segment_indexes`` that hold at
           least the minimum number of panels, and total their panels, DC energy, cost and panelled area.
        2) Convert DC to AC energy, drop low-yield configurations and compute the lifetime production.
        3) Estimate roi, generation value, generation deficit, surplus and payback years against the energy
           consumption (doubled when ``self.double_property`` is set).
        4) Rank the configurations by a weighted score of roi, generation value and generation deficit.
        5) For a double property, keep the configurations that split evenly and halve their figures.

        When no configuration survives steps 1 or 2, ``self.panel_performance`` is set to an empty frame.

        :param energy_consumption: Yearly energy consumption the panels are sized against.
        :param is_building: When True the building-level minimum panel count is used, otherwise the unit-level one.
        :param property_instance: Property used for costing; when None an average cost per kW figure is used.
        :return: None
        """
        # If we look at the building level, we don't include any projects fewer than 10 panels, otherwise the
        # minimum is 4
        min_panels = self.MIN_BUILDING_PANELS if is_building else self.MIN_UNIT_PANELS

        cost_instance = Costs(property_instance=property_instance) if property_instance is not None else None

        panel_performance = []
        for config in self.insights_data["solarPotential"].get("solarPanelConfigs", []):
            # Filter on just the segments in self.roof_segment_indexes
            roof_segment_summaries = [
                segment for segment in config["roofSegmentSummaries"]
                if segment["segmentIndex"] in self.roof_segment_indexes
            ]

            roi_summary = []
            for segment in roof_segment_summaries:

                if segment["panelsCount"] < min_panels:
                    continue

                wattage = segment["panelsCount"] * self.insights_data["solarPotential"]["panelCapacityWatts"]
                generated_dc_energy = segment["yearlyEnergyDcKwh"]
                ratio = generated_dc_energy / wattage

                if cost_instance is None:
                    cost = MCS_SOLAR_PV_COST_DATA["average_cost_per_kwh"] * (wattage / 1000)
                else:
                    cost = cost_instance.solar_pv(
                        n_panels=segment["panelsCount"],
                        has_battery=False,
                        n_floors=property_instance.number_of_floors,
                    )["total"]

                roi_summary.append(
                    {
                        "segmentIndex": segment["segmentIndex"],
                        "wattage": wattage,
                        "generated_dc_energy": generated_dc_energy,
                        "ratio": ratio,
                        "n_panels": segment["panelsCount"],
                        "cost": cost,
                        "panneled_roof_area": self.panel_area * int(segment["panelsCount"])
                    }
                )

            roi_summary = pd.DataFrame(roi_summary)
            if roi_summary.empty:
                continue

            weighted_ratio = np.average(
                roi_summary["ratio"].values, weights=roi_summary["generated_dc_energy"].values
            )
            total_cost = roi_summary["cost"].sum()
            yearly_dc_energy = roi_summary["generated_dc_energy"].sum()

            panel_performance.append(
                {
                    "n_panels": roi_summary["n_panels"].sum(),
                    "yearly_dc_energy": yearly_dc_energy,
                    "total_cost": total_cost,
                    "weighted_ratio": weighted_ratio,
                    "panneled_roof_area": roi_summary["panneled_roof_area"].sum(),
                    "array_wattage": roi_summary["n_panels"].sum() * self.panel_wattage
                }
            )

        panel_performance = pd.DataFrame(panel_performance)

        if panel_performance.empty:
            self.panel_performance = self._empty_panel_performance()
            return

        # We can have duplicate configurations
        panel_performance = panel_performance.drop_duplicates()

        panel_performance["initial_ac_kwh_per_year"] = panel_performance["yearly_dc_energy"] * self.dc_to_ac_rate

        # Remove anything where the yearly ac energy is less than half of the array wattage. Resetting the index
        # gives us an independent frame with a clean positional index for the steps below.
        panel_performance = panel_performance[
            (panel_performance["initial_ac_kwh_per_year"] / panel_performance["array_wattage"]) >= 0.5
        ].reset_index(drop=True)

        if panel_performance.empty:
            # Every configuration was filtered out; the scoring below cannot run on zero rows
            self.panel_performance = self._empty_panel_performance()
            return

        # 2) Calculate the lifetime solar energy production
        panel_performance['lifetime_ac_kwh'] = panel_performance.apply(
            self.lifetime_production_kwh,
            axis=1,
            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
            installation_life_span=self.installation_life_span,
            column_name="initial_ac_kwh_per_year"
        )

        panel_performance['lifetime_dc_kwh'] = panel_performance.apply(
            self.lifetime_production_kwh,
            axis=1,
            efficiency_depreciation_factor=self.efficiency_depreciation_factor,
            installation_life_span=self.installation_life_span,
            column_name="yearly_dc_energy",
        )

        # Now that we know the lifetime production of ac kwh, we can estimate the roi
        # Key things we estimate:
        # - generation_value: this is the gbp value of the electricity generated
        # - roi: the return on investment, calculated as generation_value / total_cost
        # - surplus: this is the amount of additional energy generated, and therefore how much will be exported
        # - surplus_value: the value of the surplus energy - this feeds into the roi, when relevant
        # - expected_payback_years: the number of years it will take to pay back the initial investment

        # If we have a double property (i.e. the solar api has returned data for two units) we size up the solar panels
        # for double the consumption, as if for two units.
        if self.double_property:
            lifetime_energy_consumption = energy_consumption * 2 * self.installation_life_span
        else:
            lifetime_energy_consumption = energy_consumption * self.installation_life_span

        roi_results = []
        for _, panel_config in panel_performance.iterrows():
            lifetime_ac_kwh = panel_config["lifetime_ac_kwh"]

            surplus = 0
            generation_deficit = 0
            if lifetime_ac_kwh < lifetime_energy_consumption:
                # We value all of the electricity generated at the price cap
                generation_value = lifetime_ac_kwh * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                roi = generation_value / panel_config["total_cost"]
                generation_deficit = lifetime_energy_consumption - lifetime_ac_kwh
            else:
                # We now have a surplus of energy, which we can sell back to the grid
                surplus = lifetime_ac_kwh - lifetime_energy_consumption
                surplus_value = surplus * AnnualBillSavings.ELECTRICITY_EXPORT_PAYMENT
                generation_value = lifetime_energy_consumption * AnnualBillSavings.ELECTRICITY_PRICE_CAP
                roi = (generation_value + surplus_value) / panel_config["total_cost"]

            # Calculate expected payback years; NaN marks "no payback" and keeps the column numeric
            if generation_value > 0:
                expected_payback_years = panel_config["total_cost"] / (
                    generation_value / self.installation_life_span)
            else:
                expected_payback_years = np.nan

            # Generation deficit tells us how much more energy we need to meet the generation demand.
            roi_results.append(
                {
                    "roi": roi,
                    "generation_value": generation_value,
                    "generation_deficit": generation_deficit,
                    "expected_payback_years": expected_payback_years,
                    "surplus": surplus
                }
            )

        # The results are in the same row order as panel_performance, so we join them positionally. n_panels is not
        # guaranteed to be unique across configurations, so merging on it could multiply rows.
        roi_results = pd.DataFrame(roi_results, index=panel_performance.index)
        panel_performance = pd.concat([panel_performance, roi_results], axis=1)

        # We want max roi, minimal generation deficit, and max generation value - we create a ranking score
        # in which roi carries most of the weight
        weights = {'roi': 0.6, 'generation_value': 0.2, 'generation_deficit': 0.2}
        metrics = panel_performance[['roi', 'generation_value', 'generation_deficit']]

        # Normalize the columns (0 to 1 scale)
        scaler = MinMaxScaler()
        normalized_metrics = scaler.fit_transform(metrics)

        # Convert normalized metrics back to a dataframe
        normalized_metrics_df = pd.DataFrame(
            normalized_metrics, columns=['roi', 'generation_value', 'generation_deficit']
        )
        # The deficit is inverted because a smaller deficit is better
        normalized_metrics_df['combined_score'] = (
            normalized_metrics_df['roi'] * weights['roi'] +
            normalized_metrics_df['generation_value'] * weights['generation_value'] +
            (1 - normalized_metrics_df['generation_deficit']) * weights['generation_deficit']
        )

        panel_performance['combined_score'] = normalized_metrics_df['combined_score'].values
        panel_performance['rank'] = panel_performance['combined_score'].rank(ascending=False)
        panel_performance = panel_performance.sort_values(by='rank')

        payback_years = np.ceil(panel_performance["expected_payback_years"])
        if payback_years.notna().all():
            # Whole years as integers; left as floats only when a "no payback" NaN is present, since NaN
            # cannot be cast to int
            payback_years = payback_years.astype(int)
        panel_performance["expected_payback_years"] = payback_years

        if self.double_property:
            # Now that we've optimised to an energy consumption that is double the original, we need to halve the
            # results
            panel_performance["n_panels_halved"] = panel_performance["n_panels"] / 2
            n_panels_required = {int(x) for x in np.floor(panel_performance["n_panels"] / 2)}
            # We filter the data on this number of panels
            panel_performance = panel_performance[
                panel_performance["n_panels_halved"].isin(n_panels_required)
            ].copy()
            # We halve the generation values
            for col in [
                "yearly_dc_energy",
                "total_cost",
                "panneled_roof_area",
                "array_wattage",
                "initial_ac_kwh_per_year",
                "lifetime_ac_kwh",
                "lifetime_dc_kwh",
                "generation_value",
                "generation_deficit",
                "surplus"
            ]:
                panel_performance[col] = panel_performance[col] / 2

            panel_performance["n_panels"] = panel_performance["n_panels_halved"]
            panel_performance = panel_performance.drop(columns=["n_panels_halved"])
            panel_performance = panel_performance[panel_performance["n_panels"] >= min_panels]

        self.panel_performance = panel_performance

    @staticmethod
    def _empty_panel_performance():
        """Return an empty panel performance frame that still carries the columns save_to_db selects."""
        return pd.DataFrame(
            columns=[
                "n_panels",
                "yearly_dc_energy",
                "total_cost",
                "panneled_roof_area",
                "array_wattage",
                "initial_ac_kwh_per_year",
                "lifetime_ac_kwh",
                "roi",
                "expected_payback_years",
                "lifetime_dc_kwh"
            ]
        )

    def exclude_north_facing_segments(self, property_instance):
        """
        Filter out any north-facing roof segments from the roof_segments attribute.

        North-facing segments are defined as those with an azimuth between -30 and 30 degrees.
        """

        filtered_segments = []
        for segment_index, segment in enumerate(self.roof_segments):
            segment["segmentIndex"] = segment_index
            # Check if the segment is north-facing
            if (
                self.NORTH_FACING_AZIMUTH_RANGE[0] <= segment['azimuthDegrees'] <= self.NORTH_FACING_AZIMUTH_RANGE[1]
            ) and not property_instance.roof["is_flat"]:
                continue

            filtered_segments.append(segment)

        self.roof_segments = filtered_segments

    @staticmethod
    def haversine(lat1, lon1, lat2, lon2):
        """
        Calculate the great-circle distance between two points on the Earth
        given their latitude and longitude in decimal degrees. Using haversine formula.
        """
        R = 6373.0  # approximate radius of earth in km

        lat1 = radians(lat1)
        lon1 = radians(lon1)
        lat2 = radians(lat2)
        lon2 = radians(lon2)

        dlon = lon2 - lon1
        dlat = lat2 - lat1

        a = sin(dlat / 2) ** 2 + cos(lat1) * cos(lat2) * sin(dlon / 2) ** 2
        c = 2 * atan2(sqrt(a), sqrt(1 - a))

        distance = R * c
        return distance

    def exclude_likely_duplicate_surfaces(self):
        """
        Detect whether the roof segments look like two copies of the same roof (both halves of a semi-detached
        building) and, if so, set ``self.double_property``.

        Segments are grouped by azimuth: a segment within 20 degrees of an already kept segment is treated as its
        duplicate, and the one closer to the building centre is kept. ``self.roof_segments`` itself is never
        modified here; the flag tells the caller to analyse the whole roof and halve the results.
        :return: None
        """

        def is_similar(segment1, segment2, azimuth_tol=20):
            # Azimuths are compass bearings, so the difference has to wrap around north:
            # 350 and 5 degrees are 15 degrees apart, not 345
            azimuth_diff = abs(segment1['azimuthDegrees'] - segment2['azimuthDegrees']) % 360
            return min(azimuth_diff, 360 - azimuth_diff) <= azimuth_tol

        property_center = self.insights_data["center"]

        def distance_to_center(segment):
            return self.haversine(
                property_center['latitude'], property_center['longitude'],
                segment['center']['latitude'], segment['center']['longitude']
            )

        deduped_segments = []
        for segment in self.roof_segments:
            similar_segments = [s for s in deduped_segments if is_similar(segment, s)]
            if not similar_segments:
                deduped_segments.append(segment)
                continue

            # Compare distances to the property center and keep the closer segment
            current_dist = distance_to_center(segment)
            for similar_segment in similar_segments:
                if current_dist < distance_to_center(similar_segment):
                    deduped_segments.remove(similar_segment)
                    deduped_segments.append(segment)

        # If we have a semi-detached property that has duplicated segments, we should expect to halve the number of
        # segments. Any other reduction is not treated as a duplicated roof.
        if len(deduped_segments) < len(self.roof_segments):
            if len(deduped_segments) != len(self.roof_segments) / 2:
                return

            # Because the segments are duplicated, but the sizes aren't necessarily split perfectly in half, what
            # we need to do is perform the solar analysis and then halve the results. We set an indicator which
            # implies we should do this
            self.double_property = True

    @classmethod
    def default_panel_performance(cls, property_instance):
        """
        Build a fallback panel performance table for properties with simulated uprns, for which we have no
        longitude and latitude and therefore cannot query the Solar API.

        Two systems are returned, a 10 panel (4 kWp) one followed by a 6 panel (2.4 kWp) one. Only the size,
        energy and cost columns are populated; every derived metric is left as None.
        :param property_instance: The property the installation is costed for.
        :return: A DataFrame with one row per default system.
        """

        cost_instance = Costs(property_instance=property_instance)

        def build_row(n_panels):
            # Each default panel is rated at 400W
            array_wattage = n_panels * 400
            installation_cost = cost_instance.solar_pv(
                n_panels=n_panels, has_battery=False, n_floors=property_instance.number_of_floors
            )["total"]

            row = {
                'n_panels': n_panels,
                'yearly_dc_energy': array_wattage * 0.99,  # Assumed 99% efficient wattage -> dc
                'total_cost': installation_cost,
                'weighted_ratio': None,
                'panneled_roof_area': n_panels * 1.8,
                'array_wattage': array_wattage,
                'initial_ac_kwh_per_year': array_wattage * 0.95,  # Assumed 95% efficient wattage -> ac
            }
            # Metrics that are normally derived from the API data are not available here
            for derived_metric in (
                'lifetime_ac_kwh',
                'lifetime_dc_kwh',
                'roi',
                'generation_value',
                'generation_deficit',
                'expected_payback_years',
                'surplus',
                'combined_score',
                'rank',
            ):
                row[derived_metric] = None
            return row

        return pd.DataFrame([build_row(n_panels) for n_panels in (10, 6)])