From 681a449187f0e93ffc2b655b25021189ce74628e Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Fri, 18 Oct 2024 15:52:49 +0100
Subject: [PATCH] preparing the data for lewes council

---
 etl/customers/newhaven/slides.py | 249 ++++++++++++++++++++++++-------
 1 file changed, 192 insertions(+), 57 deletions(-)

diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py
index 61ed89cc..3c62de53 100644
--- a/etl/customers/newhaven/slides.py
+++ b/etl/customers/newhaven/slides.py
@@ -417,9 +417,14 @@ def slides():
     # Show more characters in a column
     pd.set_option('display.max_colwidth', None)
 
-    # preparing of this data for the following 2 needs:
-    # 1) dataset to share with Nextgen heating
-    # 2) Breakdown of results by property type
+
+def app():
+    """
+    Prepare this data for the following 2 needs:
+    1) dataset to share with Nextgen heating
+    2) Breakdown of results by property type
+    :return:
+    """
 
     # get the asset list
     asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv")
@@ -431,6 +436,14 @@ def slides():
     )
     non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations)
 
+    # Right now this is the second version of the Newhaven portfolio
+    portfolio_id = 90
+    # Look at one scenario at a time, otherwise this is agony
+    scenario_ids = [47, 48, 49, 50, 51]
+    properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
+    properties_df = pd.DataFrame(properties_data)
+    recommendations_df = pd.DataFrame(recommendations_data)
+
     # Unnest this
     import ast
     survey_recs = []
@@ -502,27 +515,74 @@ def slides():
 
     # We now pull out the recommendations impact by property type and sub type
 
+    # Exclude sealing open fireplaces
+    recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"]
+
+    # Refine the type column for rows where type == "heating": if the description contains "air source heat pump"
+    # the type becomes "air_source_heat_pump"; else if it contains "high heat retention" it becomes
+    # "high_heat_retention_storage_heaters"; else if it contains "condensing boiler" it becomes "boiler_upgrade".
+    recommendations_df["type"] = np.where(
+        recommendations_df["type"] == "heating",
+        np.where(
+            recommendations_df["description"].str.contains("air source heat pump"),
+            "air_source_heat_pump",
+            np.where(
+                recommendations_df["description"].str.contains("high heat retention"),
+                "high_heat_retention_storage_heaters",
+                np.where(
+                    recommendations_df["description"].str.contains("condensing boiler"),
+                    "boiler_upgrade",
+                    recommendations_df["type"]
+                )
+            )
+        ),
+        recommendations_df["type"]
+    )
+
+    recommendation_types = recommendations_df["type"].unique().tolist()
+    rename_dict = {
+        'hot_water_tank_insulation': 'Hot Water Tank Insulation',
+        'windows_glazing': 'Windows Glazing',
+        'secondary_heating': 'Secondary Heating',
+        'cavity_wall_insulation': 'Cavity Wall Insulation',
+        'flat_roof_insulation': 'Flat Roof Insulation',
+        'mechanical_ventilation': 'Mechanical Ventilation',
+        'loft_insulation': 'Loft Insulation',
+        'cylinder_thermostat': 'Cylinder Thermostat',
+        'room_roof_insulation': 'Room Roof Insulation',
+        'low_energy_lighting': 'Low Energy Lighting',
+        'external_wall_insulation': 'External Wall Insulation',
+        'heating': 'Heating',
+        'solar_pv': 'Solar PV',
+        'heating_control': 'Heating Control',
+        'solid_floor_insulation': 'Solid Floor Insulation',
+        'suspended_floor_insulation': 'Suspended Floor Insulation',
+        'internal_wall_insulation': 'Internal Wall Insulation'
+    }
+
     property_scenario_impact = []
-    for scenario_id in scenario_ids:
+    for scenario_id in tqdm(scenario_ids):
         # Get the recommendations for the scenario, default
         scenario_recommendations = recommendations_df[
             (recommendations_df["Scenario ID"] == scenario_id) &
             (recommendations_df["default"] == True)
             ].copy()
 
-        scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
+        scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply(
             lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
             axis=1)
-        scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
+        scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply(
             lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
 
         # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used
-        scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
+        scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply(
             lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
                 'kwh_savings'], axis=1)
 
         scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({
-            'Estimated Kwh Savings': 'sum',
+            'Estimated Heating Demand kWh Savings': 'sum',
+            'Estimated Lighting kWh Savings': 'sum',
+            'Estimated Solar kWh Savings': 'sum',
             "estimated_cost": "sum"
         }).reset_index()
 
@@ -531,18 +591,52 @@ def slides():
         ].merge(
             scenario_grouped_data, on=["property_id"], how="left"
         )
-        comparison["Estimated Kwh Savings"] = comparison["Estimated Kwh Savings"].fillna(0)
+        comparison["Estimated Heating Demand kWh Savings"] = (
+            comparison["Estimated Heating Demand kWh Savings"].fillna(0)
+        )
+        comparison["Estimated Lighting kWh Savings"] = (
+            comparison["Estimated Lighting kWh Savings"].fillna(0)
+        )
+        comparison["Estimated Solar kWh Savings"] = (
+            comparison["Estimated Solar kWh Savings"].fillna(0)
+        )
         comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0)
 
         comparison["post_scenario_heating_hotwater_kwh"] = (
-            comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Kwh Savings"]
+            comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"]
+        )
+
+        # For each scenario, we create a measure matrix
+        measure_matrix = scenario_recommendations.pivot_table(
+            index='property_id',
+            columns='type',
+            values='id',  # Using 'id' just as a placeholder for the pivot
+            aggfunc=lambda x: True,  # If an ID exists for a given type, mark as True
+            fill_value=False  # Fill other entries as False
+        ).reset_index()
+
+        non_zero_heat_demand_impact = comparison[
+            (comparison["Estimated Heating Demand kWh Savings"] > 0) |
+            (comparison["Estimated Lighting kWh Savings"] > 0) |
+            (comparison["Estimated Solar kWh Savings"] > 0)
+            ]
+        measure_matrix = measure_matrix[
+            measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values)
+        ]
+        measure_matrix = measure_matrix.rename(columns=rename_dict)
+
+        comparison = comparison.merge(
+            measure_matrix, on="property_id", how="left"
         )
         comparison["scenario_id"] = scenario_id
 
         property_scenario_impact.append(comparison)
 
     property_scenario_impact = pd.concat(property_scenario_impact)
-    property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"])
+    # property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"])
+    for v in rename_dict.values():
+        # Fill NaNs with False
+        property_scenario_impact[v] = property_scenario_impact[v].fillna(False)
 
     # Scale
     property_scenario_impact["post_scenario_heating_hotwater_kwh_scaled"] = (
@@ -600,57 +694,98 @@ def slides():
         "post_scenario_heating_hotwater_kwh_scaled"]].empty:
         raise Exception("someting went wrong")
 
-        # Reorder the columns
-        grouped_data = grouped_data[
-            [
-                'property_type',
-                'property_sub_type',
-                'scenario',
-                'estimated_heating_hotwater_kwh',
-                'post_scenario_heating_hotwater_kwh',
-                'estimated_heating_hotwater_kwh_scaled',
-                'post_scenario_heating_hotwater_kwh_scaled',
-                'estimated_cost',
-            ]
+    # Reorder the columns
+    grouped_data = grouped_data[
+        [
+            'property_type',
+            'property_sub_type',
+            'scenario',
+            'estimated_heating_hotwater_kwh',
+            'post_scenario_heating_hotwater_kwh',
+            'estimated_heating_hotwater_kwh_scaled',
+            'post_scenario_heating_hotwater_kwh_scaled',
+            'estimated_cost',
+        ]
+    ]
+
+    grouped_data = grouped_data.rename(
+        columns={
+            "property_type": "Property Type",
+            "property_sub_type": "Property Sub Type",
+            "scenario": "Scenario",
+            "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
+            "post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh",
+            "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)",
+            "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)",
+            "estimated_cost": "Estimated Cost or Retrofit",
+        }
+    )
+
+    # grouped_data.to_excel(
+    #     "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property "
+    #     "Type.xlsx",
+    #     index=False
+    # )
+
+    property_scenario_impact = property_scenario_impact.merge(
+        scenario_names, how="left", on="scenario_id"
+    )
+
+    property_scenario_impact = property_scenario_impact.sort_values(
+        ["postcode", "uprn", "scenario_id"], ascending=True
+    )
+
+    lewes_data = next_gen_dataset.merge(
+        property_scenario_impact, how="left", on="uprn"
+    )
+
+    # Rearrange, rename columns and drop what we don't need
+    # TODO - remap the heating type
+    lewes_data = lewes_data[
+        [
+            'uprn', 'address', 'postcode', 'property_type', 'built_form', 'estimated_heating_hotwater_kwh',
+            'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable',
+            'ashp_size_kw',
+            'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', 'estimated_heating_hotwater_kwh_scaled',
+            # 'property_id',  - dropped
+            'current_energy_demand_heating_hotwater', 'Estimated Heating Demand kWh Savings',
+            'Estimated Lighting kWh Savings', 'Estimated Solar kWh Savings', 'estimated_cost',
+            'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat',
+            'Flat Roof Insulation',
+            'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation',
+            # 'scenario_id', - dropped
+            'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation',
+            'Heating',
+            'Heating Control',
+            'Solar PV',
+            'Internal Wall Insulation',
+            'Solid Floor Insulation',
+            'Suspended Floor Insulation',
+            'post_scenario_heating_hotwater_kwh_scaled',
+            'scenario'
         ]
 
-        grouped_data = grouped_data.rename(
-            columns={
-                "property_type": "Property Type",
-                "property_sub_type": "Property Sub Type",
-                "scenario": "Scenario",
-                "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
-                "post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh",
-                "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)",
-                "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)",
-                "estimated_cost": "Estimated Cost or Retrofit",
-            }
-        )
+    ]
 
-        grouped_data.to_excel(
-            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property "
-            "Type.xlsx",
-            index=False
-        )
+    # We save this dataset, which will be shared with Lewes Council
+    lewes_data.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/property data.csv", index=False
+    )
 
-        property_scenario_impact = property_scenario_impact.merge(
-            scenario_names, how="left", on="scenario_id"
-        )
+    df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario',
+                                                    values=['post_scenario_heating_hotwater_kwh',
+                                                            'post_scenario_heating_hotwater_kwh_scaled'])
 
-        df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario',
-                                                        values=['post_scenario_heating_hotwater_kwh',
-                                                                'post_scenario_heating_hotwater_kwh_scaled'])
+    # Flattening multi-index columns
+    df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns]
 
-        # Flattening multi-index columns
-        df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns]
+    # Reset the index to have a clean dataframe
+    df_pivot.reset_index(inplace=True)
 
-        # Reset the index to have a clean dataframe
-        df_pivot.reset_index(inplace=True)
+    next_gen_dataset = next_gen_dataset.merge(
+        df_pivot, how="left", on="uprn"
+    )
 
-        next_gen_dataset = next_gen_dataset.merge(
-            df_pivot, how="left", on="uprn"
-        )
-
-        next_gen_dataset.to_csv(
-            "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False
-        )
+    next_gen_dataset.to_csv(
+        "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False
+    )