From 681a449187f0e93ffc2b655b25021189ce74628e Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Fri, 18 Oct 2024 15:52:49 +0100 Subject: [PATCH] preparing the data for lewes council --- etl/customers/newhaven/slides.py | 249 ++++++++++++++++++++++++------- 1 file changed, 192 insertions(+), 57 deletions(-) diff --git a/etl/customers/newhaven/slides.py b/etl/customers/newhaven/slides.py index 61ed89cc..3c62de53 100644 --- a/etl/customers/newhaven/slides.py +++ b/etl/customers/newhaven/slides.py @@ -417,9 +417,14 @@ def slides(): # Show more characters in a column pd.set_option('display.max_colwidth', None) - # preparing of this data for the following 2 needs: - # 1) dataset to share with Nextgen heating - # 2) Breakdown of results by property type + +def app(): + """ + preparing of this data for the following 2 needs: + 1) dataset to share with Nextgen heating + 2) Breakdown of results by property type + :return: + """ # get the asset list asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv") @@ -431,6 +436,14 @@ def slides(): ) non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations) + # Right now this is the second version of the nehaven portfolio + portfolio_id = 90 + # Look at one scenario at a time, otherwise this is agony + scenario_ids = [47, 48, 49, 50, 51] + properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids) + properties_df = pd.DataFrame(properties_data) + recommendations_df = pd.DataFrame(recommendations_data) + # Unnest this import ast survey_recs = [] @@ -502,27 +515,74 @@ def slides(): # We now pull out the recommendations impact by property type and sub type + # Exclude sealing open fireplaces + recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"] + + # We update the type column so that if type == heating, and the description contains "air source heat pump", + # the type is "air_source_heat_pump", else if the description contains "high heat retention storage heaters", else + # if the description contains "condensing boiler, the type is updated to "boiler_upgrade" + recommendations_df["type"] = np.where( + recommendations_df["type"] == "heating", + np.where( + recommendations_df["description"].str.contains("air source heat pump"), + "air_source_heat_pump", + np.where( + recommendations_df["description"].str.contains("high heat retention"), + "high_heat_retention_storage_heaters", + np.where( + recommendations_df["description"].str.contains("condensing boiler"), + "boiler_upgrade", + recommendations_df["type"] + ) + ) + ), + recommendations_df["type"] + ) + + recommendation_types = recommendations_df["type"].unique().tolist() + rename_dict = { + 'hot_water_tank_insulation': 'Hot Water Tank Insulation', + 'windows_glazing': 'Windows Glazing', + 'secondary_heating': 'Secondary Heating', + 'cavity_wall_insulation': 'Cavity Wall Insulation', + 'flat_roof_insulation': 'Flat Roof Insulation', + 'mechanical_ventilation': 'Mechanical Ventilation', + 'loft_insulation': 'Loft Insulation', + 'cylinder_thermostat': 'Cylinder Thermostat', + 'room_roof_insulation': 'Room Roof Insulation', + 'low_energy_lighting': 'Low Energy Lighting', + 'external_wall_insulation': 'External Wall Insulation', + 'heating': 'Heating', + 'solar_pv': 'Solar PV', + 'heating_control': 'Heating Control', + 'solid_floor_insulation': 'Solid Floor Insulation', + 'suspended_floor_insulation': 'Suspended Floor Insulation', + 'internal_wall_insulation': 'Internal Wall Insulation' + } + property_scenario_impact = [] - for scenario_id in scenario_ids: + for scenario_id in tqdm(scenario_ids): # Get the recommendations for the scenario, default scenario_recommendations = recommendations_df[ (recommendations_df["Scenario ID"] == scenario_id) & (recommendations_df["default"] == True) ].copy() - scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply( + scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply( lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0, axis=1) - scenario_recommendations['solar_kwh'] = scenario_recommendations.apply( + scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply( lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1) # Set 'Estimated Kwh Savings' to zero where specific kwh columns are used - scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply( + scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply( lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[ 'kwh_savings'], axis=1) scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({ - 'Estimated Kwh Savings': 'sum', + 'Estimated Heating Demand kWh Savings': 'sum', + 'Estimated Lighting kWh Savings': 'sum', + 'Estimated Solar kWh Savings': 'sum', "estimated_cost": "sum" }).reset_index() @@ -531,18 +591,52 @@ def slides(): ].merge( scenario_grouped_data, on=["property_id"], how="left" ) - comparison["Estimated Kwh Savings"] = comparison["Estimated Kwh Savings"].fillna(0) + comparison["Estimated Heating Demand kWh Savings"] = ( + comparison["Estimated Heating Demand kWh Savings"].fillna(0) + ) + comparison["Estimated Lighting kWh Savings"] = ( + comparison["Estimated Lighting kWh Savings"].fillna(0) + ) + comparison["Estimated Solar kWh Savings"] = ( + comparison["Estimated Solar kWh Savings"].fillna(0) + ) comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0) comparison["post_scenario_heating_hotwater_kwh"] = ( - comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Kwh Savings"] + comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"] + ) + + # For each scenario, we create a measure matrix + measure_matrix = scenario_recommendations.pivot_table( + index='property_id', + columns='type', + values='id', # Using 'id' just as a placeholder for the pivot + aggfunc=lambda x: True, # If an ID exists for a given type, mark as True + fill_value=False # Fill other entries as False + ).reset_index() + + non_zero_heat_demand_impact = comparison[ + (comparison["Estimated Heating Demand kWh Savings"] > 0) | + (comparison["Estimated Lighting kWh Savings"] > 0) | + (comparison["Estimated Solar kWh Savings"] > 0) + ] + measure_matrix = measure_matrix[ + measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values) + ] + measure_matrix = measure_matrix.rename(columns=rename_dict) + + comparison = comparison.merge( + measure_matrix, on="property_id", how="left" ) comparison["scenario_id"] = scenario_id property_scenario_impact.append(comparison) property_scenario_impact = pd.concat(property_scenario_impact) - property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"]) + # property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"]) + for v in rename_dict.values(): + # Fill NaNs with False + property_scenario_impact[v] = property_scenario_impact[v].fillna(False) # Scale property_scenario_impact["post_scenario_heating_hotwater_kwh_scaled"] = ( @@ -600,57 +694,98 @@ def slides(): "post_scenario_heating_hotwater_kwh_scaled"]].empty: raise Exception("someting went wrong") - # Reorder the columns - grouped_data = grouped_data[ - [ - 'property_type', - 'property_sub_type', - 'scenario', - 'estimated_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh', - 'estimated_heating_hotwater_kwh_scaled', - 'post_scenario_heating_hotwater_kwh_scaled', - 'estimated_cost', - ] + # Reorder the columns + grouped_data = grouped_data[ + [ + 'property_type', + 'property_sub_type', + 'scenario', + 'estimated_heating_hotwater_kwh', + 'post_scenario_heating_hotwater_kwh', + 'estimated_heating_hotwater_kwh_scaled', + 'post_scenario_heating_hotwater_kwh_scaled', + 'estimated_cost', + ] + ] + + grouped_data = grouped_data.rename( + columns={ + "property_type": "Property Type", + "property_sub_type": "Property Sub Type", + "scenario": "Scenario", + "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh", + "post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh", + "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)", + "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)", + "estimated_cost": "Estimated Cost or Retrofit", + } + ) + + # grouped_data.to_excel( + # "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property " + # "Type.xlsx", + # index=False + # ) + + property_scenario_impact = property_scenario_impact.merge( + scenario_names, how="left", on="scenario_id" + ) + + property_scenario_impact = property_scenario_impact.sort_values( + ["postcode", "uprn", "scenario_id"], ascending=True + ) + + lewes_data = next_gen_dataset.merge( + property_scenario_impact, how="left", on="uprn" + ) + + # Rearrange, rename columns and drop what we don't need + # TODO - remap the heating type + lewes_data = lewes_data[ + [ + 'uprn', 'address', 'postcode', 'property_type', 'built_form', 'estimated_heating_hotwater_kwh', + 'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable', + 'ashp_size_kw', + 'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', 'estimated_heating_hotwater_kwh_scaled', + # 'property_id', - dropped + 'current_energy_demand_heating_hotwater', 'Estimated Heating Demand kWh Savings', + 'Estimated Lighting kWh Savings', 'Estimated Solar kWh Savings', 'estimated_cost', + 'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat', + 'Flat Roof Insulation', + 'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation', + # 'scenario_id', - dropped + 'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation', + 'Heating', + 'Heating Control', + 'Solar PV', + 'Internal Wall Insulation', + 'Solid Floor Insulation', + 'Suspended Floor Insulation', + 'post_scenario_heating_hotwater_kwh_scaled', + 'scenario' ] - grouped_data = grouped_data.rename( - columns={ - "property_type": "Property Type", - "property_sub_type": "Property Sub Type", - "scenario": "Scenario", - "estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh", - "post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh", - "estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)", - "post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)", - "estimated_cost": "Estimated Cost or Retrofit", - } - ) + ] - grouped_data.to_excel( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property " - "Type.xlsx", - index=False - ) + # We save this dataset, which will be shared with Lewes Council + lewes_data.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/property data.csv", index=False + ) - property_scenario_impact = property_scenario_impact.merge( - scenario_names, how="left", on="scenario_id" - ) + df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario', + values=['post_scenario_heating_hotwater_kwh', + 'post_scenario_heating_hotwater_kwh_scaled']) - df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario', - values=['post_scenario_heating_hotwater_kwh', - 'post_scenario_heating_hotwater_kwh_scaled']) + # Flattening multi-index columns + df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns] - # Flattening multi-index columns - df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns] + # Reset the index to have a clean dataframe + df_pivot.reset_index(inplace=True) - # Reset the index to have a clean dataframe - df_pivot.reset_index(inplace=True) + next_gen_dataset = next_gen_dataset.merge( + df_pivot, how="left", on="uprn" + ) - next_gen_dataset = next_gen_dataset.merge( - df_pivot, how="left", on="uprn" - ) - - next_gen_dataset.to_csv( - "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False - ) + next_gen_dataset.to_csv( + "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False + )