Preparing the data for Lewes Council

This commit is contained in:
Khalim Conn-Kowlessar 2024-10-18 15:52:49 +01:00
parent d7ed4dd9a4
commit 681a449187

View file

@ -417,9 +417,14 @@ def slides():
# Show more characters in a column
pd.set_option('display.max_colwidth', None)
# Prepare this data for the following two needs:
# 1) dataset to share with Nextgen heating
# 2) Breakdown of results by property type
def app():
"""
Prepare this data for the following two needs:
1) dataset to share with Nextgen heating
2) Breakdown of results by property type
:return:
"""
# get the asset list
asset_list = read_csv_from_s3(bucket_name="retrofit-plan-inputs-dev", filepath="8/90/pilot.csv")
@ -431,6 +436,14 @@ def slides():
)
non_intrusive_recommendations = pd.DataFrame(non_intrusive_recommendations)
# Right now this is the second version of the Newhaven portfolio
portfolio_id = 90
# Look at one scenario at a time, otherwise this is agony
scenario_ids = [47, 48, 49, 50, 51]
properties_data, plans_data, recommendations_data = get_data(portfolio_id, scenario_ids)
properties_df = pd.DataFrame(properties_data)
recommendations_df = pd.DataFrame(recommendations_data)
# Unnest this
import ast
survey_recs = []
@ -502,27 +515,74 @@ def slides():
# We now pull out the recommendations impact by property type and sub type
# Exclude sealing open fireplaces
recommendations_df = recommendations_df[recommendations_df["type"] != "sealing_open_fireplace"]
# We refine the type column for heating recommendations: if type == "heating" and the description contains
# "air source heat pump", the type becomes "air_source_heat_pump"; else if it contains "high heat retention",
# the type becomes "high_heat_retention_storage_heaters"; else if it contains "condensing boiler", the type
# becomes "boiler_upgrade". All other rows keep their existing type.
recommendations_df["type"] = np.where(
recommendations_df["type"] == "heating",
np.where(
recommendations_df["description"].str.contains("air source heat pump"),
"air_source_heat_pump",
np.where(
recommendations_df["description"].str.contains("high heat retention"),
"high_heat_retention_storage_heaters",
np.where(
recommendations_df["description"].str.contains("condensing boiler"),
"boiler_upgrade",
recommendations_df["type"]
)
)
),
recommendations_df["type"]
)
recommendation_types = recommendations_df["type"].unique().tolist()
rename_dict = {
'hot_water_tank_insulation': 'Hot Water Tank Insulation',
'windows_glazing': 'Windows Glazing',
'secondary_heating': 'Secondary Heating',
'cavity_wall_insulation': 'Cavity Wall Insulation',
'flat_roof_insulation': 'Flat Roof Insulation',
'mechanical_ventilation': 'Mechanical Ventilation',
'loft_insulation': 'Loft Insulation',
'cylinder_thermostat': 'Cylinder Thermostat',
'room_roof_insulation': 'Room Roof Insulation',
'low_energy_lighting': 'Low Energy Lighting',
'external_wall_insulation': 'External Wall Insulation',
'heating': 'Heating',
'solar_pv': 'Solar PV',
'heating_control': 'Heating Control',
'solid_floor_insulation': 'Solid Floor Insulation',
'suspended_floor_insulation': 'Suspended Floor Insulation',
'internal_wall_insulation': 'Internal Wall Insulation'
}
property_scenario_impact = []
for scenario_id in scenario_ids:
for scenario_id in tqdm(scenario_ids):
# Get the default recommendations for this scenario
scenario_recommendations = recommendations_df[
(recommendations_df["Scenario ID"] == scenario_id) &
(recommendations_df["default"] == True)
].copy()
scenario_recommendations['ligting_kwh'] = scenario_recommendations.apply(
scenario_recommendations['Estimated Lighting kWh Savings'] = scenario_recommendations.apply(
lambda x: x['kwh_savings'] if x['type'] == 'low_energy_lighting' else 0,
axis=1)
scenario_recommendations['solar_kwh'] = scenario_recommendations.apply(
scenario_recommendations['Estimated Solar kWh Savings'] = scenario_recommendations.apply(
lambda x: x['kwh_savings'] if x['type'] == 'solar_pv' else 0, axis=1)
# Set 'Estimated Heating Demand kWh Savings' to zero for lighting and solar rows, whose savings are
# captured in their own dedicated kWh columns
scenario_recommendations['Estimated Kwh Savings'] = scenario_recommendations.apply(
scenario_recommendations['Estimated Heating Demand kWh Savings'] = scenario_recommendations.apply(
lambda x: 0 if x['type'] in ['low_energy_lighting', 'solar_pv'] else x[
'kwh_savings'], axis=1)
scenario_grouped_data = scenario_recommendations.groupby(['property_id']).agg({
'Estimated Kwh Savings': 'sum',
'Estimated Heating Demand kWh Savings': 'sum',
'Estimated Lighting kWh Savings': 'sum',
'Estimated Solar kWh Savings': 'sum',
"estimated_cost": "sum"
}).reset_index()
@ -531,18 +591,52 @@ def slides():
].merge(
scenario_grouped_data, on=["property_id"], how="left"
)
comparison["Estimated Kwh Savings"] = comparison["Estimated Kwh Savings"].fillna(0)
comparison["Estimated Heating Demand kWh Savings"] = (
comparison["Estimated Heating Demand kWh Savings"].fillna(0)
)
comparison["Estimated Lighting kWh Savings"] = (
comparison["Estimated Lighting kWh Savings"].fillna(0)
)
comparison["Estimated Solar kWh Savings"] = (
comparison["Estimated Solar kWh Savings"].fillna(0)
)
comparison["estimated_cost"] = comparison["estimated_cost"].fillna(0)
comparison["post_scenario_heating_hotwater_kwh"] = (
comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Kwh Savings"]
comparison["current_energy_demand_heating_hotwater"] - comparison["Estimated Heating Demand kWh Savings"]
)
# For each scenario, we create a measure matrix
measure_matrix = scenario_recommendations.pivot_table(
index='property_id',
columns='type',
values='id', # Using 'id' just as a placeholder for the pivot
aggfunc=lambda x: True, # If an ID exists for a given type, mark as True
fill_value=False # Fill other entries as False
).reset_index()
non_zero_heat_demand_impact = comparison[
(comparison["Estimated Heating Demand kWh Savings"] > 0) |
(comparison["Estimated Lighting kWh Savings"] > 0) |
(comparison["Estimated Solar kWh Savings"] > 0)
]
measure_matrix = measure_matrix[
measure_matrix["property_id"].isin(non_zero_heat_demand_impact["property_id"].values)
]
measure_matrix = measure_matrix.rename(columns=rename_dict)
comparison = comparison.merge(
measure_matrix, on="property_id", how="left"
)
comparison["scenario_id"] = scenario_id
property_scenario_impact.append(comparison)
property_scenario_impact = pd.concat(property_scenario_impact)
property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"])
# property_scenario_impact = property_scenario_impact.drop(columns=["property_id", "Estimated Kwh Savings"])
for v in rename_dict.values():
# Fill NaNs with False
property_scenario_impact[v] = property_scenario_impact[v].fillna(False)
# Scale
property_scenario_impact["post_scenario_heating_hotwater_kwh_scaled"] = (
@ -600,57 +694,98 @@ def slides():
"post_scenario_heating_hotwater_kwh_scaled"]].empty:
raise Exception("someting went wrong")
# Reorder the columns
grouped_data = grouped_data[
[
'property_type',
'property_sub_type',
'scenario',
'estimated_heating_hotwater_kwh',
'post_scenario_heating_hotwater_kwh',
'estimated_heating_hotwater_kwh_scaled',
'post_scenario_heating_hotwater_kwh_scaled',
'estimated_cost',
]
# Reorder the columns
grouped_data = grouped_data[
[
'property_type',
'property_sub_type',
'scenario',
'estimated_heating_hotwater_kwh',
'post_scenario_heating_hotwater_kwh',
'estimated_heating_hotwater_kwh_scaled',
'post_scenario_heating_hotwater_kwh_scaled',
'estimated_cost',
]
]
grouped_data = grouped_data.rename(
columns={
"property_type": "Property Type",
"property_sub_type": "Property Sub Type",
"scenario": "Scenario",
"estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
"post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh",
"estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)",
"post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)",
"estimated_cost": "Estimated Cost or Retrofit",
}
)
# grouped_data.to_excel(
# "/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property "
# "Type.xlsx",
# index=False
# )
property_scenario_impact = property_scenario_impact.merge(
scenario_names, how="left", on="scenario_id"
)
property_scenario_impact = property_scenario_impact.sort_values(
["postcode", "uprn", "scenario_id"], ascending=True
)
lewes_data = next_gen_dataset.merge(
property_scenario_impact, how="left", on="uprn"
)
# Rearrange, rename columns and drop what we don't need
# TODO - remap the heating type
lewes_data = lewes_data[
[
'uprn', 'address', 'postcode', 'property_type', 'built_form', 'estimated_heating_hotwater_kwh',
'primary_fuel_type', 'gross_floor_area', 'floor_height', 'number_of_floors', 'ashp_suitable',
'ashp_size_kw',
'ashp_cost', 'solar_suitable', 'solar_size_kwp', 'solar_cost', 'estimated_heating_hotwater_kwh_scaled',
# 'property_id', - dropped
'current_energy_demand_heating_hotwater', 'Estimated Heating Demand kWh Savings',
'Estimated Lighting kWh Savings', 'Estimated Solar kWh Savings', 'estimated_cost',
'post_scenario_heating_hotwater_kwh', 'Cavity Wall Insulation', 'Cylinder Thermostat',
'Flat Roof Insulation',
'Hot Water Tank Insulation', 'Loft Insulation', 'Mechanical Ventilation', 'Room Roof Insulation',
# 'scenario_id', - dropped
'Low Energy Lighting', 'Secondary Heating', 'Windows Glazing', 'External Wall Insulation',
'Heating',
'Heating Control',
'Solar PV',
'Internal Wall Insulation',
'Solid Floor Insulation',
'Suspended Floor Insulation',
'post_scenario_heating_hotwater_kwh_scaled',
'scenario'
]
grouped_data = grouped_data.rename(
columns={
"property_type": "Property Type",
"property_sub_type": "Property Sub Type",
"scenario": "Scenario",
"estimated_heating_hotwater_kwh": "Estimated Heating & Hot Water kwh",
"post_scenario_heating_hotwater_kwh": "Post Scenario Heating & Hot Water kwh",
"estimated_heating_hotwater_kwh_scaled": "Estimated Heating & Hot Water kwh (scaled)",
"post_scenario_heating_hotwater_kwh_scaled": "Post Scenario Heating & Hot Water kwh (scaled)",
"estimated_cost": "Estimated Cost or Retrofit",
}
)
]
grouped_data.to_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/Scenario kWh Impact by Property "
"Type.xlsx",
index=False
)
# We save this dataset, which will be shared with Lewes Council
lewes_data.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/property data.csv", index=False
)
property_scenario_impact = property_scenario_impact.merge(
scenario_names, how="left", on="scenario_id"
)
df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario',
values=['post_scenario_heating_hotwater_kwh',
'post_scenario_heating_hotwater_kwh_scaled'])
df_pivot = property_scenario_impact.pivot_table(index='uprn', columns='scenario',
values=['post_scenario_heating_hotwater_kwh',
'post_scenario_heating_hotwater_kwh_scaled'])
# Flattening multi-index columns
df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns]
# Flattening multi-index columns
df_pivot.columns = [f'{col[0]}_{col[1]}' for col in df_pivot.columns]
# Reset the index to have a clean dataframe
df_pivot.reset_index(inplace=True)
# Reset the index to have a clean dataframe
df_pivot.reset_index(inplace=True)
next_gen_dataset = next_gen_dataset.merge(
df_pivot, how="left", on="uprn"
)
next_gen_dataset = next_gen_dataset.merge(
df_pivot, how="left", on="uprn"
)
next_gen_dataset.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False
)
next_gen_dataset.to_csv(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Newhaven/outputs/next_gen_dataset.csv", index=False
)