updating stonewater modelling code to use new data

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-19 22:00:00 +00:00
parent d163ca9931
commit 1645f9ab9e

View file

@ -1071,10 +1071,13 @@ def main():
]
# We now merge on the coordinator data so that against each property, we can map the measures
# TODO: Get the pre & post primary energy numbers
# TODO: Make sure the numbers are going down
retrofit_packages_board = pd.read_excel(
os.path.join(
CUSTOMER_FOLDER_PATH,
"Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1731315080 11.11.24.xlsx"
"Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1732034933 Final 19.11.24.xlsx"
),
header=4
)
@ -1084,6 +1087,18 @@ def main():
retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
]
# populated_primary_energy = retrofit_packages_board[
# ~pd.isnull(retrofit_packages_board['BASE Primary energy (13a-272)'])
# ]
#
# z = populated_primary_energy[
# populated_primary_energy['POST Primary energy (13a - 272)'] > populated_primary_energy[
# 'BASE Primary energy (13a-272)']
# ]
#
# all(populated_primary_energy['POST Primary energy (13a - 272)'] <= populated_primary_energy[
# 'BASE Primary energy (13a-272)'])
# Replace \n with ""
extracted_data["Postcode"] = extracted_data["Postcode"].str.replace("\n", "")
@ -1192,7 +1207,7 @@ def main():
# missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
# CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
if len(missing_ids) != 6:
if len(missing_ids) != 1:
raise Exception("Unacceptable number of missings")
if matching_lookup["Address ID"].duplicated().sum():
@ -1239,7 +1254,6 @@ def main():
if stonewater_data["Address ID"].duplicated().sum():
raise Exception("Duplicate Address IDs")
# Create a section for costs
for measure in measure_columns:
stonewater_data[f"Cost of {measure}"] = None
@ -1297,8 +1311,41 @@ def main():
]:
stonewater_data[c] = stonewater_data[c].astype(str)
# Fill the primary energy numbers from the Excel board
stonewater_data = stonewater_data.merge(
retrofit_packages_board[
[
"Name", "Address ID", "BASE Primary energy (13a-272)", "POST Primary energy (13a - 272)"
]
],
on=["Address ID", "Name"],
how="left"
)
stonewater_data["Primary Energy Use (kWh/yr)"] = np.where(
pd.isnull(stonewater_data["Primary Energy Use (kWh/yr)"]),
stonewater_data["BASE Primary energy (13a-272)"],
stonewater_data["Primary Energy Use (kWh/yr)"]
)
stonewater_data = stonewater_data.drop(columns=["BASE Primary energy (13a-272)"])
# Add on organisation reference
original_archetypes = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
"- Archetyped V3.1.xlsx",
header=4
)
original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
stonewater_data = stonewater_data.merge(
original_archetypes[["Address ID", 'Org. ref.']],
on="Address ID",
how="left"
)
# Save this data to excel
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V4.xlsx", index=False)
cost_sheet = [
{
@ -1677,6 +1724,12 @@ def propsed_wave_3_sample():
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
asset_list["Address ID"] = asset_list["Address ID"].astype(int)
asset_list["Street name"] = np.where(
pd.isnull(asset_list["Street name"]),
asset_list["Postcode"],
asset_list["Street name"]
)
# Create the postal region, taking the first part of the postcode
asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0]
asset_list["Street and Region"] = asset_list["Street name"] + " " + asset_list["Postal Region"]
@ -1684,43 +1737,16 @@ def propsed_wave_3_sample():
# Keep just the columns we need
asset_list = asset_list[
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Street and Region",
["UPRN", "Address ID", 'Org. ref.', "Archetype ID", "Postal Region", "Name", "Postcode", "Street and Region",
"Property Type", "Wall Type", "Roof Type", "Heating"]
]
# Updated packages: to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
survey_results = pd.read_excel(
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.24.xlsx"),
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.19 V2.xlsx"),
header=13,
sheet_name="Modelled Packages"
)
additional_survey_data = pd.read_excel(
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"),
header=0
)
survey_results = survey_results.drop(
columns=["Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]
).merge(
additional_survey_data[
[
"Address ID",
"Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness",
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
"Main Building Alternative Wall Thickness",
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"
]
].rename(
columns={
"Main Wall Insulation_x": "Main Wall Insulation Type",
}
),
how="left",
on="Address ID"
)
# TODO: We probably want the actual surveyed wall, roof, heating type
survey_results = survey_results[
[
"Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
@ -1768,6 +1794,105 @@ def propsed_wave_3_sample():
if survey_results_with_original_features.shape[0] != survey_results.shape[0]:
raise ValueError("Something went wrong")
# Against properties that have NO package ref, we assign a package ref
properties_with_packages = survey_results_with_original_features[
~pd.isnull(survey_results_with_original_features["Package Ref"])
]
properties_without_packages = survey_results_with_original_features[
(survey_results_with_original_features["Current SAP Rating"] < 69) & pd.isnull(
survey_results_with_original_features["Package Ref"]
)
]
# Change this to a lookup
package_ratings = pd.DataFrame([
{
"1A": 1,
"1B": 2,
"2A": 3,
"2B": 4,
"3A": 5,
"3B": 6,
4: 7
}
])
package_ratings = pd.melt(package_ratings, var_name="Package Ref", value_name="Rank")
mapped_package_refs = []
for _, property in tqdm(properties_without_packages.iterrows(), total=len(properties_without_packages)):
# Same archetype?
matches = properties_with_packages[properties_with_packages["Archetype ID"] == property["Archetype ID"]]
if matches.empty:
# Similar property
matches = properties_with_packages[
(properties_with_packages["Property Type"].str.split(":").str[0] ==
property["Property Type"].split(":")[0]) &
(properties_with_packages["Wall Type"] == property["Wall Type"]) &
(properties_with_packages["Roof Type"].str.split(":").str[0] == property["Roof Type"].split(":")[0]) &
(properties_with_packages["Heating"].str.split(":").str[0] == property["Heating"].split(":")[0])
]
if matches.empty:
matches = properties_with_packages[
(properties_with_packages["Property Type"].str.split(":").str[0] ==
property["Property Type"].split(":")[0]) &
(properties_with_packages["Wall Type"].str.split(":").str[0] == property["Wall Type"].split(":")[0]) &
(properties_with_packages["Roof Type"].str.split(":").str[0] == property["Roof Type"].split(":")[0]) &
(properties_with_packages["Heating"].str.split(":").str[0] == property["Heating"].split(":")[0])
]
if matches.empty:
raise Exception("Implement me")
if matches.shape[0] > 1:
# Take the package with the highest rank
matches = matches.merge(
package_ratings,
on="Package Ref",
how="left"
).sort_values("Rank", ascending=False).head(1)
mapped_package_refs.append(
{
"Address ID": property["Address ID"],
"Matched Package Ref": matches["Package Ref"].values[0]
}
)
mapped_package_refs = pd.DataFrame(mapped_package_refs)
survey_results = survey_results.merge(
mapped_package_refs,
on="Address ID",
how="left"
)
survey_results["Package Ref"] = np.where(
pd.notnull(survey_results["Matched Package Ref"]),
survey_results["Matched Package Ref"],
survey_results["Package Ref"]
)
survey_results = survey_results.drop(columns=["Matched Package Ref"])
# Do the same with survey_results_with_original_features
survey_results_with_original_features = survey_results_with_original_features.merge(
mapped_package_refs,
on="Address ID",
how="left"
)
survey_results_with_original_features["Package Ref"] = np.where(
pd.notnull(survey_results_with_original_features["Matched Package Ref"]),
survey_results_with_original_features["Matched Package Ref"],
survey_results_with_original_features["Package Ref"]
)
survey_results_with_original_features = survey_results_with_original_features.drop(columns=["Matched Package Ref"])
# Save the data for reference
# mapped_package_refs = mapped_package_refs.merge(
# asset_list[["Name", "Postcode", "Address ID", "Org. ref."]],
# on="Address ID",
# how="left"
# )
# mapped_package_refs.to_csv(os.path.join(CUSTOMER_FOLDER_PATH, "mapped_package_refs.csv"), index=False)
# We get longitude & Latitude
archetyping_spatial_features = read_pickle_from_s3(
bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
@ -1911,7 +2036,8 @@ def propsed_wave_3_sample():
'Current EPC Band', 'Current SAP Rating',
'Survey: Main Wall Type', 'Survey: Main Alternative Wall',
'Survey: Main Roof Type', 'Survey: Primary Heating System',
'Survey: Matching Address ID', 'Distance to Closest Match (m)'
'Survey: Matching Address ID', 'Distance to Closest Match (m)',
"Package Ref"
]:
region_assets[col] = np.where(
pd.isnull(region_assets[col]) & pd.notnull(region_assets[col + suffix]),
@ -2027,7 +2153,7 @@ def propsed_wave_3_sample():
"Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)',
"Match Type"
"Match Type", "Package Ref"
]
)
@ -2183,6 +2309,13 @@ def propsed_wave_3_sample():
closest_match = surveyed.iloc[0]
# The closest property may be an EPC C, so we take the package ref from the nearest property
# that has a non-NA package ref
if expected_epc in ["C", "B", "A"]:
package_ref = None
else:
package_ref = surveyed["Package Ref"].dropna().values[0]
final_missed_matches.append(
{
"Address ID": a_id,
@ -2195,7 +2328,7 @@ def propsed_wave_3_sample():
"Survey: Primary Heating System": closest_match["Survey: Primary Heating System"],
"Survey: Matching Address ID": closest_match["Address ID"],
'Distance to Closest Match (m)': closest_match["distance_meters"],
"Package Ref": closest_match["Package Ref"]
"Package Ref": package_ref
}
)
continue
@ -2225,6 +2358,11 @@ def propsed_wave_3_sample():
results = pd.concat(results)
results[
pd.isnull(results["Package Ref"]) & (results["Current EPC Band"] == "D")
]["Postal Region"]
results[resul]
# Check if there are missings in current epc band, current sap rating or any of the survey attributes
for c in (
[
@ -2269,8 +2407,6 @@ def propsed_wave_3_sample():
street_summary["Gain"] = street_summary[gain_columns].sum(axis=1)
street_summary["Loss"] = street_summary[loss_columns].sum(axis=1)
print(street_summary.sum())
selected_rows, _ = optimise(
gain=street_summary["Gain"].values,
loss=street_summary["Loss"].values,
@ -2334,9 +2470,6 @@ def propsed_wave_3_sample():
package_summary, how="left", on="Street and Region"
)
street_bid_structure = street_bid_structure.sort_values("Gain", ascending=False)
street_bid_structure.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
)
individual_units_programme = results.copy()
individual_units_programme["Unit in Programme"] = individual_units_programme["Street and Region"].isin(
@ -2386,6 +2519,79 @@ def propsed_wave_3_sample():
.str.strip() # Strip leading/trailing spaces
)
# Any EPC C (or better, or "Needs Survey") properties that have been included should be flagged as potential
# infill: a wall insulation measure for flats, low carbon heating for everything else
selected_epc_c = individual_units_programme[
(individual_units_programme["Current EPC Band"].isin(["C", "B", "A", "Needs Survey"])) &
(individual_units_programme["Unit in Programme"])
]
flat_wall_map = {
"CA Cavity: F Filled Cavity": False,
"CA Cavity: A As Built": True,
"SO Solid Brick: A As Built": True,
"Not Surveyed": False
}
heating_map = {
"BGW Post 98 Combi condens. with auto ign.": False,
"BGB Post 98 Regular condens. with auto ign.": False,
"SEK High heat retention storage heaters": False,
"SEB Modern slimline storage heaters": True,
"Not Surveyed": False
}
infill_data = []
for _, epc_c_property in selected_epc_c.iterrows():
if epc_c_property["Property Type"].split(":")[0] == "Flat":
# Look for a wall insulation measure
infill = flat_wall_map[epc_c_property["Survey: Main Wall Type"]]
infill_data.append(
{
"Address ID": epc_c_property["Address ID"],
"Street and Region": epc_c_property["Street and Region"],
"Possible Flat Infill?": infill
}
)
continue
infill = heating_map[epc_c_property["Survey: Primary Heating System"]]
infill_data.append(
{
"Address ID": epc_c_property["Address ID"],
"Street and Region": epc_c_property["Street and Region"],
"Low Carbon Heating Infill?": infill
}
)
infill_data = pd.DataFrame(infill_data)
individual_units_programme = individual_units_programme.merge(
infill_data[["Address ID", 'Possible Flat Infill?', 'Low Carbon Heating Infill?']],
how="left", on="Address ID"
)
for c in ['Possible Flat Infill?', 'Low Carbon Heating Infill?']:
individual_units_programme[c] = individual_units_programme[c].fillna(False)
infill_by_street = infill_data.pivot_table(
index='Street and Region',
values=['Possible Flat Infill?', 'Low Carbon Heating Infill?'],
aggfunc='sum',
fill_value=0
).reset_index()
street_bid_structure = street_bid_structure.merge(
infill_by_street, how="left", on="Street and Region"
)
for c in ['Low Carbon Heating Infill?', 'Possible Flat Infill?']:
street_bid_structure[c] = street_bid_structure[c].fillna(0)
street_bid_structure.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
)
# TODO: Add the full Address!!!
individual_units_programme.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme.csv"), index=False
)