mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
updating stonewater modelling code to use new data
This commit is contained in:
parent
d163ca9931
commit
1645f9ab9e
1 changed files with 247 additions and 41 deletions
|
|
@ -1071,10 +1071,13 @@ def main():
|
|||
]
|
||||
|
||||
# We now merge on the coordinator data so that against each property, we can map the measures
|
||||
# TODO: Get the pre & post primary energy numbers
|
||||
# TODO: Make sure the numbers are going down
|
||||
|
||||
retrofit_packages_board = pd.read_excel(
|
||||
os.path.join(
|
||||
CUSTOMER_FOLDER_PATH,
|
||||
"Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1731315080 11.11.24.xlsx"
|
||||
"Stonewater_SHDF_3_0_Board_work_in_progress_-_Operations_1732034933 Final 19.11.24.xlsx"
|
||||
),
|
||||
header=4
|
||||
)
|
||||
|
|
@ -1084,6 +1087,18 @@ def main():
|
|||
retrofit_packages_board["RA"].isin(["Invoiced", "Completed"])
|
||||
]
|
||||
|
||||
# populated_primary_energy = retrofit_packages_board[
|
||||
# ~pd.isnull(retrofit_packages_board['BASE Primary energy (13a-272)'])
|
||||
# ]
|
||||
#
|
||||
# z = populated_primary_energy[
|
||||
# populated_primary_energy['POST Primary energy (13a - 272)'] > populated_primary_energy[
|
||||
# 'BASE Primary energy (13a-272)']
|
||||
# ]
|
||||
#
|
||||
# all(populated_primary_energy['POST Primary energy (13a - 272)'] <= populated_primary_energy[
|
||||
# 'BASE Primary energy (13a-272)'])
|
||||
|
||||
# Replace \n with ""
|
||||
extracted_data["Postcode"] = extracted_data["Postcode"].str.replace("\n", "")
|
||||
|
||||
|
|
@ -1192,7 +1207,7 @@ def main():
|
|||
# missed[["Name", "Postcode", "Archetype ID", "Arch. Group Rank"]].to_csv(
|
||||
# CUSTOMER_FOLDER_PATH + "/missed_debugging.csv")
|
||||
|
||||
if len(missing_ids) != 6:
|
||||
if len(missing_ids) != 1:
|
||||
raise Exception("Unacceptable number of missings")
|
||||
|
||||
if matching_lookup["Address ID"].duplicated().sum():
|
||||
|
|
@ -1239,7 +1254,6 @@ def main():
|
|||
|
||||
if stonewater_data["Address ID"].duplicated().sum():
|
||||
raise Exception("Duplicate Address IDs")
|
||||
|
||||
# Create a section for costs
|
||||
for measure in measure_columns:
|
||||
stonewater_data[f"Cost of {measure}"] = None
|
||||
|
|
@ -1297,8 +1311,41 @@ def main():
|
|||
]:
|
||||
stonewater_data[c] = stonewater_data[c].astype(str)
|
||||
|
||||
# Fill the primary energy numbers from the excel
|
||||
stonewater_data = stonewater_data.merge(
|
||||
retrofit_packages_board[
|
||||
[
|
||||
"Name", "Address ID", "BASE Primary energy (13a-272)", "POST Primary energy (13a - 272)"
|
||||
]
|
||||
],
|
||||
on=["Address ID", "Name"],
|
||||
how="left"
|
||||
)
|
||||
stonewater_data["Primary Energy Use (kWh/yr)"] = np.where(
|
||||
pd.isnull(stonewater_data["Primary Energy Use (kWh/yr)"]),
|
||||
stonewater_data["BASE Primary energy (13a-272)"],
|
||||
stonewater_data["Primary Energy Use (kWh/yr)"]
|
||||
)
|
||||
stonewater_data = stonewater_data.drop(columns=["BASE Primary energy (13a-272)"])
|
||||
|
||||
# Add on organisation reference
|
||||
original_archetypes = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
|
||||
"- Archetyped V3.1.xlsx",
|
||||
header=4
|
||||
)
|
||||
original_archetypes = original_archetypes[~pd.isnull(original_archetypes["Address ID"])]
|
||||
original_archetypes = original_archetypes[original_archetypes["Address ID"] != "Address ID"]
|
||||
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
|
||||
|
||||
stonewater_data = stonewater_data.merge(
|
||||
original_archetypes[["Address ID", 'Org. ref.']],
|
||||
on="Address ID",
|
||||
how="left"
|
||||
)
|
||||
|
||||
# Save this data to excel
|
||||
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
|
||||
stonewater_data.to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V4.xlsx", index=False)
|
||||
|
||||
cost_sheet = [
|
||||
{
|
||||
|
|
@ -1677,6 +1724,12 @@ def propsed_wave_3_sample():
|
|||
asset_list = asset_list[asset_list["Address ID"] != "Address ID"]
|
||||
asset_list["Address ID"] = asset_list["Address ID"].astype(int)
|
||||
|
||||
asset_list["Street name"] = np.where(
|
||||
pd.isnull(asset_list["Street name"]),
|
||||
asset_list["Postcode"],
|
||||
asset_list["Street name"]
|
||||
)
|
||||
|
||||
# Create the postal region, taking the first part of the postcode
|
||||
asset_list["Postal Region"] = asset_list["Postcode"].str.split(" ").str[0]
|
||||
asset_list["Street and Region"] = asset_list["Street name"] + " " + asset_list["Postal Region"]
|
||||
|
|
@ -1684,43 +1737,16 @@ def propsed_wave_3_sample():
|
|||
|
||||
# Keep just the columns we need
|
||||
asset_list = asset_list[
|
||||
["UPRN", "Address ID", "Archetype ID", "Postal Region", "Postcode", "Street and Region",
|
||||
["UPRN", "Address ID", 'Org. ref.', "Archetype ID", "Postal Region", "Name", "Postcode", "Street and Region",
|
||||
"Property Type", "Wall Type", "Roof Type", "Heating"]
|
||||
]
|
||||
|
||||
# Updated packages: to_excel(CUSTOMER_FOLDER_PATH + "/Stonewater - costed retrofit packages V3.xlsx", index=False)
|
||||
survey_results = pd.read_excel(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.24.xlsx"),
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - Bid Packages WIP 14.11.19 V2.xlsx"),
|
||||
header=13,
|
||||
sheet_name="Modelled Packages"
|
||||
)
|
||||
|
||||
additional_survey_data = pd.read_excel(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"),
|
||||
header=0
|
||||
)
|
||||
|
||||
survey_results = survey_results.drop(
|
||||
columns=["Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]
|
||||
).merge(
|
||||
additional_survey_data[
|
||||
[
|
||||
"Address ID",
|
||||
"Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness",
|
||||
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
|
||||
"Main Building Alternative Wall Thickness",
|
||||
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
"Main Wall Insulation_x": "Main Wall Insulation Type",
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="Address ID"
|
||||
)
|
||||
|
||||
# TODO: We probably want the actual surveyed wall, roof, heating type
|
||||
survey_results = survey_results[
|
||||
[
|
||||
"Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
|
||||
|
|
@ -1768,6 +1794,105 @@ def propsed_wave_3_sample():
|
|||
if survey_results_with_original_features.shape[0] != survey_results.shape[0]:
|
||||
raise ValueError("Something went wrong")
|
||||
|
||||
# Against properties that have NO package ref, we assign a package ref
|
||||
properties_with_packages = survey_results_with_original_features[
|
||||
~pd.isnull(survey_results_with_original_features["Package Ref"])
|
||||
]
|
||||
|
||||
properties_without_packages = survey_results_with_original_features[
|
||||
(survey_results_with_original_features["Current SAP Rating"] < 69) & pd.isnull(
|
||||
survey_results_with_original_features["Package Ref"]
|
||||
)
|
||||
]
|
||||
|
||||
# Change this to a lookup
|
||||
package_ratings = pd.DataFrame([
|
||||
{
|
||||
"1A": 1,
|
||||
"1B": 2,
|
||||
"2A": 3,
|
||||
"2B": 4,
|
||||
"3A": 5,
|
||||
"3B": 6,
|
||||
4: 7
|
||||
}
|
||||
])
|
||||
package_ratings = pd.melt(package_ratings, var_name="Package Ref", value_name="Rank")
|
||||
|
||||
mapped_package_refs = []
|
||||
for _, property in tqdm(properties_without_packages.iterrows(), total=len(properties_without_packages)):
|
||||
# Same archetype?
|
||||
matches = properties_with_packages[properties_with_packages["Archetype ID"] == property["Archetype ID"]]
|
||||
|
||||
if matches.empty:
|
||||
# Similar property
|
||||
matches = properties_with_packages[
|
||||
(properties_with_packages["Property Type"].str.split(":").str[0] ==
|
||||
property["Property Type"].split(":")[0]) &
|
||||
(properties_with_packages["Wall Type"] == property["Wall Type"]) &
|
||||
(properties_with_packages["Roof Type"].str.split(":").str[0] == property["Roof Type"].split(":")[0]) &
|
||||
(properties_with_packages["Heating"].str.split(":").str[0] == property["Heating"].split(":")[0])
|
||||
]
|
||||
if matches.empty:
|
||||
matches = properties_with_packages[
|
||||
(properties_with_packages["Property Type"].str.split(":").str[0] ==
|
||||
property["Property Type"].split(":")[0]) &
|
||||
(properties_with_packages["Wall Type"].str.split(":").str[0] == property["Wall Type"].split(":")[0]) &
|
||||
(properties_with_packages["Roof Type"].str.split(":").str[0] == property["Roof Type"].split(":")[0]) &
|
||||
(properties_with_packages["Heating"].str.split(":").str[0] == property["Heating"].split(":")[0])
|
||||
]
|
||||
if matches.empty:
|
||||
raise Exception("Implement me")
|
||||
if matches.shape[0] > 1:
|
||||
# Take the package with the highest rank
|
||||
matches = matches.merge(
|
||||
package_ratings,
|
||||
on="Package Ref",
|
||||
how="left"
|
||||
).sort_values("Rank", ascending=False).head(1)
|
||||
|
||||
mapped_package_refs.append(
|
||||
{
|
||||
"Address ID": property["Address ID"],
|
||||
"Matched Package Ref": matches["Package Ref"].values[0]
|
||||
}
|
||||
)
|
||||
|
||||
mapped_package_refs = pd.DataFrame(mapped_package_refs)
|
||||
|
||||
survey_results = survey_results.merge(
|
||||
mapped_package_refs,
|
||||
on="Address ID",
|
||||
how="left"
|
||||
)
|
||||
survey_results["Package Ref"] = np.where(
|
||||
pd.notnull(survey_results["Matched Package Ref"]),
|
||||
survey_results["Matched Package Ref"],
|
||||
survey_results["Package Ref"]
|
||||
)
|
||||
survey_results = survey_results.drop(columns=["Matched Package Ref"])
|
||||
|
||||
# Do the same with survey_results_with_original_features
|
||||
survey_results_with_original_features = survey_results_with_original_features.merge(
|
||||
mapped_package_refs,
|
||||
on="Address ID",
|
||||
how="left"
|
||||
)
|
||||
survey_results_with_original_features["Package Ref"] = np.where(
|
||||
pd.notnull(survey_results_with_original_features["Matched Package Ref"]),
|
||||
survey_results_with_original_features["Matched Package Ref"],
|
||||
survey_results_with_original_features["Package Ref"]
|
||||
)
|
||||
survey_results_with_original_features = survey_results_with_original_features.drop(columns=["Matched Package Ref"])
|
||||
|
||||
# Save the data for reference
|
||||
# mapped_package_refs = mapped_package_refs.merge(
|
||||
# asset_list[["Name", "Postcode", "Address ID", "Org. ref."]],
|
||||
# on="Address ID",
|
||||
# how="left"
|
||||
# )
|
||||
# mapped_package_refs.to_csv(os.path.join(CUSTOMER_FOLDER_PATH, "mapped_package_refs.csv"), index=False)
|
||||
|
||||
# We get longitude & Latitude
|
||||
archetyping_spatial_features = read_pickle_from_s3(
|
||||
bucket_name="retrofit-data-dev", s3_file_name="scustomers/Stonewater/clustering/spatial_data_to_uprn.pkl",
|
||||
|
|
@ -1911,7 +2036,8 @@ def propsed_wave_3_sample():
|
|||
'Current EPC Band', 'Current SAP Rating',
|
||||
'Survey: Main Wall Type', 'Survey: Main Alternative Wall',
|
||||
'Survey: Main Roof Type', 'Survey: Primary Heating System',
|
||||
'Survey: Matching Address ID', 'Distance to Closest Match (m)'
|
||||
'Survey: Matching Address ID', 'Distance to Closest Match (m)',
|
||||
"Package Ref"
|
||||
]:
|
||||
region_assets[col] = np.where(
|
||||
pd.isnull(region_assets[col]) & pd.notnull(region_assets[col + suffix]),
|
||||
|
|
@ -2027,7 +2153,7 @@ def propsed_wave_3_sample():
|
|||
"Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
|
||||
'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
|
||||
'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)',
|
||||
"Match Type"
|
||||
"Match Type", "Package Ref"
|
||||
]
|
||||
)
|
||||
|
||||
|
|
@ -2183,6 +2309,13 @@ def propsed_wave_3_sample():
|
|||
|
||||
closest_match = surveyed.iloc[0]
|
||||
|
||||
# The closest property may be an EPC C, so we take the package ref from the property that's the nearest
|
||||
# with non-NA package ref
|
||||
if expected_epc in ["C", "B", "A"]:
|
||||
package_ref = None
|
||||
else:
|
||||
package_ref = surveyed["Package Ref"].dropna().values[0]
|
||||
|
||||
final_missed_matches.append(
|
||||
{
|
||||
"Address ID": a_id,
|
||||
|
|
@ -2195,7 +2328,7 @@ def propsed_wave_3_sample():
|
|||
"Survey: Primary Heating System": closest_match["Survey: Primary Heating System"],
|
||||
"Survey: Matching Address ID": closest_match["Address ID"],
|
||||
'Distance to Closest Match (m)': closest_match["distance_meters"],
|
||||
"Package Ref": closest_match["Package Ref"]
|
||||
"Package Ref": package_ref
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
|
@ -2225,6 +2358,11 @@ def propsed_wave_3_sample():
|
|||
|
||||
results = pd.concat(results)
|
||||
|
||||
results[
|
||||
pd.isnull(results["Package Ref"]) & (results["Current EPC Band"] == "D")
|
||||
]["Postal Region"]
|
||||
results[resul]
|
||||
|
||||
# Check if there are missings in current epc band, current sap rating or any of the survey attributes
|
||||
for c in (
|
||||
[
|
||||
|
|
@ -2269,8 +2407,6 @@ def propsed_wave_3_sample():
|
|||
street_summary["Gain"] = street_summary[gain_columns].sum(axis=1)
|
||||
street_summary["Loss"] = street_summary[loss_columns].sum(axis=1)
|
||||
|
||||
print(street_summary.sum())
|
||||
|
||||
selected_rows, _ = optimise(
|
||||
gain=street_summary["Gain"].values,
|
||||
loss=street_summary["Loss"].values,
|
||||
|
|
@ -2334,9 +2470,6 @@ def propsed_wave_3_sample():
|
|||
package_summary, how="left", on="Street and Region"
|
||||
)
|
||||
street_bid_structure = street_bid_structure.sort_values("Gain", ascending=False)
|
||||
street_bid_structure.to_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
|
||||
)
|
||||
|
||||
individual_units_programme = results.copy()
|
||||
individual_units_programme["Unit in Programme"] = individual_units_programme["Street and Region"].isin(
|
||||
|
|
@ -2386,6 +2519,79 @@ def propsed_wave_3_sample():
|
|||
.str.strip() # Strip leading/trailing spaces
|
||||
)
|
||||
|
||||
# Any EPC C properties that have been included should be flagged as potential low carbon heating
|
||||
selected_epc_c = individual_units_programme[
|
||||
(individual_units_programme["Current EPC Band"].isin(["C", "B", "A", "Needs Survey"])) &
|
||||
(individual_units_programme["Unit in Programme"])
|
||||
]
|
||||
|
||||
flat_wall_map = {
|
||||
"CA Cavity: F Filled Cavity": False,
|
||||
"CA Cavity: A As Built": True,
|
||||
"SO Solid Brick: A As Built": True,
|
||||
"Not Surveyed": False
|
||||
}
|
||||
|
||||
heating_map = {
|
||||
"BGW Post 98 Combi condens. with auto ign.": False,
|
||||
"BGB Post 98 Regular condens. with auto ign.": False,
|
||||
"SEK High heat retention storage heaters": False,
|
||||
"SEB Modern slimline storage heaters": True,
|
||||
"Not Surveyed": False
|
||||
}
|
||||
|
||||
infill_data = []
|
||||
for _, epc_c_property in selected_epc_c.iterrows():
|
||||
if epc_c_property["Property Type"].split(":")[0] == "Flat":
|
||||
# Look for a wall insulation measure
|
||||
infill = flat_wall_map[epc_c_property["Survey: Main Wall Type"]]
|
||||
infill_data.append(
|
||||
{
|
||||
"Address ID": epc_c_property["Address ID"],
|
||||
"Street and Region": epc_c_property["Street and Region"],
|
||||
"Possible Flat Infill?": infill
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
||||
infill = heating_map[epc_c_property["Survey: Primary Heating System"]]
|
||||
infill_data.append(
|
||||
{
|
||||
"Address ID": epc_c_property["Address ID"],
|
||||
"Street and Region": epc_c_property["Street and Region"],
|
||||
"Low Carbon Heating Infill?": infill
|
||||
}
|
||||
)
|
||||
infill_data = pd.DataFrame(infill_data)
|
||||
|
||||
individual_units_programme = individual_units_programme.merge(
|
||||
infill_data[["Address ID", 'Possible Flat Infill?', 'Low Carbon Heating Infill?']],
|
||||
how="left", on="Address ID"
|
||||
)
|
||||
|
||||
for c in ['Possible Flat Infill?', 'Low Carbon Heating Infill?']:
|
||||
individual_units_programme[c] = individual_units_programme[c].fillna(False)
|
||||
|
||||
infill_by_street = infill_data.pivot_table(
|
||||
index='Street and Region',
|
||||
values=['Possible Flat Infill?', 'Low Carbon Heating Infill?'],
|
||||
aggfunc='sum',
|
||||
fill_value=0
|
||||
).reset_index()
|
||||
|
||||
street_bid_structure = street_bid_structure.merge(
|
||||
infill_by_street, how="left", on="Street and Region"
|
||||
)
|
||||
|
||||
for c in ['Low Carbon Heating Infill?', 'Possible Flat Infill?']:
|
||||
street_bid_structure[c] = street_bid_structure[c].fillna(0)
|
||||
|
||||
street_bid_structure.to_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
|
||||
)
|
||||
|
||||
# TODO: Add the full Address!!!
|
||||
|
||||
individual_units_programme.to_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme.csv"), index=False
|
||||
)
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue