fixing filling of property

This commit is contained in:
Khalim Conn-Kowlessar 2024-11-19 13:54:46 +00:00
parent d65c99f62a
commit d163ca9931

View file

@ -1669,7 +1669,7 @@ def propsed_wave_3_sample():
header=4
)
# TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater and 7 properties missing
# TODO: We drop 7 properties missing
# UPRN
asset_list = asset_list[~asset_list["Archetype ID"].isin(["MISSING UPRN"])]
# Clean address ids
@ -1699,15 +1699,23 @@ def propsed_wave_3_sample():
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"),
header=0
)
survey_results = survey_results.merge(
survey_results = survey_results.drop(
columns=["Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]
).merge(
additional_survey_data[
[
"Address ID",
"Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness",
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
"Main Building Alternative Wall Thickness"
"Main Building Alternative Wall Thickness",
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"
]
].rename(columns={"Main Wall Insulation_x": "Main Wall Insulation Type"}),
].rename(
columns={
"Main Wall Insulation_x": "Main Wall Insulation Type",
}
),
how="left",
on="Address ID"
)
@ -1718,6 +1726,7 @@ def propsed_wave_3_sample():
"Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
"Existing Primary Heating System",
"Package Ref",
"Main Wall Type", "Main Wall Insulation Type", "Main Wall Thickness",
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
"Main Building Alternative Wall Thickness"
@ -1727,6 +1736,7 @@ def propsed_wave_3_sample():
"Existing Primary Heating System": "Survey: Primary Heating System"
}
)
survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
# Concatenate from the wall information
survey_results["Survey: Main Wall Type"] = survey_results["Main Wall Type"].astype(str) + ": " + survey_results[
@ -1929,7 +1939,7 @@ def propsed_wave_3_sample():
region_assets = region_assets.merge(
exact_surveyed[
["Address ID", "Current EPC Band", "Current SAP Rating"] + survey_attribute_columns + [
"Survey: Matching Address ID"
"Survey: Matching Address ID", "Package Ref"
]
],
on="Address ID",
@ -2005,6 +2015,7 @@ def propsed_wave_3_sample():
'Survey: Primary Heating System': closest_match["Survey: Primary Heating System"],
"Survey: Matching Address ID": closest_match["Address ID"],
'Distance to Closest Match (m)': closest_match["distance_meters"],
"Package Ref": closest_match["Package Ref"],
"Match Type": match_type
}
)
@ -2015,7 +2026,8 @@ def propsed_wave_3_sample():
columns=[
"Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)'
'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)',
"Match Type"
]
)
@ -2032,8 +2044,8 @@ def propsed_wave_3_sample():
# Label the tier 1 properties
region_assets["Confidence Tier"] = np.where(
region_assets["Current EPC Band_method1"].isin(["D", "E", "F", "G"]) &
pd.isnull(region_assets["Confidence Tier"]),
"1 - Archetype surveyed in region", region_assets["Confidence Tier"]
pd.isnull(region_assets["Confidence Tier"]) & ~pd.isnull(region_assets["Match Type"]),
region_assets["Match Type"], region_assets["Confidence Tier"]
)
# Handle EPC C
@ -2046,86 +2058,7 @@ def propsed_wave_3_sample():
region_assets = fill_survey_columns(region_assets, suffix="_method1")
method_1_columns = [c for c in region_assets.columns if c.endswith("_method1")]
region_assets = region_assets.drop(columns=method_1_columns)
missed_archetypes = set(archetype_ids) - set(region_surveyed["Archetype ID"])
# archetype_surveyed = []
for arch_id in missed_archetypes:
for _, property in region_assets[region_assets["Archetype ID"] == arch_id].iterrows():
archetype_data = survey_results_with_original_features[
survey_results["Archetype ID"] == arch_id
].copy()
if archetype_data.empty:
continue
raise Exception("IMPLEMENT ME")
# archetype_data["distance_meters"] = haversine(
# lat1=property.latitude, lon1=property.longitude,
# lat2=archetype_data["latitude"].values, lon2=archetype_data["longitude"].values
# )
# expected_sap = np.average(
# archetype_data["Current SAP Rating"], weights=1 / (archetype_data["distance_meters"] + 1)
# )
# expected_epc = sap_to_epc(expected_sap)
# archetype_surveyed.append(
# {
# "Archetype ID": arch_id,
# "Address ID": property["Address ID"],
# "Current EPC Band": expected_epc
# }
# )
# archetype_surveyed = pd.DataFrame(archetype_surveyed)
# if archetype_surveyed.empty:
# archetype_surveyed = pd.DataFrame(
# columns=[
# "Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
# 'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
# 'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)'
# ]
# )
#
# region_assets = region_assets.merge(
# archetype_surveyed,
# on=["Archetype ID", "Address ID"],
# how="left",
# suffixes=("", "_method2")
# )
#
# region_assets["Confidence Tier"] = np.where(
# region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull(
# region_assets["Confidence Tier"]),
# "2 - same archetype", region_assets["Confidence Tier"]
# )
#
# for col in [
# 'Current EPC Band', 'Current SAP Rating',
# 'Survey: Main Wall Type', 'Survey: Main Alternative Wall',
# 'Survey: Main Roof Type', 'Survey: Primary Heating System',
# 'Survey: Matching Address ID', 'Distance to Closest Match (m)'
# ]:
# region_assets[col] = np.where(
# pd.isnull(region_assets[col]) & pd.notnull(region_assets[col + "_method2"]),
# region_assets[col + "_method2"], region_assets[col]
# )
#
# method_2_columns = [c for c in region_assets.columns if c.endswith("_method2")]
# region_assets = region_assets.drop(columns=method_2_columns)
# We label EPC C properties
# region_assets["Confidence Tier"] = np.where(
# region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
# "5 - EPC C or above", region_assets["Confidence Tier"]
# )
#
# region_assets["Confidence Tier"] = np.where(
# region_assets["Archetype ID"] == "EPC C OR ABOVE",
# "5 - EPC C or above", region_assets["Confidence Tier"]
# )
#
# region_assets["Current EPC Band"] = np.where(
# region_assets["Archetype ID"] == "EPC C OR ABOVE",
# "C", region_assets["Current EPC Band"]
# )
region_assets = region_assets.drop(columns=method_1_columns + ["Match Type"])
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
@ -2217,6 +2150,7 @@ def propsed_wave_3_sample():
"Survey: Primary Heating System": "Not Surveyed",
"Survey: Matching Address ID": "Not Surveyed",
'Distance to Closest Match (m)': 9999999,
"Package Ref": "Not Surveyed",
}
)
continue
@ -2261,6 +2195,7 @@ def propsed_wave_3_sample():
"Survey: Primary Heating System": closest_match["Survey: Primary Heating System"],
"Survey: Matching Address ID": closest_match["Address ID"],
'Distance to Closest Match (m)': closest_match["distance_meters"],
"Package Ref": closest_match["Package Ref"]
}
)
continue
@ -2292,8 +2227,10 @@ def propsed_wave_3_sample():
# Check for missing values in the current EPC band, current SAP rating or any of the survey attributes
for c in (
["Current EPC Band", "Current SAP Rating", "Survey: Matching Address ID", 'Distance to Closest Match (m)'] +
survey_attribute_columns):
[
"Current EPC Band", "Current SAP Rating", "Survey: Matching Address ID", 'Distance to Closest Match (m)'] +
survey_attribute_columns
):
if pd.isnull(results[c]).sum():
raise Exception("Something went wrong")
@ -2382,5 +2319,76 @@ def propsed_wave_3_sample():
total_bid_size = bid_size + no_loss_postcodes["Gain"].sum()
print(total_bid_size)
# Label final outputs
# We create a summary of packages by street
results["Package Ref"] = results["Package Ref"].fillna("Incomplete")
results["Package Ref"] = results["Package Ref"].astype(str)
package_summary = results.pivot_table(
index='Street and Region',
columns='Package Ref',
aggfunc='size',
fill_value=0
).reset_index()
street_bid_structure = street_summary.merge(
package_summary, how="left", on="Street and Region"
)
street_bid_structure = street_bid_structure.sort_values("Gain", ascending=False)
street_bid_structure.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
)
individual_units_programme = results.copy()
individual_units_programme["Unit in Programme"] = individual_units_programme["Street and Region"].isin(
street_bid_structure[street_bid_structure["Selected"]]["Street and Region"].values
)
# Merge on Stonewater's ID
asset_list_ids = pd.read_excel(
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
"- Archetyped V3.1.xlsx",
header=4
)[["Address ID", "Org. ref."]]
# Clean address ids
asset_list_ids = asset_list_ids[~pd.isnull(asset_list_ids["Address ID"])]
asset_list_ids = asset_list_ids[asset_list_ids["Address ID"] != "Address ID"]
asset_list_ids["Address ID"] = asset_list_ids["Address ID"].astype(int)
individual_units_programme = individual_units_programme.merge(
asset_list_ids,
how="left",
on="Address ID",
)
individual_units_programme = individual_units_programme.merge(
asset_list_ids.rename(
columns={"Org. ref.": "Survey: Org. ref.", "Address ID": "Survey: Matching Address ID"}
),
how="left",
on="Survey: Matching Address ID"
)
individual_units_programme["Survey: Org. ref."] = np.where(
(individual_units_programme["Survey: Matching Address ID"] == "Not Surveyed"),
"Not Surveyed",
individual_units_programme["Survey: Org. ref."]
)
if pd.isnull(individual_units_programme["Survey: Org. ref."]).sum() or pd.isnull(
individual_units_programme["Org. ref."]).sum():
raise ValueError("something went wrong")
for col in ["Survey: Main Roof Type", "Survey: Main Wall Type", "Survey: Main Alternative Wall"]:
individual_units_programme[col] = (
individual_units_programme[col]
.str.replace(r': nan(?=$|:)', '', regex=True) # Remove ': nan' at the end or before another ':'
.str.replace(r':\s+:', ': ', regex=True) # Replace occurrences of ': :' with ': '
.str.replace(r'\s+', ' ', regex=True) # Replace multiple spaces with a single space
.str.strip() # Strip leading/trailing spaces
)
individual_units_programme.to_csv(
os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme.csv"), index=False
)
# if __name__ == "__main__":
# main()