mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
fixing filling of property
This commit is contained in:
parent
d65c99f62a
commit
d163ca9931
1 changed file with 98 additions and 90 deletions
|
|
@ -1669,7 +1669,7 @@ def propsed_wave_3_sample():
|
|||
header=4
|
||||
)
|
||||
|
||||
# TODO: We drop 302 properties that are not priority postcodes - confirm w/ Stonewater and 7 properties missing
|
||||
# TODO: We drop 7 properties missing
|
||||
# UPRN
|
||||
asset_list = asset_list[~asset_list["Archetype ID"].isin(["MISSING UPRN"])]
|
||||
# Clean address ids
|
||||
|
|
@ -1699,15 +1699,23 @@ def propsed_wave_3_sample():
|
|||
os.path.join(CUSTOMER_FOLDER_PATH, "Stonewater - costed retrofit packages V3.xlsx"),
|
||||
header=0
|
||||
)
|
||||
survey_results = survey_results.merge(
|
||||
|
||||
survey_results = survey_results.drop(
|
||||
columns=["Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"]
|
||||
).merge(
|
||||
additional_survey_data[
|
||||
[
|
||||
"Address ID",
|
||||
"Main Wall Type", "Main Wall Insulation_x", "Main Wall Thickness",
|
||||
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
|
||||
"Main Building Alternative Wall Thickness"
|
||||
"Main Building Alternative Wall Thickness",
|
||||
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness"
|
||||
]
|
||||
].rename(columns={"Main Wall Insulation_x": "Main Wall Insulation Type"}),
|
||||
].rename(
|
||||
columns={
|
||||
"Main Wall Insulation_x": "Main Wall Insulation Type",
|
||||
}
|
||||
),
|
||||
how="left",
|
||||
on="Address ID"
|
||||
)
|
||||
|
|
@ -1718,6 +1726,7 @@ def propsed_wave_3_sample():
|
|||
"Address ID", "Archetype ID", "Current SAP Rating", "Current EPC Band", "Postcode",
|
||||
"Main Roof Type", "Main Roof Insulation", "Main Roof Insulation Thickness",
|
||||
"Existing Primary Heating System",
|
||||
"Package Ref",
|
||||
"Main Wall Type", "Main Wall Insulation Type", "Main Wall Thickness",
|
||||
"Main Building Alternative Wall Type", "Main Building Alternative Wall Insulation",
|
||||
"Main Building Alternative Wall Thickness"
|
||||
|
|
@ -1727,6 +1736,7 @@ def propsed_wave_3_sample():
|
|||
"Existing Primary Heating System": "Survey: Primary Heating System"
|
||||
}
|
||||
)
|
||||
|
||||
survey_results["Postal Region"] = survey_results["Postcode"].str.split(" ").str[0]
|
||||
# Concatenate from the wall information
|
||||
survey_results["Survey: Main Wall Type"] = survey_results["Main Wall Type"].astype(str) + ": " + survey_results[
|
||||
|
|
@ -1929,7 +1939,7 @@ def propsed_wave_3_sample():
|
|||
region_assets = region_assets.merge(
|
||||
exact_surveyed[
|
||||
["Address ID", "Current EPC Band", "Current SAP Rating"] + survey_attribute_columns + [
|
||||
"Survey: Matching Address ID"
|
||||
"Survey: Matching Address ID", "Package Ref"
|
||||
]
|
||||
],
|
||||
on="Address ID",
|
||||
|
|
@ -2005,6 +2015,7 @@ def propsed_wave_3_sample():
|
|||
'Survey: Primary Heating System': closest_match["Survey: Primary Heating System"],
|
||||
"Survey: Matching Address ID": closest_match["Address ID"],
|
||||
'Distance to Closest Match (m)': closest_match["distance_meters"],
|
||||
"Package Ref": closest_match["Package Ref"],
|
||||
"Match Type": match_type
|
||||
}
|
||||
)
|
||||
|
|
@ -2015,7 +2026,8 @@ def propsed_wave_3_sample():
|
|||
columns=[
|
||||
"Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
|
||||
'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
|
||||
'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)'
|
||||
'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)',
|
||||
"Match Type"
|
||||
]
|
||||
)
|
||||
|
||||
|
|
@ -2032,8 +2044,8 @@ def propsed_wave_3_sample():
|
|||
# Label the tier 1 properties
|
||||
region_assets["Confidence Tier"] = np.where(
|
||||
region_assets["Current EPC Band_method1"].isin(["D", "E", "F", "G"]) &
|
||||
pd.isnull(region_assets["Confidence Tier"]),
|
||||
"1 - Archetype surveyed in region", region_assets["Confidence Tier"]
|
||||
pd.isnull(region_assets["Confidence Tier"]) & ~pd.isnull(region_assets["Match Type"]),
|
||||
region_assets["Match Type"], region_assets["Confidence Tier"]
|
||||
)
|
||||
|
||||
# Handle EPC C
|
||||
|
|
@ -2046,86 +2058,7 @@ def propsed_wave_3_sample():
|
|||
region_assets = fill_survey_columns(region_assets, suffix="_method1")
|
||||
|
||||
method_1_columns = [c for c in region_assets.columns if c.endswith("_method1")]
|
||||
region_assets = region_assets.drop(columns=method_1_columns)
|
||||
|
||||
missed_archetypes = set(archetype_ids) - set(region_surveyed["Archetype ID"])
|
||||
|
||||
# archetype_surveyed = []
|
||||
for arch_id in missed_archetypes:
|
||||
for _, property in region_assets[region_assets["Archetype ID"] == arch_id].iterrows():
|
||||
archetype_data = survey_results_with_original_features[
|
||||
survey_results["Archetype ID"] == arch_id
|
||||
].copy()
|
||||
if archetype_data.empty:
|
||||
continue
|
||||
raise Exception("IMPLEMENT ME")
|
||||
# archetype_data["distance_meters"] = haversine(
|
||||
# lat1=property.latitude, lon1=property.longitude,
|
||||
# lat2=archetype_data["latitude"].values, lon2=archetype_data["longitude"].values
|
||||
# )
|
||||
# expected_sap = np.average(
|
||||
# archetype_data["Current SAP Rating"], weights=1 / (archetype_data["distance_meters"] + 1)
|
||||
# )
|
||||
# expected_epc = sap_to_epc(expected_sap)
|
||||
# archetype_surveyed.append(
|
||||
# {
|
||||
# "Archetype ID": arch_id,
|
||||
# "Address ID": property["Address ID"],
|
||||
# "Current EPC Band": expected_epc
|
||||
# }
|
||||
# )
|
||||
# archetype_surveyed = pd.DataFrame(archetype_surveyed)
|
||||
# if archetype_surveyed.empty:
|
||||
# archetype_surveyed = pd.DataFrame(
|
||||
# columns=[
|
||||
# "Archetype ID", "Address ID", "Current EPC Band", "Current SAP Rating",
|
||||
# 'Survey: Main Wall Type', 'Survey: Main Alternative Wall', 'Survey: Main Roof Type',
|
||||
# 'Survey: Primary Heating System', "Survey: Matching Address ID", 'Distance to Closest Match (m)'
|
||||
# ]
|
||||
# )
|
||||
#
|
||||
# region_assets = region_assets.merge(
|
||||
# archetype_surveyed,
|
||||
# on=["Archetype ID", "Address ID"],
|
||||
# how="left",
|
||||
# suffixes=("", "_method2")
|
||||
# )
|
||||
#
|
||||
# region_assets["Confidence Tier"] = np.where(
|
||||
# region_assets["Current EPC Band_method2"].isin(["D", "E", "F", "G"]) & pd.isnull(
|
||||
# region_assets["Confidence Tier"]),
|
||||
# "2 - same archetype", region_assets["Confidence Tier"]
|
||||
# )
|
||||
#
|
||||
# for col in [
|
||||
# 'Current EPC Band', 'Current SAP Rating',
|
||||
# 'Survey: Main Wall Type', 'Survey: Main Alternative Wall',
|
||||
# 'Survey: Main Roof Type', 'Survey: Primary Heating System',
|
||||
# 'Survey: Matching Address ID', 'Distance to Closest Match (m)'
|
||||
# ]:
|
||||
# region_assets[col] = np.where(
|
||||
# pd.isnull(region_assets[col]) & pd.notnull(region_assets[col + "_method2"]),
|
||||
# region_assets[col + "_method2"], region_assets[col]
|
||||
# )
|
||||
#
|
||||
# method_2_columns = [c for c in region_assets.columns if c.endswith("_method2")]
|
||||
# region_assets = region_assets.drop(columns=method_2_columns)
|
||||
|
||||
# We label EPC C properties
|
||||
# region_assets["Confidence Tier"] = np.where(
|
||||
# region_assets["Current EPC Band"].isin(["C", "B", "A"]) & pd.isnull(region_assets["Confidence Tier"]),
|
||||
# "5 - EPC C or above", region_assets["Confidence Tier"]
|
||||
# )
|
||||
#
|
||||
# region_assets["Confidence Tier"] = np.where(
|
||||
# region_assets["Archetype ID"] == "EPC C OR ABOVE",
|
||||
# "5 - EPC C or above", region_assets["Confidence Tier"]
|
||||
# )
|
||||
#
|
||||
# region_assets["Current EPC Band"] = np.where(
|
||||
# region_assets["Archetype ID"] == "EPC C OR ABOVE",
|
||||
# "C", region_assets["Current EPC Band"]
|
||||
# )
|
||||
region_assets = region_assets.drop(columns=method_1_columns + ["Match Type"])
|
||||
|
||||
missed_addressids = region_assets[pd.isnull(region_assets["Confidence Tier"])]["Address ID"].unique().tolist()
|
||||
|
||||
|
|
@ -2217,6 +2150,7 @@ def propsed_wave_3_sample():
|
|||
"Survey: Primary Heating System": "Not Surveyed",
|
||||
"Survey: Matching Address ID": "Not Surveyed",
|
||||
'Distance to Closest Match (m)': 9999999,
|
||||
"Package Ref": "Not Surveyed",
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
|
@ -2261,6 +2195,7 @@ def propsed_wave_3_sample():
|
|||
"Survey: Primary Heating System": closest_match["Survey: Primary Heating System"],
|
||||
"Survey: Matching Address ID": closest_match["Address ID"],
|
||||
'Distance to Closest Match (m)': closest_match["distance_meters"],
|
||||
"Package Ref": closest_match["Package Ref"]
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
|
@ -2292,8 +2227,10 @@ def propsed_wave_3_sample():
|
|||
|
||||
# Check for missing values in current EPC band, current SAP rating or any of the survey attributes
|
||||
for c in (
|
||||
["Current EPC Band", "Current SAP Rating", "Survey: Matching Address ID", 'Distance to Closest Match (m)'] +
|
||||
survey_attribute_columns):
|
||||
[
|
||||
"Current EPC Band", "Current SAP Rating", "Survey: Matching Address ID", 'Distance to Closest Match (m)'] +
|
||||
survey_attribute_columns
|
||||
):
|
||||
if pd.isnull(results[c]).sum():
|
||||
raise Exception("Something went wrong")
|
||||
|
||||
|
|
@ -2382,5 +2319,76 @@ def propsed_wave_3_sample():
|
|||
total_bid_size = bid_size + no_loss_postcodes["Gain"].sum()
|
||||
print(total_bid_size)
|
||||
|
||||
# Label final outputs
|
||||
# We create a summary of packages by street
|
||||
results["Package Ref"] = results["Package Ref"].fillna("Incomplete")
|
||||
results["Package Ref"] = results["Package Ref"].astype(str)
|
||||
package_summary = results.pivot_table(
|
||||
index='Street and Region',
|
||||
columns='Package Ref',
|
||||
aggfunc='size',
|
||||
fill_value=0
|
||||
).reset_index()
|
||||
|
||||
street_bid_structure = street_summary.merge(
|
||||
package_summary, how="left", on="Street and Region"
|
||||
)
|
||||
street_bid_structure = street_bid_structure.sort_values("Gain", ascending=False)
|
||||
street_bid_structure.to_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Street Bid Structure.csv"), index=False
|
||||
)
|
||||
|
||||
individual_units_programme = results.copy()
|
||||
individual_units_programme["Unit in Programme"] = individual_units_programme["Street and Region"].isin(
|
||||
street_bid_structure[street_bid_structure["Selected"]]["Street and Region"].values
|
||||
)
|
||||
|
||||
# Merge on Stonewater's ID
|
||||
asset_list_ids = pd.read_excel(
|
||||
"/Users/khalimconn-kowlessar/Documents/hestia/Customers/Stonewater/Stonewater SHDF_3_0_Board Triage 22.05.24 "
|
||||
"- Archetyped V3.1.xlsx",
|
||||
header=4
|
||||
)[["Address ID", "Org. ref."]]
|
||||
# Clean address ids
|
||||
asset_list_ids = asset_list_ids[~pd.isnull(asset_list_ids["Address ID"])]
|
||||
asset_list_ids = asset_list_ids[asset_list_ids["Address ID"] != "Address ID"]
|
||||
asset_list_ids["Address ID"] = asset_list_ids["Address ID"].astype(int)
|
||||
individual_units_programme = individual_units_programme.merge(
|
||||
asset_list_ids,
|
||||
how="left",
|
||||
on="Address ID",
|
||||
)
|
||||
|
||||
individual_units_programme = individual_units_programme.merge(
|
||||
asset_list_ids.rename(
|
||||
columns={"Org. ref.": "Survey: Org. ref.", "Address ID": "Survey: Matching Address ID"}
|
||||
),
|
||||
how="left",
|
||||
on="Survey: Matching Address ID"
|
||||
)
|
||||
|
||||
individual_units_programme["Survey: Org. ref."] = np.where(
|
||||
(individual_units_programme["Survey: Matching Address ID"] == "Not Surveyed"),
|
||||
"Not Surveyed",
|
||||
individual_units_programme["Survey: Org. ref."]
|
||||
)
|
||||
|
||||
if pd.isnull(individual_units_programme["Survey: Org. ref."]).sum() or pd.isnull(
|
||||
individual_units_programme["Org. ref."]).sum():
|
||||
raise ValueError("something went wrong")
|
||||
|
||||
for col in ["Survey: Main Roof Type", "Survey: Main Wall Type", "Survey: Main Alternative Wall"]:
|
||||
individual_units_programme[col] = (
|
||||
individual_units_programme[col]
|
||||
.str.replace(r': nan(?=$|:)', '', regex=True) # Remove ': nan' at the end or before another ':'
|
||||
.str.replace(r':\s+:', ': ', regex=True) # Replace occurrences of ': :' with ': '
|
||||
.str.replace(r'\s+', ' ', regex=True) # Replace multiple spaces with a single space
|
||||
.str.strip() # Strip leading/trailing spaces
|
||||
)
|
||||
|
||||
individual_units_programme.to_csv(
|
||||
os.path.join(CUSTOMER_FOLDER_PATH, "Individual units - programme.csv"), index=False
|
||||
)
|
||||
|
||||
# if __name__ == "__main__":
|
||||
# main()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue