preparing outputs for stonewater

This commit is contained in:
Khalim Conn-Kowlessar 2025-02-13 22:32:31 +00:00
parent b8a094106c
commit bd131a2f66

View file

@ -2984,6 +2984,8 @@ def revised_model():
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
original_archetypes["UPRN"] = original_archetypes["UPRN"].astype("Int64").astype(str)
wave_21_folder_name = "Wave 2.1 Surveys - 2"
# Check if we have all of the addresses
missed = original_archetypes[
~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
@ -3028,7 +3030,6 @@ def revised_model():
"10. Little Island",
"11. CCS Dorset"
]
wave_21_folder_name = "Wave 2.1 Surveys - 2"
for wave_2_1_folder in wave_21_folders:
folder_path = os.path.join(CUSTOMER_FOLDER_PATH, wave_21_folder_name, wave_2_1_folder)
@ -3252,7 +3253,9 @@ def revised_model():
'Main Wall Thickness', 'Main Building Alternative Wall Type',
'Main Building Alternative Wall Insulation',
'Main Building Alternative Wall Dry-lining',
'Main Building Alternative Wall Thickness', 'Main Fuel'
'Main Building Alternative Wall Thickness',
'Main Fuel',
'Main Building Age Band',
]
# For the columns in retrofit_assessments_data_columns, prefix all of them with Survey:
retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns]
@ -3795,7 +3798,8 @@ def revised_model():
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
'SAP Band Install Package', 'Package Approved (Client)',
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
'Ventilation', 'Heating', 'Other Measures', 'PV System',
"Asset ID.1_y",
] + retrofit_assessments_data_columns_prefixed
].rename(
columns={
@ -3811,6 +3815,7 @@ def revised_model():
'Heating': 'Main Heating',
'Other Measures': 'Other measures',
'Asset ID.1_y': 'Organisation Reference',
"PV System": "Solar PV",
}
),
wates_coordination[
@ -3818,8 +3823,7 @@ def revised_model():
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
'SAP Band Install Package', 'Package Approved (Client)',
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x', "PV System"
] + retrofit_assessments_data_columns_prefixed
].rename(
columns={
@ -3835,6 +3839,7 @@ def revised_model():
'Heating': 'Main Heating',
'Other Measures': 'Other measures',
'Asset ID_x': 'Organisation Reference',
"PV System": "Solar PV",
}
)
]
@ -3857,12 +3862,12 @@ def revised_model():
def find_nearest_matching_property(coordinated_packages, home):
filter_levels = [
(["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 1),
(["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
(["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 4),
(["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 5),
(["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 6),
(["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
(["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 4),
(["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 5),
(["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 6),
(["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 7),
]
max_confidence = max([confidence for (_, confidence) in filter_levels])
@ -3911,12 +3916,13 @@ def revised_model():
{
"Organisation Reference": home["Organisation Reference"],
"Best Match Organisation Reference": m,
"match_confidence": 1,
"Was Surveyed": True
} for m in survey_result["Organisation Reference"].values
]
matches.extend(to_extend)
continue
blah
closest_match, match_confidence = find_nearest_matching_property(coordinated_packages, home)
if closest_match is None:
no_match.append(home["Organisation Reference"])
@ -3926,6 +3932,7 @@ def revised_model():
{
"Organisation Reference": home["Organisation Reference"],
"Best Match Organisation Reference": m,
"match_confidence": match_confidence,
"Was Surveyed": False
} for m in closest_match["Organisation Reference"].values
]
@ -3953,10 +3960,29 @@ def revised_model():
suffixes=("", " - Closest Match")
)
measures_columns = [
'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
'Solar PV', 'Other measures'
]
# Collapse the matches to one row per Organisation Reference, aggregating when a property has several matches
aggregated_matches_df = []
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
measures = coordinated_packages[
(
coordinated_packages["Organisation Reference"].isin(
mapped_matches['Best Match Organisation Reference'].values
)
)
][measures_columns]
if mapped_matches.shape[0] == 1:
# Get the measures for this property
measures = measures.squeeze()
aggregated_matches_df.append(
{
"Organisation Reference": org_ref,
@ -3965,6 +3991,7 @@ def revised_model():
"Estimated SAP Rating": mapped_matches["Survey: Current SAP Rating"].values[0],
"Estimated EPC Rating": mapped_matches["Survey: Current EPC Band"].values[0],
"Was Surveyed": mapped_matches["Was Surveyed"].values[0],
**measures
}
)
continue
@ -3978,6 +4005,17 @@ def revised_model():
mapped_matches[mapped_matches["Survey: Current EPC Band"] == average_epc_rating].shape[
0] / number_of_matches * 100
)
measures_aggregated = {}
for m in measures_columns:
if any(~pd.isnull(measures[m])):
# If the non-null values disagree (more than one unique value), join them; otherwise keep the single value
vals = measures[~pd.isnull(measures[m])][m].unique()
if len(vals) > 1:
measures_aggregated[m] = ", ".join(vals)
else:
measures_aggregated[m] = vals[0]
aggregated_matches_df.append(
{
"Organisation Reference": org_ref,
@ -3985,7 +4023,8 @@ def revised_model():
"Proportion": proportion_with_this_epc,
"Estimated SAP Rating": average_rating,
"Estimated EPC Rating": average_epc_rating,
"Was Surveyed": False
"Was Surveyed": False,
**measures_aggregated
}
)
@ -4002,7 +4041,6 @@ def revised_model():
def remove_leading_zero(address):
    """Strip the leading zero from a zero-padded single-digit house number.

    An address that begins with ``0``, one digit in 1-9 and a space has the
    ``0`` removed (``"07 High Street"`` -> ``"7 High Street"``). Any other
    string is returned unchanged.

    Args:
        address: The address line to normalise. Non-string values (for
            example ``None`` or a float NaN from a missing cell) are
            returned as-is rather than raising ``TypeError`` in ``re.sub``.

    Returns:
        The address without the leading zero, or the input unchanged when
        the pattern does not match or the input is not a string.
    """
    if not isinstance(address, str):
        # re.sub only accepts str/bytes; let missing values pass through.
        return address
    return re.sub(r"^0([1-9]) ", r"\1 ", address)
# Example usage
mapped_priority_list["address1"] = mapped_priority_list["address1"].apply(remove_leading_zero)
mapped_priority_list["address1"] = np.where(
mapped_priority_list["Organisation Reference"] == 37004,
@ -4020,6 +4058,13 @@ def revised_model():
)
mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"]
# Flag rows where at least two of the three EPC sources (SAP Band, EPC Band, Estimated EPC Rating) agree
mapped_priority_list["2 of 3 Data Sources Have Consensus on EPC"] = (
(mapped_priority_list["SAP Band"] == mapped_priority_list["EPC Band"]) |
(mapped_priority_list["SAP Band"] == mapped_priority_list["Estimated EPC Rating"]) |
(mapped_priority_list["EPC Band"] == mapped_priority_list["Estimated EPC Rating"])
)
# Let's get the newest EPC data for these properties
# We merge on UPRN, when we have it
# from etl.route_march_data_pull.app import get_data
@ -4081,6 +4126,7 @@ def revised_model():
'Survey: Main Building Alternative Wall Dry-lining',
'Survey: Main Building Alternative Wall Thickness',
'Survey: Main Fuel',
'Survey: Main Building Age Band',
'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type'
]
].rename(
@ -4133,7 +4179,8 @@ def revised_model():
[
"Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation',
'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness',
'Survey: Existing Primary Heating System',
'Survey: Existing Primary Heating System', 'Survey: Main Building Age Band',
'Survey: Main Building Wall Area (m2)',
]
].rename(
columns={