mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
preparing outputs for stonewater
This commit is contained in:
parent
b8a094106c
commit
bd131a2f66
1 changed file with 62 additions and 15 deletions
|
|
@ -2984,6 +2984,8 @@ def revised_model():
|
|||
original_archetypes["Address ID"] = original_archetypes["Address ID"].astype(int)
|
||||
original_archetypes["UPRN"] = original_archetypes["UPRN"].astype("Int64").astype(str)
|
||||
|
||||
wave_21_folder_name = "Wave 2.1 Surveys - 2"
|
||||
|
||||
# Check if we have all of the addresses
|
||||
missed = original_archetypes[
|
||||
~original_archetypes["Address ID"].isin(new_priority_postcodes["Address ID"].values)
|
||||
|
|
@ -3028,7 +3030,6 @@ def revised_model():
|
|||
"10. Little Island",
|
||||
"11. CCS Dorset"
|
||||
]
|
||||
wave_21_folder_name = "Wave 2.1 Surveys - 2"
|
||||
|
||||
for wave_2_1_folder in wave_21_folders:
|
||||
folder_path = os.path.join(CUSTOMER_FOLDER_PATH, wave_21_folder_name, wave_2_1_folder)
|
||||
|
|
@ -3252,7 +3253,9 @@ def revised_model():
|
|||
'Main Wall Thickness', 'Main Building Alternative Wall Type',
|
||||
'Main Building Alternative Wall Insulation',
|
||||
'Main Building Alternative Wall Dry-lining',
|
||||
'Main Building Alternative Wall Thickness', 'Main Fuel'
|
||||
'Main Building Alternative Wall Thickness',
|
||||
'Main Fuel',
|
||||
'Main Building Age Band',
|
||||
]
|
||||
# For the columns in retrofit_assessments_data_columns, prefix all of them with Survey:
|
||||
retrofit_assessments_data_columns_prefixed = ["Survey: " + x for x in retrofit_assessments_data_columns]
|
||||
|
|
@ -3795,7 +3798,8 @@ def revised_model():
|
|||
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
|
||||
'SAP Band Install Package', 'Package Approved (Client)',
|
||||
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
|
||||
'Ventilation', 'Heating', 'Other Measures', "Asset ID.1_y",
|
||||
'Ventilation', 'Heating', 'Other Measures', 'PV System',
|
||||
"Asset ID.1_y",
|
||||
] + retrofit_assessments_data_columns_prefixed
|
||||
].rename(
|
||||
columns={
|
||||
|
|
@ -3811,6 +3815,7 @@ def revised_model():
|
|||
'Heating': 'Main Heating',
|
||||
'Other Measures': 'Other measures',
|
||||
'Asset ID.1_y': 'Organisation Reference',
|
||||
"PV System": "Solar PV",
|
||||
}
|
||||
),
|
||||
wates_coordination[
|
||||
|
|
@ -3818,8 +3823,7 @@ def revised_model():
|
|||
"Name", "Postcode", 'SAP Band Pre', 'SAP Rating Pre', 'SAP Rating Install Package',
|
||||
'SAP Band Install Package', 'Package Approved (Client)',
|
||||
'Wall Insulation', 'Loft Insulation', 'Windows Upgrade', 'Ext. Doors Upgrade',
|
||||
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x'
|
||||
|
||||
'Ventilation', 'Heating', 'Other Measures', 'Asset ID_x', "PV System"
|
||||
] + retrofit_assessments_data_columns_prefixed
|
||||
].rename(
|
||||
columns={
|
||||
|
|
@ -3835,6 +3839,7 @@ def revised_model():
|
|||
'Heating': 'Main Heating',
|
||||
'Other Measures': 'Other measures',
|
||||
'Asset ID_x': 'Organisation Reference',
|
||||
"PV System": "Solar PV",
|
||||
}
|
||||
)
|
||||
]
|
||||
|
|
@ -3857,12 +3862,12 @@ def revised_model():
|
|||
|
||||
def find_nearest_matching_property(coordinated_packages, home):
|
||||
filter_levels = [
|
||||
(["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 1),
|
||||
(["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
|
||||
(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
|
||||
(["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 4),
|
||||
(["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 5),
|
||||
(["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 6),
|
||||
(["Postcode", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 2),
|
||||
(["Postal Region", "Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 3),
|
||||
(["Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 4),
|
||||
(["Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 5),
|
||||
(["Primary Property Type", "Walls", "Roofs", "Heating", "Main Fuel", "Age"], 6),
|
||||
(["Primary Property Type", "Walls", "Roof Simple", "Heating", "Main Fuel", "Age"], 7),
|
||||
]
|
||||
|
||||
max_confidence = max([confidence for (_, confidence) in filter_levels])
|
||||
|
|
@ -3911,12 +3916,13 @@ def revised_model():
|
|||
{
|
||||
"Organisation Reference": home["Organisation Reference"],
|
||||
"Best Match Organisation Reference": m,
|
||||
"match_confidence": 1,
|
||||
"Was Surveyed": True
|
||||
} for m in survey_result["Organisation Reference"].values
|
||||
]
|
||||
matches.extend(to_extend)
|
||||
continue
|
||||
blah
|
||||
|
||||
closest_match, match_confidence = find_nearest_matching_property(coordinated_packages, home)
|
||||
if closest_match is None:
|
||||
no_match.append(home["Organisation Reference"])
|
||||
|
|
@ -3926,6 +3932,7 @@ def revised_model():
|
|||
{
|
||||
"Organisation Reference": home["Organisation Reference"],
|
||||
"Best Match Organisation Reference": m,
|
||||
"match_confidence": match_confidence,
|
||||
"Was Surveyed": False
|
||||
} for m in closest_match["Organisation Reference"].values
|
||||
]
|
||||
|
|
@ -3953,10 +3960,29 @@ def revised_model():
|
|||
suffixes=("", " - Closest Match")
|
||||
)
|
||||
|
||||
measures_columns = [
|
||||
'Main Wall Insulation', 'Secondary Wall Insulation', 'Loft insulation',
|
||||
'Flat Roof', 'Room in Roof', 'Window Upgrade', 'Door Upgrade',
|
||||
'Ventilation', 'Main Heating', 'Water Heating', 'Heating Controls',
|
||||
'Solar PV', 'Other measures'
|
||||
]
|
||||
|
||||
# We want to aggregate the matches, when we have multiple
|
||||
aggregated_matches_df = []
|
||||
for org_ref, mapped_matches in matches_df.groupby("Organisation Reference"):
|
||||
|
||||
measures = coordinated_packages[
|
||||
(
|
||||
coordinated_packages["Organisation Reference"].isin(
|
||||
mapped_matches['Best Match Organisation Reference'].values
|
||||
)
|
||||
)
|
||||
][measures_columns]
|
||||
|
||||
if mapped_matches.shape[0] == 1:
|
||||
# Get the measures for this property
|
||||
measures = measures.squeeze()
|
||||
|
||||
aggregated_matches_df.append(
|
||||
{
|
||||
"Organisation Reference": org_ref,
|
||||
|
|
@ -3965,6 +3991,7 @@ def revised_model():
|
|||
"Estimated SAP Rating": mapped_matches["Survey: Current SAP Rating"].values[0],
|
||||
"Estimated EPC Rating": mapped_matches["Survey: Current EPC Band"].values[0],
|
||||
"Was Surveyed": mapped_matches["Was Surveyed"].values[0],
|
||||
**measures
|
||||
}
|
||||
)
|
||||
continue
|
||||
|
|
@ -3978,6 +4005,17 @@ def revised_model():
|
|||
mapped_matches[mapped_matches["Survey: Current EPC Band"] == average_epc_rating].shape[
|
||||
0] / number_of_matches * 100
|
||||
)
|
||||
|
||||
measures_aggregated = {}
|
||||
for m in measures_columns:
|
||||
if any(~pd.isnull(measures[m])):
|
||||
# Check if we have 2 unique values
|
||||
vals = measures[~pd.isnull(measures[m])][m].unique()
|
||||
if len(vals) > 1:
|
||||
measures_aggregated[m] = ", ".join(vals)
|
||||
else:
|
||||
measures_aggregated[m] = vals[0]
|
||||
|
||||
aggregated_matches_df.append(
|
||||
{
|
||||
"Organisation Reference": org_ref,
|
||||
|
|
@ -3985,7 +4023,8 @@ def revised_model():
|
|||
"Proportion": proportion_with_this_epc,
|
||||
"Estimated SAP Rating": average_rating,
|
||||
"Estimated EPC Rating": average_epc_rating,
|
||||
"Was Surveyed": False
|
||||
"Was Surveyed": False,
|
||||
**measures_aggregated
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -4002,7 +4041,6 @@ def revised_model():
|
|||
def remove_leading_zero(address):
|
||||
return re.sub(r"^0([1-9]) ", r"\1 ", address)
|
||||
|
||||
# Example usage
|
||||
mapped_priority_list["address1"] = mapped_priority_list["address1"].apply(remove_leading_zero)
|
||||
mapped_priority_list["address1"] = np.where(
|
||||
mapped_priority_list["Organisation Reference"] == 37004,
|
||||
|
|
@ -4020,6 +4058,13 @@ def revised_model():
|
|||
)
|
||||
mapped_priority_list["row_id"] = mapped_priority_list["Organisation Reference"]
|
||||
|
||||
# Flag where 2 out of the three columns have consensus
|
||||
mapped_priority_list["2 of 3 Data Sources Have Consensus on EPC"] = (
|
||||
(mapped_priority_list["SAP Band"] == mapped_priority_list["EPC Band"]) |
|
||||
(mapped_priority_list["SAP Band"] == mapped_priority_list["Estimated EPC Rating"]) |
|
||||
(mapped_priority_list["EPC Band"] == mapped_priority_list["Estimated EPC Rating"])
|
||||
)
|
||||
|
||||
# Let's get the newest EPC data for these properties
|
||||
# We merge on UPRN, when we have it
|
||||
# from etl.route_march_data_pull.app import get_data
|
||||
|
|
@ -4081,6 +4126,7 @@ def revised_model():
|
|||
'Survey: Main Building Alternative Wall Dry-lining',
|
||||
'Survey: Main Building Alternative Wall Thickness',
|
||||
'Survey: Main Fuel',
|
||||
'Survey: Main Building Age Band',
|
||||
'Walls', 'Roofs', 'Heating', 'Main Fuel', 'Age', 'Property Type'
|
||||
]
|
||||
].rename(
|
||||
|
|
@ -4133,7 +4179,8 @@ def revised_model():
|
|||
[
|
||||
"Organisation Reference", 'Survey: Main Wall Type', 'Survey: Main Wall Insulation',
|
||||
'Survey: Main Roof Type', 'Survey: Main Roof Insulation', 'Survey: Main Roof Insulation Thickness',
|
||||
'Survey: Existing Primary Heating System',
|
||||
'Survey: Existing Primary Heating System', 'Survey: Main Building Age Band',
|
||||
'Survey: Main Building Wall Area (m2)',
|
||||
]
|
||||
].rename(
|
||||
columns={
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue