attempting to debug missing walls description

This commit is contained in:
Khalim Conn-Kowlessar 2025-12-14 23:14:11 +08:00
parent 8b7c6086ae
commit f5fa97e179
4 changed files with 37 additions and 44 deletions

View file

@ -611,7 +611,10 @@ class Property:
if self.data[description] in self.DATA_ANOMALY_MATCHES:
template = cleaned[description][0]
fill_dict = dict(zip(template.keys(), [None] * len(template)))
# Edge case for walls: fill the template with False instead of None
fill_with = False if description == "walls-description" else None
fill_dict = dict(zip(template.keys(), [fill_with] * len(template)))
fill_dict.update(
{
"original_description": self.data[description],

View file

@ -498,56 +498,43 @@ class TrainingDataset(BaseDataset):
Drop properties that have inconsistent data, i.e. changing material types
"""
starting_and_finishing_null = (
expanded_df["original_description"].isin([None, ""]) &
expanded_df["original_description_ending"].isin([None, ""])
)
if component == "walls":
expanded_df = expanded_df[
(expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"])
& (
expanded_df["is_solid_brick"]
== expanded_df["is_solid_brick_ending"]
)
& (
expanded_df["is_timber_frame"]
== expanded_df["is_timber_frame_ending"]
)
& (
expanded_df["is_granite_or_whinstone"]
== expanded_df["is_granite_or_whinstone_ending"]
)
& (expanded_df["is_cob"] == expanded_df["is_cob_ending"])
& (
expanded_df["is_sandstone_or_limestone"]
== expanded_df["is_sandstone_or_limestone_ending"]
starting_and_finishing_null | (
(expanded_df["is_cavity_wall"] == expanded_df["is_cavity_wall_ending"])
& (expanded_df["is_solid_brick"] == expanded_df["is_solid_brick_ending"])
& (expanded_df["is_timber_frame"] == expanded_df["is_timber_frame_ending"])
& (expanded_df["is_granite_or_whinstone"] == expanded_df["is_granite_or_whinstone_ending"])
& (expanded_df["is_cob"] == expanded_df["is_cob_ending"])
& (expanded_df["is_sandstone_or_limestone"] == expanded_df["is_sandstone_or_limestone_ending"])
)
]
elif component == "floor":
expanded_df = expanded_df[
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
& (expanded_df["is_solid"] == expanded_df["is_solid_ending"])
& (
expanded_df["another_property_below"]
== expanded_df["another_property_below_ending"]
)
& (
expanded_df["is_to_unheated_space"]
== expanded_df["is_to_unheated_space_ending"]
)
& (
expanded_df["is_to_external_air"]
== expanded_df["is_to_external_air_ending"]
starting_and_finishing_null | (
(expanded_df["is_suspended"] == expanded_df["is_suspended_ending"])
& (expanded_df["is_solid"] == expanded_df["is_solid_ending"])
& (expanded_df["another_property_below"] == expanded_df["another_property_below_ending"])
& (expanded_df["is_to_unheated_space"] == expanded_df["is_to_unheated_space_ending"])
& (expanded_df["is_to_external_air"] == expanded_df["is_to_external_air_ending"])
)
]
elif component == "roof":
expanded_df = expanded_df[
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
& (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"])
& (expanded_df["is_loft"] == expanded_df["is_loft_ending"])
& (expanded_df["is_flat"] == expanded_df["is_flat_ending"])
& (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"])
& (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"])
& (
expanded_df["has_dwelling_above"]
== expanded_df["has_dwelling_above_ending"]
starting_and_finishing_null | (
(expanded_df["is_pitched"] == expanded_df["is_pitched_ending"])
& (expanded_df["is_roof_room"] == expanded_df["is_roof_room_ending"])
& (expanded_df["is_loft"] == expanded_df["is_loft_ending"])
& (expanded_df["is_flat"] == expanded_df["is_flat_ending"])
& (expanded_df["is_thatched"] == expanded_df["is_thatched_ending"])
& (expanded_df["is_at_rafters"] == expanded_df["is_at_rafters_ending"])
& (expanded_df["has_dwelling_above"] == expanded_df["has_dwelling_above_ending"])
)
]
@ -677,7 +664,6 @@ class TrainingDataset(BaseDataset):
}
for component in components_to_expand:
# TODO: change cleaned dataframe to have underscores instead of dashes
if component == "main-fuel":
cleaned_key = "main-fuel"
left_on_starting = "main_fuel_starting"

View file

@ -163,9 +163,8 @@ class WallRecommendations(Definitions):
if (
(insulation_thickness in ["average", "above average"])
or self.property.walls["is_filled_cavity"]
) and (
"cavity_extract_and_refill"
not in measures
or self.property.walls["clean_description"] is None
) and ("cavity_extract_and_refill" not in measures
):
return

View file

@ -201,6 +201,11 @@ def get_wall_u_value(
)
)
else:
# Rare edge case: an empty clean_description returns 0 without being looked up in the description map
if clean_description == "":
return 0
mapped_description = epc_wall_description_map[clean_description]
mapped_value = wall_uvalues_df[