patching roof description in cleaned further

This commit is contained in:
Khalim Conn-Kowlessar 2024-02-27 13:12:54 +00:00
parent 7b080094fd
commit 3ef346b248
2 changed files with 87 additions and 1 deletions

View file

@ -1755,7 +1755,16 @@ def patch_cleaned(cleaned):
]
)
# We treat unknown loft insulation as no insulation
cleaned["roof-description"].extend(
[
{'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
'thermal_transmittance': None, 'thermal_transmittance_unit': None, 'is_pitched': True,
'is_roof_room': False,
'is_loft': False, 'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True,
'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': 'none'}
]
)
cleaned["roof-description"].extend(
[
{'original_description': 'Pitched, Unknown loft insulation', 'clean_description': 'Pitched, no insulation',
@ -1777,6 +1786,55 @@ def patch_cleaned(cleaned):
]
)
thermal_transmittance_values = list(np.arange(0, 2, 0.01))
for ttv in thermal_transmittance_values:
ttv_roundeded = round(ttv, 2)
# We look for an instance of that thermal transmittance value
rec = [
x for x in cleaned["roof-description"] if
(x["thermal_transmittance"] == ttv_roundeded) and "Average thermal transmittance" in x["clean_description"]
]
if rec:
continue
else:
# We patch the record
cleaned["roof-description"].extend(
[{'original_description': f'Average thermal transmittance {ttv_roundeded} W/m-¦K',
'clean_description': f'Average thermal transmittance {ttv_roundeded} w/m-¦k',
'thermal_transmittance': ttv_roundeded,
'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
)
# We also patch the same values for a garbled unit spelling we found ("W/m?K")
for ttv in thermal_transmittance_values:
ttv_rounded = round(ttv, 2)
# We look for an instance of that thermal transmittance value
rec = [
x for x in cleaned["roof-description"] if
(x["thermal_transmittance"] == ttv_rounded) and "Average thermal transmittance" in x["clean_description"]
and x["thermal_transmittance_unit"] == "w/m?K"
]
if rec:
continue
else:
# We patch the record
ttv_string = str(ttv_rounded)
if len(ttv_string) == 3:
ttv_string = f"{ttv_string}0"
cleaned["roof-description"].extend(
[{'original_description': f'Average thermal transmittance {ttv_string} W/m?K',
'clean_description': f'Average thermal transmittance {ttv_string} w/m-¦k',
'thermal_transmittance': ttv_rounded,
'thermal_transmittance_unit': 'w/m-¦k', 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
'is_flat': False, 'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False,
'has_dwelling_above': False, 'is_valid': True, 'insulation_thickness': None}]
)
# Patch mainheatcont-description
cleaned["mainheatcont-description"].extend(
[

View file

@ -658,6 +658,34 @@ class TrainingDataset(BaseDataset):
components_to_expand = cols_to_drop.keys()
for comp in list(components_to_expand):
if comp == "main-fuel":
cleaned_key = "main-fuel"
left_on_starting = "main_fuel_starting"
left_on_ending = "main_fuel_ending"
original_cols = ["main_fuel_starting", "main_fuel_ending"]
else:
cleaned_key = f"{comp}-description"
left_on_starting = f"{comp}_description_starting"
left_on_ending = f"{comp}_description_ending"
original_cols = [
f"{comp}_description_starting",
f"{comp}_description_ending",
]
df = pd.DataFrame(cleaned_lookup[cleaned_key])
# Check that the first row's starting and ending descriptions exist in the lookup
filtered_1 = df[df["original_description"] == self.df[left_on_starting].values[0]]
filtered_2 = df[df["original_description"] == self.df[left_on_ending].values[0]]
if filtered_1.empty:
print(comp)
print(self.df[left_on_starting].values[0])
if filtered_2.empty:
print(f"Original description {self.df[left_on_ending].values[0]} not found in lookup")
z = pd.DataFrame(cleaned_lookup["roof-description"])
z[z["original_description"] == "Average thermal transmittance 0.20 W/m?K"]
for component in components_to_expand:
# TODO: change cleaned dataframe to have underscores instead of dashes
if component == "main-fuel":