diff --git a/.idea/Model.iml b/.idea/Model.iml
index 4413bb06..b0f9c00d 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -7,7 +7,7 @@
-
+
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 3b05c6ac..ca0e1cd9 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
-
+
diff --git a/etl/epc/DataProcessor.py b/etl/epc/DataProcessor.py
index e9c84c3c..cb3de9f4 100644
--- a/etl/epc/DataProcessor.py
+++ b/etl/epc/DataProcessor.py
@@ -569,3 +569,31 @@ class DataProcessor:
df[col] = df[col].fillna("Unknown")
return df
+
+    @staticmethod
+    def clean_efficiency_variables(df):
+        """
+        Fill missing component energy-efficiency ratings with the label "NO_RATING".
+
+        There is scope to impute these per corresponding description instead, e.g. for
+        WALLS_ENERGY_EFF take the modal efficiency rating of each description and fill the
+        gaps with it. A large volume of records have missing efficiency values, so a
+        simpler approach was taken for now just to test including these variables.
+        :param df: DataFrame to clean; affected columns are overwritten on this object
+        :return: the same DataFrame, with missing efficiency ratings set to "NO_RATING"
+        :raises ValueError: if a column that is not a *_ENERGY_EFF rating column
+            (optionally suffixed _STARTING/_ENDING) contains missing values
+        """
+        missing_counts = df.isnull().sum()
+        missing_cols = missing_counts[missing_counts >= 1].index
+        if missing_cols.empty:
+            return df
+
+        # Anchor on the *_ENERGY_EFF suffix: a bare "ENERGY_EFF" substring test would also
+        # accept numeric columns such as POTENTIAL_ENERGY_EFFICIENCY and fill them with text.
+        is_rating = missing_cols.str.contains(r"_ENERGY_EFF(?:_STARTING|_ENDING)?$")
+        if not is_rating.all():
+            raise ValueError("Non efficiency columns are missing")
+        for col in missing_cols:
+            df[col] = df[col].fillna("NO_RATING")
+        return df
diff --git a/etl/epc/property_change_app.py b/etl/epc/property_change_app.py
index 067d7161..435b668d 100644
--- a/etl/epc/property_change_app.py
+++ b/etl/epc/property_change_app.py
@@ -12,6 +12,10 @@ from etl.epc.settings import (
HEAT_DEMAND_RESPONSE,
COLUMNS_TO_MERGE_ON,
CARBON_RESPONSE,
+ CORE_COMPONENT_FEATURES,
+ EFFICIENCY_FEATURES,
+ POTENTIAL_COLUMNS,
+ MINIMUM_FLOOR_HEIGHT
)
from etl.epc.DataProcessor import DataProcessor
from utils.s3 import save_dataframe_to_s3_parquet, read_from_s3
@@ -254,6 +258,13 @@ def make_uvalues(df):
# Roof
# ~~~~~~~~~~~~~~~~~~
+ if x["has_dwelling_above"]:
+ if x["roof_thermal_transmittance"] != 0:
+ raise ValueError("Should have 0 u-value for roof")
+
+ if x["roof_thermal_transmittance_ENDING"] != 0:
+ raise ValueError("Should have 0 u-value for roof")
+
starting_roof_uvalue = x["roof_thermal_transmittance"]
if pd.isnull(starting_roof_uvalue):
starting_roof_uvalue = get_roof_u_value(
@@ -297,6 +308,11 @@ def make_uvalues(df):
wall_type = get_wall_type(**x)
if x["another_property_below"]:
+ if x["floor_thermal_transmittance"] != 0:
+ raise ValueError("Should have 0 u-value for floor")
+
+ if x["floor_thermal_transmittance_ENDING"] != 0:
+ raise ValueError("Should have 0 u-value for floor")
starting_floor_uvalue, ending_floor_uvalue = 0, 0
else:
starting_floor_uvalue = x["floor_thermal_transmittance"]
@@ -363,6 +379,25 @@ def make_uvalues(df):
return df
+def compare_records(earliest_record: pd.Series, latest_record: pd.Series, columns: list):
+    """
+    For a list of columns, check if the earliest and latest record are the same.
+    We flag identical records because we have examples of SAP scores changing
+    without any feature changes.
+    Two missing values (None/NaN) in the same column count as equal; a plain `!=`
+    would report NaN != NaN and so never match records that share a gap.
+    :param earliest_record: pd.Series
+    :param latest_record: pd.Series
+    :param columns: list of columns to compare
+    :return: boolean indicating whether or not all features are the same
+    """
+    for col in columns:
+        before, after = earliest_record[col], latest_record[col]
+        if not (pd.isnull(before) and pd.isnull(after)) and before != after:
+            return False
+    return True
+
+
def app():
# Get all the files in the directory
@@ -376,6 +411,8 @@ def app():
dataset = []
cleaning_dataset = []
+ # Keep track of the all equals
+ all_equal_rows = []
for directory in tqdm(directories):
@@ -422,7 +459,9 @@ def app():
# We include the lodgement date here as we probably need to factor time into the
# model, since EPC standards and rigour have changed over time
variable_data = property_data[
- COMPONENT_FEATURES + ["LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE]
+ COMPONENT_FEATURES + EFFICIENCY_FEATURES + POTENTIAL_COLUMNS + [
+ "LODGEMENT_DATE", RDSAP_RESPONSE, HEAT_DEMAND_RESPONSE, CARBON_RESPONSE
+ ]
]
# Note: we look at changes between subsequent EPCS, however we could look at other permutations
@@ -439,6 +478,8 @@ def app():
# Check if the sap gets better or worse
gets_better = earliest_record[RDSAP_RESPONSE] <= latest_record[RDSAP_RESPONSE]
+ component_variables = COMPONENT_FEATURES + EFFICIENCY_FEATURES
+
if gets_better:
starting_sap = earliest_record[RDSAP_RESPONSE]
starting_heat_demand = earliest_record[HEAT_DEMAND_RESPONSE]
@@ -452,8 +493,8 @@ def app():
heat_demand_change = latest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
carbon_change = latest_record[CARBON_RESPONSE] - starting_carbon
- starting_record = earliest_record[COMPONENT_FEATURES + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
- ending_record = latest_record[COMPONENT_FEATURES + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
+ starting_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
+ ending_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
else:
starting_sap = latest_record[RDSAP_RESPONSE]
starting_heat_demand = latest_record[HEAT_DEMAND_RESPONSE]
@@ -467,12 +508,23 @@ def app():
heat_demand_change = earliest_record[HEAT_DEMAND_RESPONSE] - starting_heat_demand
carbon_change = earliest_record[CARBON_RESPONSE] - starting_carbon
- starting_record = latest_record[COMPONENT_FEATURES + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
- ending_record = earliest_record[COMPONENT_FEATURES + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
+ starting_record = latest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_STARTING")
+ ending_record = earliest_record[component_variables + ["LODGEMENT_DATE"]].add_suffix("_ENDING")
if rdsap_change == 0:
continue
+ all_equal = compare_records(
+ earliest_record=earliest_record,
+ latest_record=latest_record,
+ columns=CORE_COMPONENT_FEATURES
+ )
+
+ if all_equal:
+ # Keep track of this for the moment so we can analyse
+ all_equal_rows.append({"uprn": uprn, "directory_name": directory.name})
+ continue
+
features = pd.concat([starting_record, ending_record])
property_model_data.append(
@@ -487,6 +539,10 @@ def app():
"HEAT_DEMAND_ENDING": ending_heat_demand,
"CARBON_STARTING": starting_carbon,
"CARBON_ENDING": ending_carbon,
+ "POTENTIAL_ENERGY_EFFICIENCY": earliest_record["POTENTIAL_ENERGY_EFFICIENCY"],
+ "ENVIRONMENT_IMPACT_POTENTIAL": earliest_record["ENVIRONMENT_IMPACT_POTENTIAL"],
+ "ENERGY_CONSUMPTION_POTENTIAL": earliest_record["ENERGY_CONSUMPTION_POTENTIAL"],
+ "CO2_EMISSIONS_POTENTIAL": earliest_record["CO2_EMISSIONS_POTENTIAL"],
**fixed_data,
**features.to_dict(),
}
@@ -496,8 +552,6 @@ def app():
data_by_urpn_df = pd.DataFrame(data_by_urpn)
- # Add some temporal features - we look at the days from the standard starting point in time
- # for the starting and ending date so all records are from a fixed point
data_by_urpn_df["DAYS_TO_STARTING"] = DataProcessor.calculate_days_to(
data_by_urpn_df["LODGEMENT_DATE_STARTING"]
)
@@ -508,6 +562,8 @@ def app():
data_by_urpn_df = data_by_urpn_df.drop(columns=["LODGEMENT_DATE_STARTING", "LODGEMENT_DATE_ENDING"])
+ data_by_urpn_df = DataProcessor.clean_efficiency_variables(data_by_urpn_df)
+
# We look for key building fabric features that have changed from one EPC to the next.
# if, for example, we see that a home has gone from being a cavity wall to a solid wall, we
# remove this record, as it indicates that the quality of the EPC conducted in the first instance
@@ -541,6 +597,8 @@ def app():
cleaning_averages["LOCAL_AUTHORITY"] = df["LOCAL_AUTHORITY"].values[0]
cleaning_dataset.append(cleaning_averages)
+ print("Final all equal count: %s" % str(len(all_equal_rows)))
+
# Store cleaning dataset in s3 as a parquet file
cleaning_dataset = pd.concat(cleaning_dataset)
save_dataframe_to_s3_parquet(
@@ -567,6 +625,14 @@ def app():
file_key="sap_change_model/dataset.parquet",
)
+ # Store all_equal_rows
+ all_equal_rows = pd.DataFrame(all_equal_rows)
+ save_dataframe_to_s3_parquet(
+ df=all_equal_rows,
+ bucket_name="retrofit-data-dev",
+ file_key="sap_change_model/all_equal_rows.parquet",
+ )
+
if __name__ == "__main__":
app()
diff --git a/etl/epc/settings.py b/etl/epc/settings.py
index fb8e464d..93b8929b 100644
--- a/etl/epc/settings.py
+++ b/etl/epc/settings.py
@@ -85,8 +85,7 @@ FIXED_FEATURES = [
"FIXED_LIGHTING_OUTLETS_COUNT",
]
-COMPONENT_FEATURES = [
- "TRANSACTION_TYPE",
+CORE_COMPONENT_FEATURES = [
"WALLS_DESCRIPTION",
"FLOOR_DESCRIPTION",
"LIGHTING_DESCRIPTION",
@@ -96,21 +95,49 @@ COMPONENT_FEATURES = [
"MAIN_FUEL",
"MECHANICAL_VENTILATION",
"SECONDHEAT_DESCRIPTION",
- "ENERGY_TARIFF", # Not sure if this is relevant
- "SOLAR_WATER_HEATING_FLAG",
- "PHOTO_SUPPLY",
"WINDOWS_DESCRIPTION",
"GLAZED_TYPE",
"MULTI_GLAZE_PROPORTION",
"LOW_ENERGY_LIGHTING",
"NUMBER_OPEN_FIREPLACES",
"MAINHEATCONT_DESCRIPTION",
+ "SOLAR_WATER_HEATING_FLAG",
+ "PHOTO_SUPPLY",
+]
+
+EFFICIENCY_FEATURES = [  # per-component *_ENERGY_EFF columns of the EPC record
+    "HOT_WATER_ENERGY_EFF",
+    "FLOOR_ENERGY_EFF",
+    "WINDOWS_ENERGY_EFF",
+    "WALLS_ENERGY_EFF",
+    "SHEATING_ENERGY_EFF",
+    "ROOF_ENERGY_EFF",
+    "MAINHEAT_ENERGY_EFF",
+    "MAINHEATC_ENERGY_EFF",
+    "LIGHTING_ENERGY_EFF",
+]
+
+COMPONENT_FEATURES = CORE_COMPONENT_FEATURES + [
+ "TRANSACTION_TYPE",
+ "ENERGY_TARIFF", # Not sure if this is relevant
"EXTENSION_COUNT",
"TOTAL_FLOOR_AREA",
"FLOOR_HEIGHT",
# 'GLAZED_AREA', # May not need this since we have MULTI_GLAZE_PROPORTION
]
+POTENTIAL_COLUMNS = [  # the EPC's "potential" figures; cost columns are left out for now
+    "POTENTIAL_ENERGY_RATING",
+    "POTENTIAL_ENERGY_EFFICIENCY",
+    "ENVIRONMENT_IMPACT_POTENTIAL",
+    "ENERGY_CONSUMPTION_POTENTIAL",
+    "CO2_EMISSIONS_POTENTIAL",
+    # We don't include cost features for the moment
+    # "LIGHTING_COST_POTENTIAL",
+    # "HEATING_COST_POTENTIAL",
+    # "HOT_WATER_COST_POTENTIAL",
+]
+
# For these fields, we take the latest value if we have multiple values
# Since more recent EPCs have been conducted with more rigour, we assume that the latest value is
# the most accurate
@@ -253,3 +280,7 @@ ENDING_SUFFIX_COMPONENT_COLS = [
'rate_control', 'glazing_type', 'fuel_type', 'main-fuel_tariff_type', 'is_community',
'no_individual_heating_or_community_network', 'complex_fuel_type', 'estimated_perimeter'
]
+
+# We found that without performing any filtering, the bottom 0.5% of homes had a floor height of 1.65m. We'll therefore
+# filter out any homes with a floor height below this
+MINIMUM_FLOOR_HEIGHT = 1.65
diff --git a/etl/epc_clean/epc_attributes/FloorAttributes.py b/etl/epc_clean/epc_attributes/FloorAttributes.py
index 6631b4d5..245a91bc 100644
--- a/etl/epc_clean/epc_attributes/FloorAttributes.py
+++ b/etl/epc_clean/epc_attributes/FloorAttributes.py
@@ -107,4 +107,8 @@ class FloorAttributes(Definitions):
else:
result['insulation_thickness'] = None
+ if result["another_property_below"]:
+ result["thermal_transmittance"] = 0
+ result["thermal_transmittance_unit"] = 'w/m-¦k'
+
return result
diff --git a/etl/epc_clean/epc_attributes/RoofAttributes.py b/etl/epc_clean/epc_attributes/RoofAttributes.py
index 9e400235..ed2b4d07 100644
--- a/etl/epc_clean/epc_attributes/RoofAttributes.py
+++ b/etl/epc_clean/epc_attributes/RoofAttributes.py
@@ -138,4 +138,8 @@ class RoofAttributes(Definitions):
if "insulation_thickness" not in result:
result['insulation_thickness'] = None
+ if result["has_dwelling_above"]:
+ result["thermal_transmittance"] = 0
+ result["thermal_transmittance_unit"] = 'w/m-¦k'
+
return result
diff --git a/etl/epc_clean/epc_attributes/WallAttributes.py b/etl/epc_clean/epc_attributes/WallAttributes.py
index 03fe6d67..40a5d5db 100644
--- a/etl/epc_clean/epc_attributes/WallAttributes.py
+++ b/etl/epc_clean/epc_attributes/WallAttributes.py
@@ -133,4 +133,13 @@ class WallAttributes(Definitions):
result['external_insulation'] = 'external insulation' in description
result['internal_insulation'] = 'internal insulation' in description
+ if result["is_filled_cavity"]:
+ # If it has a filled cavity + internal/external insulation, it's deemed to have above average insulation
+ if result["external_insulation"]:
+ result["insulation_thickness"] = "above average"
+ elif result["internal_insulation"]:
+ result["insulation_thickness"] = "above average"
+ else:
+ result["insulation_thickness"] = "average"
+
return result
diff --git a/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py b/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py
index 5738f77f..280e7459 100644
--- a/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_floor_attributes_cases.py
@@ -1,14 +1,14 @@
clean_floor_cases = [
- {'original_description': '(another dwelling below)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None, 'is_assumed': False, 'is_to_unheated_space': False,
+ {'original_description': '(another dwelling below)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k", 'is_assumed': False, 'is_to_unheated_space': False,
'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'insulation_thickness': None,
"another_property_below": True},
- {'original_description': '(anheddiad arall islaw)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None, 'is_assumed': False, 'is_to_unheated_space': False,
+ {'original_description': '(anheddiad arall islaw)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k", 'is_assumed': False, 'is_to_unheated_space': False,
'is_to_external_air': False, 'is_suspended': False, 'is_solid': False, 'insulation_thickness': None,
"another_property_below": True},
- {'original_description': '(other premises below)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None,
+ {'original_description': '(other premises below)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k",
'is_assumed': False, 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False,
'is_solid': False, 'insulation_thickness': None,
"another_property_below": True},
@@ -342,8 +342,8 @@ clean_floor_cases = [
{'original_description': 'To unheated space, no insulation (assumed)', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_assumed': True, 'is_to_unheated_space': True, 'is_to_external_air': False,
'is_suspended': False, 'is_solid': False, 'insulation_thickness': 'none', "another_property_below": False},
- {'original_description': '(eiddo arall islaw)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None,
+ {'original_description': '(eiddo arall islaw)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k",
'is_assumed': False, 'is_to_unheated_space': False, 'is_to_external_air': False, 'is_suspended': False,
'is_solid': False, 'insulation_thickness': None,
"another_property_below": True},
diff --git a/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py b/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py
index ee7f865b..6b719afd 100644
--- a/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_roof_attributes_cases.py
@@ -1,10 +1,11 @@
clean_roof_test_cases = [
- {'original_description': '(another dwelling above)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None, 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, 'is_flat': False,
+ {'original_description': '(another dwelling above)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k", 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+ 'is_flat': False,
'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': True, 'is_valid': True,
'insulation_thickness': None},
- {'original_description': '(other premises above)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None,
+ {'original_description': '(other premises above)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k",
'is_pitched': False, 'is_roof_room': False, 'is_loft': False, 'is_flat': False, 'is_thatched': False,
'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': True, 'is_valid': True,
'insulation_thickness': None},
@@ -362,8 +363,9 @@ clean_roof_test_cases = [
'thermal_transmittance_unit': None, 'is_pitched': True, 'is_roof_room': False, 'is_loft': False, 'is_flat': False,
'is_thatched': False, 'is_at_rafters': False, 'is_assumed': True, 'has_dwelling_above': False, 'is_valid': True,
'insulation_thickness': 'average'},
- {'original_description': '(eiddo arall uwchben)', 'thermal_transmittance': None,
- 'thermal_transmittance_unit': None, 'is_pitched': False, 'is_roof_room': False, 'is_loft': False, 'is_flat': False,
+ {'original_description': '(eiddo arall uwchben)', 'thermal_transmittance': 0,
+ 'thermal_transmittance_unit': "w/m-¦k", 'is_pitched': False, 'is_roof_room': False, 'is_loft': False,
+ 'is_flat': False,
'is_thatched': False, 'is_at_rafters': False, 'is_assumed': False, 'has_dwelling_above': True, 'is_valid': True,
'insulation_thickness': None},
{'original_description': 'Ar oleddf, inswleiddio cyfyngedig (rhagdybiaeth)', 'thermal_transmittance': None,
diff --git a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
index 40d6fb9c..300702a7 100644
--- a/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
+++ b/etl/epc_clean/tests/test_data/test_wall_attributes_cases.py
@@ -567,17 +567,17 @@ wall_cases = [
{'original_description': 'Cavity wall, filled cavity', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False,
- 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': None,
+ 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': "average",
'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Cavity wall, filled cavity and external insulation', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False,
- 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
+ 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'above average',
'external_insulation': True, 'internal_insulation': False},
{'original_description': 'Cavity wall, filled cavity and internal insulation', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False,
- 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
+ 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'above average',
'external_insulation': False, 'internal_insulation': True},
{'original_description': 'Cavity wall, with external insulation', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': False, 'is_solid_brick': False,
@@ -723,7 +723,7 @@ wall_cases = [
{'original_description': 'Waliau ceudod, ceudod wediGÇÖi lenwi', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False,
- 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': None,
+ 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': "average",
'external_insulation': False, 'internal_insulation': False},
{'original_description': 'Waliau ceudod, fel yGÇÖu hadeiladwyd, wediGÇÖu hinswleiddio (rhagdybiaeth)',
'thermal_transmittance': None,
@@ -778,7 +778,7 @@ wall_cases = [
{'original_description': 'Waliau ceudod, ynysydd allanol a llenwi ceudod', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': True, 'is_filled_cavity': True, 'is_solid_brick': False,
'is_system_built': False, 'is_timber_frame': False, 'is_granite_or_whinstone': False, 'is_as_built': False,
- 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'average',
+ 'is_cob': False, 'is_assumed': False, 'is_sandstone_or_limestone': False, 'insulation_thickness': 'above average',
'external_insulation': True, 'internal_insulation': False},
{'original_description': 'Gwenithfaen neu risgraig, gydag inswleiddio mewnol', 'thermal_transmittance': None,
'thermal_transmittance_unit': None, 'is_cavity_wall': False, 'is_filled_cavity': False, 'is_solid_brick': False,
diff --git a/etl/epc_clean/tests/test_roof_attributes.py b/etl/epc_clean/tests/test_roof_attributes.py
index 54b59f1a..b0663a3e 100644
--- a/etl/epc_clean/tests/test_roof_attributes.py
+++ b/etl/epc_clean/tests/test_roof_attributes.py
@@ -75,8 +75,8 @@ class TestRoofAttributes:
"is_assumed": False,
"is_flat": False,
"is_thatched": False,
- "thermal_transmittance": None,
- "thermal_transmittance_unit": None,
+ "thermal_transmittance": 0,
+ "thermal_transmittance_unit": "w/m-¦k",
}
for k in expected_output: