diff --git a/etl/epc/Record.py b/etl/epc/Record.py index 0c420399..eb462850 100644 --- a/etl/epc/Record.py +++ b/etl/epc/Record.py @@ -102,11 +102,26 @@ CLEANING_RULES: dict[str, CleaningRule] = { "anomaly_to": None, }, + "number-open-fireplaces": { + "cast": int, + "default": 0 + }, + "extension-count": { "cast": int, "default": 0 }, + "flat-storey-count": { + "cast": int, + "anomaly_to": None + }, + + "number-habitable-rooms": { + "cast": int, + "anomaly_to": None + }, + # ----------------------------- # TO NONE # ----------------------------- @@ -410,7 +425,10 @@ class EPCRecord: if "cast" in rule and value is not None: try: - self._prepared_epc[field] = rule["cast"](value) + if rule["cast"] is int: + self._prepared_epc[field] = int(float(value)) + else: + self._prepared_epc[field] = rule["cast"](value) except Exception as e: logger.warning( f"Failed casting field '{field}' value '{value}': {e}" @@ -581,7 +599,6 @@ class EPCRecord: self._clean_built_form() self._clean_solar_hot_water() - self._clean_count_variables() self._clean_heat_loss_corridor() self._clean_age_band() self._clean_year_built() @@ -867,34 +884,6 @@ class EPCRecord: self._prepared_epc["heat-loss-corridor"] ] - def _clean_count_variables(self) -> None: - """ - This method will clean the count variables, if empty or invalid - """ - if not self._prepared_epc: - raise ValueError("EPC Recrod doesn not contain epc data") - - _fields = [ - "number-open-fireplaces", - "extension-count", - "flat-storey-count", - "number-habitable-rooms", - ] - - null_attributes = ["flat-storey-count", "number-habitable-rooms"] - - for attribute in _fields: - value = self._prepared_epc[attribute] - if value in DATA_ANOMALY_MATCHES or pd.isnull(value): - if attribute in null_attributes: - value = None - else: - value = 0 - else: - value = int(float(value)) - - self._prepared_epc[attribute] = value - def _clean_solar_hot_water(self) -> None: """ This method will clean the solar hot water, if empty or invalid