Model/epc_data/EpcClean.py
2023-06-08 16:46:52 +01:00

61 lines
1.8 KiB
Python

from typing import List, Dict, Any
from collections import Counter
from epc_data.cleaning.Roof import CleanRoof
class EpcClean:
    """
    Container for methods which we utilise for cleaning EPC data.

    After :meth:`clean` has run, ``unique_vals`` maps every name in
    ``CLEANING_FIELDS`` to a ``Counter`` of the raw values found in ``data``
    and ``cleaned`` maps every name to a list of cleaned entries. Only
    ``"roof-description"`` is populated at present; the remaining fields are
    left as empty lists.
    """

    # Text fields of an EPC record that the cleaning pass tracks.
    CLEANING_FIELDS: List[str] = [
        "roof-description",
        "floor-description",
        "walls-description",
        "mainheat-description",
    ]

    def __init__(self, data: List[Dict[str, Any]]) -> None:
        """
        EpcClean constructor.

        :param data: List of dictionaries containing EPC data. ``clean``
            indexes each record by every name in ``CLEANING_FIELDS``.
        """
        self.data: List[Dict[str, Any]] = data
        self.unique_vals: Dict[str, Any] = {}
        self.cleaned: Dict[str, List[Any]] = {}

    def clean(self) -> None:
        """
        Cleans the EPC data, mapping text fields to property attributes.

        Resets ``cleaned``, counts the distinct raw values of every field in
        ``CLEANING_FIELDS`` into ``unique_vals``, then cleans the roof
        descriptions.

        :raises KeyError: If a record in ``data`` lacks one of the fields.
        """
        self._init_empty_cleaned_obj()
        for field in self.CLEANING_FIELDS:
            # A generator is enough: Counter consumes any iterable.
            self.unique_vals[field] = Counter(
                record[field] for record in self.data
            )
        self.clean_roof()
        # TODO: only roof descriptions are cleaned so far; the floor, walls
        # and mainheat entries of ``cleaned`` stay as empty lists.

    def _init_empty_cleaned_obj(self) -> None:
        """
        Initializes an empty object for cleaned data.

        Every name in ``CLEANING_FIELDS`` is mapped to a fresh empty list.
        """
        self.cleaned = {field: [] for field in self.CLEANING_FIELDS}

    def clean_roof(self) -> None:
        """
        Builds the cleaned entries for the unique roof descriptions.

        Each entry holds the raw text under ``"original_description"`` merged
        with the mapping returned by ``CleanRoof(description).clean()``. The
        list is rebuilt on every call, so calling this method again does not
        duplicate entries.

        :raises KeyError: If ``unique_vals`` has no ``"roof-description"``
            entry, i.e. the values have not been counted yet.
        """
        self.cleaned["roof-description"] = [
            {
                "original_description": description,
                **CleanRoof(description).clean(),
            }
            for description in self.unique_vals["roof-description"]
        ]