mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
127 lines
4.8 KiB
Python
127 lines
4.8 KiB
Python
from typing import List, Dict, Any
|
|
from collections import Counter
|
|
from collections import defaultdict
|
|
|
|
from model_data.utils import correct_spelling
|
|
from model_data.epc_attributes.FloorAttributes import FloorAttributes
|
|
from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
|
|
from model_data.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
|
from model_data.epc_attributes.MainheatAttributes import MainHeatAttributes
|
|
from model_data.epc_attributes.MainheatControlAttributes import MainheatControlAttributes
|
|
from model_data.epc_attributes.RoofAttributes import RoofAttributes
|
|
from model_data.epc_attributes.WallAttributes import WallAttributes
|
|
from model_data.epc_attributes.WindowAttributes import WindowAttributes
|
|
from model_data.epc_attributes.LightingAttributes import LightingAttributes
|
|
|
|
|
|
class EpcClean:
    """
    Container for the methods used to clean the textual fields of EPC data.

    For every field named in ``CLEANING_FIELDS`` the distinct raw values found
    in the data are collected, and each distinct value is passed through the
    attribute class registered for that field to build a cleaned record.
    """

    # Keys of each EPC row whose values are cleaned by ``clean``.
    CLEANING_FIELDS: List[str] = [
        "floor-description",
        "hotwater-description",
        "main-fuel",
        "mainheat-description",
        "mainheatcont-description",
        "roof-description",
        "walls-description",
        "windows-description",
        "lighting-description",
    ]

    # Lighting descriptions for which an average low-energy-lighting
    # proportion is computed from the data.
    # NOTE(review): "Excelent" is spelled with a single "l"; presumably this
    # mirrors the spelling found in the raw data - confirm before changing.
    _AVERAGED_LIGHTING_DESCRIPTIONS = (
        'Below average lighting efficiency',
        'Good lighting efficiency',
        'Excelent lighting efficiency',
    )

    def __init__(self, data: List[Dict[str, Any]]) -> None:
        """
        EpcClean constructor.

        :param data: List of dictionaries containing EPC data. Every row must
            provide a "lighting-description" key, because the lighting
            averages are computed eagerly here.
        """
        self.data: List[Dict[str, Any]] = data
        self.unique_vals: Dict[str, Any] = {}
        self.cleaned: Dict[str, List[Any]] = {}

        self.lighting_averages = self._calculate_lighting_averages()

    @classmethod
    def _mean_low_energy_lighting(cls, rows: List[Dict[str, Any]]) -> Dict[str, float]:
        """
        Average the "low-energy-lighting" value per lighting description.

        Only rows whose "lighting-description" is one of
        ``_AVERAGED_LIGHTING_DESCRIPTIONS`` contribute. The rows themselves
        are never modified.

        :param rows: EPC rows to aggregate.
        :return: mapping of raw description to mean value, ordered by the
            first appearance of each description in ``rows``
        :raises KeyError: if a row has no "lighting-description" key, or a
            contributing row has no "low-energy-lighting" key
        :raises ValueError: if a contributing "low-energy-lighting" value
            cannot be converted to float
        """
        totals = defaultdict(float)
        counts = defaultdict(int)

        for row in rows:
            description = row["lighting-description"]
            if description not in cls._AVERAGED_LIGHTING_DESCRIPTIONS:
                continue

            # Convert into a local value so the caller's row keeps whatever
            # it originally held under "low-energy-lighting".
            totals[description] += float(row["low-energy-lighting"])
            counts[description] += 1

        return {
            description: total / counts[description]
            for description, total in totals.items()
        }

    def _calculate_lighting_averages(self) -> List[Dict[str, Any]]:
        """
        This is a simple utility function that, for a few textual lighting
        descriptions, calculates the average low energy lighting proportion.
        This is only valid for a very tiny number of cases and so a very
        simple methodology is applied.

        This is done without pandas so we can utilise this inside of our
        lambdas.

        :return: list of averages for the corresponding descriptions; each
            entry holds the lower-cased, spell-corrected description under
            "lighting-description" and the mean under "low-energy-lighting"
        """
        means = self._mean_low_energy_lighting(self.data)

        return [
            {
                "lighting-description": correct_spelling(description.lower()),
                "low-energy-lighting": mean,
            }
            for description, mean in means.items()
        ]

    def clean(self) -> None:
        """
        Cleans the EPC data, mapping text fields to property epc_attributes.

        Resets ``self.cleaned``, counts the distinct raw values of every
        cleaning field into ``self.unique_vals`` and then runs the attribute
        class of each field over those values.
        """
        self._init_empty_cleaned_obj()

        for field in self.CLEANING_FIELDS:
            self.unique_vals[field] = Counter(row[field] for row in self.data)

        # (field, attribute class, extra keyword arguments for the class).
        # Only the lighting cleaner takes an extra argument: the averages
        # computed when this object was constructed.
        cleaners = [
            ("floor-description", FloorAttributes, {}),
            ("hotwater-description", HotWaterAttributes, {}),
            ("main-fuel", MainFuelAttributes, {}),
            ("mainheat-description", MainHeatAttributes, {}),
            ("mainheatcont-description", MainheatControlAttributes, {}),
            ("roof-description", RoofAttributes, {}),
            ("walls-description", WallAttributes, {}),
            ("windows-description", WindowAttributes, {}),
            ("lighting-description", LightingAttributes, {"averages": self.lighting_averages}),
        ]

        for field, cleaning_cls, extra in cleaners:
            self.clean_wrapper(field=field, cleaning_cls=cleaning_cls, **extra)

    def _init_empty_cleaned_obj(self) -> None:
        """
        Initializes an empty object for cleaned data.

        ``self.cleaned`` is replaced by a dictionary holding one empty list
        per entry of ``CLEANING_FIELDS``.
        """
        self.cleaned = {field: [] for field in self.CLEANING_FIELDS}

    def clean_wrapper(self, field: str, cleaning_cls: Any, **kwargs: Any) -> None:
        """
        Run one attribute class over every distinct raw value of a field.

        ``self.unique_vals[field]`` and ``self.cleaned[field]`` must already
        exist; ``clean`` sets both up before calling this method.

        :param field: name of the EPC field being cleaned.
        :param cleaning_cls: class called as ``cleaning_cls(description,
            **kwargs)``; the resulting object must expose a ``description``
            string and a ``process()`` method returning a dictionary.
        :param kwargs: extra keyword arguments forwarded to ``cleaning_cls``.
        """
        for description in self.unique_vals[field]:
            cln = cleaning_cls(description, **kwargs)

            record = {
                "original_description": description,
                "clean_description": cln.description.capitalize(),
                # Unpacked last, so keys returned by process() win on clashes.
                **cln.process(),
            }
            self.cleaned[field].append(record)
|