mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
rough implementation of lighting description cleaning
This commit is contained in:
parent
ba201c8b6a
commit
d5e4baba05
3 changed files with 73 additions and 2 deletions
|
|
@ -1,6 +1,8 @@
|
|||
from typing import List, Dict, Any
|
||||
from collections import Counter
|
||||
|
||||
import pandas as pd
|
||||
|
||||
from model_data.epc_attributes.FloorAttributes import FloorAttributes
|
||||
from model_data.epc_attributes.HotWaterAttributes import HotWaterAttributes
|
||||
from model_data.epc_attributes.MainFuelAttributes import MainFuelAttributes
|
||||
|
|
@ -9,6 +11,7 @@ from model_data.epc_attributes.MainheatControlAttributes import MainheatControlA
|
|||
from model_data.epc_attributes.RoofAttributes import RoofAttributes
|
||||
from model_data.epc_attributes.WallAttributes import WallAttributes
|
||||
from model_data.epc_attributes.WindowAttributes import WindowAttributes
|
||||
from model_data.epc_attributes.LightingAttributes import LightingAttributes
|
||||
|
||||
|
||||
class EpcClean:
|
||||
|
|
@ -37,6 +40,33 @@ class EpcClean:
|
|||
self.unique_vals: Dict[str, Any] = {}
|
||||
self.cleaned: Dict[str, List[Any]] = {}
|
||||
|
||||
self.lighting_averages = self._calculate_lighting_averages()
|
||||
|
||||
def _calculate_lighting_averages(self) -> pd.DataFrame:
    """
    Calculate the mean "low-energy-lighting" value for the few purely textual lighting descriptions.

    Some records describe lighting only qualitatively (for example "Good lighting efficiency"),
    so no figure can be parsed out of the text itself. For those descriptions the mean of the
    "low-energy-lighting" column over the matching rows of self.data is used instead. This is
    only valid for a very small number of cases, so a deliberately simple methodology is applied.

    :return: DataFrame with the columns "lighting-description" (lower-cased) and
        "low-energy-lighting" (mean value), one row per textual description found in the data.
        The frame is empty when self.data holds no records.
    """
    textual_descriptions = [
        "Below average lighting efficiency",
        "Good lighting efficiency",
        # Spelling must match the raw data exactly; the lower-cased form of this phrase is
        # what LightingAttributes looks up.
        "Excellent lighting efficiency",
    ]

    df = pd.DataFrame(self.data)
    if df.empty:
        # No records means no columns to filter on; hand back an empty table of the same shape.
        return pd.DataFrame(columns=["lighting-description", "low-energy-lighting"])

    # Copy the filtered rows so the dtype conversion below writes to an independent frame
    # instead of a slice of df (avoids pandas' chained-assignment warning).
    aggs = df[df["lighting-description"].isin(textual_descriptions)].copy()
    aggs["low-energy-lighting"] = aggs["low-energy-lighting"].astype(float)

    averages = aggs.groupby("lighting-description")["low-energy-lighting"].mean().reset_index()
    # Lower-case the key so it can be compared against lower-cased descriptions.
    averages["lighting-description"] = averages["lighting-description"].str.lower()
    return averages
|
||||
|
||||
def clean(self) -> None:
|
||||
"""
|
||||
Cleans the EPC data, mapping text fields to property epc_attributes.
|
||||
|
|
@ -55,17 +85,21 @@ class EpcClean:
|
|||
self.clean_wrapper(field="walls-description", cleaning_cls=WallAttributes)
|
||||
self.clean_wrapper(field="windows-description", cleaning_cls=WindowAttributes)
|
||||
|
||||
self.clean_wrapper(
|
||||
field="lighting-description", cleaning_cls=LightingAttributes, averages=self.lighting_averages
|
||||
)
|
||||
|
||||
def _init_empty_cleaned_obj(self) -> None:
    """
    Reset self.cleaned to a fresh mapping holding one empty list per entry of self.CLEANING_FIELDS.
    """
    fresh = {}
    for field_name in self.CLEANING_FIELDS:
        # A new list per field so that no two fields share the same container.
        fresh[field_name] = []
    self.cleaned = fresh
|
||||
|
||||
def clean_wrapper(self, field, cleaning_cls, **kwargs):
    """
    Run one cleaning class over every unique description recorded for a field.

    For each key of self.unique_vals[field], cleaning_cls is instantiated with that description
    plus any extra keyword arguments, its process() result is merged with the original text,
    and the combined record is appended to self.cleaned[field].

    :param field: Name of the field whose unique descriptions are cleaned
    :param cleaning_cls: Class taking the description (and **kwargs) and exposing process(),
        which must return a mapping
    :param kwargs: Extra keyword arguments forwarded unchanged to cleaning_cls
    """
    for raw_text in self.unique_vals[field].keys():
        target = self.cleaned[field]
        cleaner = cleaning_cls(raw_text, **kwargs)
        record = {"original_description": raw_text, **cleaner.process()}
        target.append(record)
|
||||
|
|
|
|||
|
|
@ -264,3 +264,11 @@ self = SalModel(
|
|||
data=all_data["data"],
|
||||
cleaner=all_data["cleaner"]
|
||||
)
|
||||
|
||||
# Collect the distinct lighting descriptions present in the raw records.
descs = set()
for x in all_data["data"]:
    descs.add(x["lighting-description"])
descs = list(descs)

# Tabular view of the same raw records.
df = pd.DataFrame(all_data["data"])
|
||||
|
|
|
|||
29
model_data/epc_attributes/LightingAttributes.py
Normal file
29
model_data/epc_attributes/LightingAttributes.py
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
import re
|
||||
from model_data.epc_attributes.attribute_utils import clean_description
|
||||
|
||||
|
||||
class LightingAttributes:
    """
    Derive a low energy lighting figure from a textual lighting description.

    The description is lower-cased and passed through clean_description on construction.
    `averages` is a table with the columns "lighting-description" (lower-cased) and
    "low-energy-lighting", used for descriptions that carry no figure of their own.
    """

    # Qualitative descriptions with no parsable figure; their value is looked up in `averages`.
    _EFFICIENCY_PHRASES = (
        "good lighting efficiency",
        "excellent lighting efficiency",
        "below average lighting efficiency",
    )

    def __init__(self, description, averages):
        """
        :param description: Raw lighting description text
        :param averages: DataFrame of per-description means (see class docstring)
        """
        self.description: str = clean_description(description.lower())
        self.averages = averages

    def low_energy_proportions(self):
        """
        Work out the low energy lighting figure implied by the description.

        :return: 0 for "no low energy lighting", 1 for "all fixed outlets", the stored average
            for the qualitative efficiency phrases (NaN when no average is available), the
            first number in the text divided by 100 otherwise, and 0 when nothing matches.
        """
        description = self.description

        if "no low energy lighting" in description:
            return 0

        if "all fixed outlets" in description:
            return 1

        for phrase in self._EFFICIENCY_PHRASES:
            if phrase in description:
                return self._average_for(phrase)

        # e.g. "... in 45% of fixed outlets" -> 0.45
        match = re.search(r"\d+", description)
        if match:
            return int(match.group()) / 100.0

        return 0

    def _average_for(self, phrase):
        """
        Look up the stored mean for one qualitative phrase.

        The lookup compares the "lighting-description" column only and uses the matched phrase
        rather than the full cleaned description, which may contain additional text.
        NOTE(review): the stored mean is returned as-is; if the "low-energy-lighting" column is
        a 0-100 percentage this is on a different scale from the other branches - confirm.

        :param phrase: One of _EFFICIENCY_PHRASES
        :return: The mean value, or NaN when the averages table has no row for the phrase
        """
        averages = self.averages
        matching = averages.loc[averages["lighting-description"] == phrase, "low-energy-lighting"]
        if matching.empty:
            return float("nan")
        return matching.iloc[0]

    def process(self):
        """
        Return the cleaned attributes as a mapping, as expected by callers that invoke
        process() on cleaning classes.

        NOTE(review): the key name "low_energy_proportion" is a proposal - confirm against the
        naming used by the other attribute classes.
        """
        return {"low_energy_proportion": self.low_energy_proportions()}
|
||||
Loading…
Add table
Reference in a new issue