Merge pull request #114 from Hestia-Homes/main

Implementing database pushes into the lambda - still recommendations to complete
This commit is contained in:
KhalimCK 2023-08-01 17:59:00 +01:00 committed by GitHub
commit 382bde3958
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 494 additions and 3672 deletions

View file

@ -26,6 +26,7 @@ class Property(BaseUtility):
roof = None
walls = None
windows = None
lighting = None
coordinates = None
@ -37,6 +38,19 @@ class Property(BaseUtility):
self.full_sap_epc = None
self.in_conservation_area = None
self.year_built = None
self.number_of_rooms = None
self.energy = None
self.ventilation = None
self.solar_pv = None
self.solar_hot_water = None
self.wind_turbine = None
self.number_of_open_fireplaces = None
self.number_of_extensions = None
self.number_of_storeys = None
self.heat_loss_corridor = None
self.mains_gas = None
self.floor_height = None
if epc_client:
self.epc_client = epc_client
@ -76,6 +90,134 @@ class Property(BaseUtility):
"""
self.coordinates = {key.lower(): value for key, value in coordinates.items()}
def set_energy(self):
    """
    Populate ``self.energy`` with the home's energy and CO2 figures.

    To begin with, this only reformats values already present in the EPC data.

    Data:
    - primary_energy_consumption
        Read from the "energy-consumption-current" EPC field: the current estimated total
        energy consumption for the property in a 12 month period (kWh/m2), displayed on the
        EPC as the current primary energy use per square metre of floor area.
    - co2_emissions
        Read from the "co2-emissions-current" EPC field: CO2 emissions per year in tonnes/year.

    Both values are converted with ``float`` and will raise if the field is not numeric.
    """
    epc_record = self.data
    consumption = float(epc_record["energy-consumption-current"])
    emissions = float(epc_record["co2-emissions-current"])
    self.energy = {
        "primary_energy_consumption": consumption,
        "co2_emissions": emissions,
    }
def set_ventilation(self):
    """
    Populate ``self.ventilation`` with the home's ventilation type.

    To begin with, this only reformats the "mechanical-ventilation" field of the EPC data.

    Data:
    - ventilation
        The raw "mechanical-ventilation" value, or None when the value is empty or one of
        ``self.DATA_ANOMALY_MATCHES``.
    """
    raw_value = self.data["mechanical-ventilation"]
    # Simple cleaning only - when checking 300k properties, the only unique values were
    # {'', 'mechanical, supply and extract', 'NO DATA!', 'natural', 'mechanical, extract only'}
    is_missing = raw_value in self.DATA_ANOMALY_MATCHES or raw_value in [""]
    self.ventilation = {
        "ventilation": None if is_missing else raw_value,
    }
def set_solar_pv(self):
    """
    Populate ``self.solar_pv`` with the home's solar PV figure.

    To begin with, this only reformats the "photo-supply" field of the EPC data.

    Data:
    - solar_pv
        The "photo-supply" value as a float, or None when the field is an empty string.
        When checking 100k properties, the value was either "" or a stringified number.
    """
    raw_supply = self.data["photo-supply"]
    self.solar_pv = {
        "solar_pv": float(raw_supply) if raw_supply != "" else None,
    }
def set_solar_hot_water(self):
    """
    Populate ``self.solar_hot_water`` from the "solar-water-heating-flag" EPC field.

    "Y" becomes True, "N" becomes False and "" becomes None. Any other value raises a
    KeyError.
    :return:
    """
    flag = self.data["solar-water-heating-flag"]
    flag_to_value = {"Y": True, "N": False, "": None}
    self.solar_hot_water = {
        "solar_hot_water": flag_to_value[flag],
    }
def set_wind_turbine(self):
    """
    Populate ``self.wind_turbine`` from the "wind-turbine-count" EPC field.

    The count is stored as an int, or as None when the field is an empty string.
    :return:
    """
    raw_count = self.data["wind-turbine-count"]
    self.wind_turbine = {
        "wind_turbine": int(raw_count) if raw_count != "" else None,
    }
def set_count_variables(self):
    """
    Set the attributes that are plain counts taken from the EPC data.

    These EPC fields hold integers but may also contain values such as "" or one of
    ``self.DATA_ANOMALY_MATCHES``, so they cannot be converted straight to an integer.

    Each attribute is read from its own EPC field:
    - number_of_open_fireplaces <- "number-open-fireplaces"
    - number_of_extensions      <- "extension-count"
    - number_of_storeys         <- "flat-storey-count"
    - number_of_rooms           <- "number-habitable-rooms"

    A missing value becomes None for number_of_storeys and number_of_rooms, and 0 for the
    other counts.
    :return:
    """
    epc_fields = {
        "number_of_open_fireplaces": "number-open-fireplaces",
        "number_of_extensions": "extension-count",
        "number_of_storeys": "flat-storey-count",
        "number_of_rooms": "number-habitable-rooms",
    }
    # Attributes for which "missing" is stored as None rather than defaulted to 0
    nullable_attributes = {"number_of_storeys", "number_of_rooms"}

    for attribute, epc_field in epc_fields.items():
        # Read the field that belongs to this attribute (previously every attribute was
        # populated from "extension-count")
        raw_value = self.data[epc_field]
        if raw_value == "" or raw_value in self.DATA_ANOMALY_MATCHES:
            count = None if attribute in nullable_attributes else 0
        else:
            count = int(raw_value)
        setattr(self, attribute, count)
def get_components(self, cleaned):
"""
Given the cleaning that has been performed, we'll use this to identify the property
@ -90,10 +232,24 @@ class Property(BaseUtility):
if not self.data:
raise ValueError("Property does not contain data")
self.set_energy()
self.set_ventilation()
self.set_solar_pv()
self.set_solar_hot_water()
self.set_wind_turbine()
self.set_count_variables()
self.set_heat_loss_corridor()
self.set_mains_gas()
self.set_floor_height()
for description, attribute in cleaned.items():
if self.data[description] in self.DATA_ANOMALY_MATCHES:
setattr(self, self.ATTRIBUTE_MAP[description], {"original_description": self.data[description]})
setattr(
self,
self.ATTRIBUTE_MAP[description],
{"original_description": self.data[description], "clean_description": self.data[description]}
)
continue
attributes = [
@ -131,3 +287,140 @@ class Property(BaseUtility):
# We don't know when the property was built
self.year_built = None
def set_heat_loss_corridor(self):
    """
    Clean the "heat-loss-corridor" and "unheated-corridor-length" EPC fields.

    ``self.heat_loss_corridor`` is set to a dict with:
    - heat_loss_corridor: True only for "unheated corridor"; False for "no corridor",
      "heated corridor" and for any value in ``self.DATA_ANOMALY_MATCHES``. Any other value
      raises a KeyError.
    - length: the corridor length as a float, or None when the field is an empty string.
    :return:
    """
    corridor_type = self.data["heat-loss-corridor"]
    if corridor_type in self.DATA_ANOMALY_MATCHES:
        is_unheated = False
    else:
        corridor_flags = {
            "no corridor": False,
            "unheated corridor": True,
            "heated corridor": False,
        }
        is_unheated = corridor_flags[corridor_type]

    raw_length = self.data["unheated-corridor-length"]
    corridor_length = None if raw_length == "" else float(raw_length)

    self.heat_loss_corridor = {
        "heat_loss_corridor": is_unheated,
        "length": corridor_length,
    }
def set_mains_gas(self):
    """
    Set ``self.mains_gas`` from the "mains-gas-flag" EPC field.

    "Y" becomes True and "N" becomes False. An empty string or any value in
    ``self.DATA_ANOMALY_MATCHES`` becomes None. Any other value raises a KeyError.
    :return:
    """
    flag = self.data["mains-gas-flag"]
    if flag == "" or flag in self.DATA_ANOMALY_MATCHES:
        self.mains_gas = None
        return
    self.mains_gas = {"Y": True, "N": False}[flag]
def set_floor_height(self):
    """
    Set ``self.floor_height`` from the "floor-height" EPC field.

    The value is stored as a float, or as None when the field is an empty string or one of
    ``self.DATA_ANOMALY_MATCHES``.
    :return:
    """
    raw_height = self.data["floor-height"]
    is_missing = raw_height == "" or raw_height in self.DATA_ANOMALY_MATCHES
    self.floor_height = None if is_missing else float(raw_height)
def _clean_upload_data(self, to_update):
    """
    Replace anomalous values in an upload payload with None.

    :param to_update: dict of field name to value; it is modified in place
    :return: the same dict, with every value found in ``self.DATA_ANOMALY_MATCHES`` set to None
    """
    anomalies = self.DATA_ANOMALY_MATCHES
    # Only existing keys are reassigned, so the dict keeps its size while being iterated
    for field_name in to_update:
        if to_update[field_name] in anomalies:
            to_update[field_name] = None
    return to_update
def get_full_property_data(self):
    """
    Build the core property record that is pushed to the database.

    Most values are copied from the EPC data; "uprn" and "building-reference-number" are
    converted to int, and number_of_rooms / year_built come from attributes already set on
    the instance. The record is passed through ``self._clean_upload_data`` before it is
    returned.
    :return: dict of property fields ready for upload
    """
    epc_record = self.data
    payload = {
        "creation_status": "READY",
        "uprn": int(epc_record["uprn"]),
        "building_reference_number": int(epc_record["building-reference-number"]),
        "has_pre_condition_report": True,
        "has_recommendations": True,
        "property_type": epc_record["property-type"],
        "built_form": epc_record["built-form"],
        "local_authority": epc_record["local-authority-label"],
        "constituency": epc_record["constituency-label"],
        "number_of_rooms": self.number_of_rooms,
        "year_built": self.year_built,
        "tenure": epc_record["tenure"],
        "current_epc_rating": epc_record["current-energy-rating"],
        "current_sap_points": epc_record["current-energy-efficiency"],
    }
    return self._clean_upload_data(payload)
@classmethod
def _prepare_rating_field(cls, field, rating_lookup):
    """
    Utility for the lambda that prepares the ``*_rating`` fields.

    :param field: raw rating text from the EPC data
    :param rating_lookup: mapping from rating text to an object exposing ``.value``
    :return: None when ``field`` is one of ``cls.DATA_ANOMALY_MATCHES``, otherwise
        ``rating_lookup[field].value`` (a KeyError is raised for unknown rating text)
    """
    if field in cls.DATA_ANOMALY_MATCHES:
        return None
    return rating_lookup[field].value
def get_property_details_epc(self, portfolio_id: int, rating_lookup):
    """
    Build the record for the property's EPC details.

    Component descriptions come from the "clean_description" of the component attributes
    already set on the instance. Each ``*_rating`` value is produced by passing the matching
    "*-energy-eff" EPC field through ``self._prepare_rating_field``. The remaining values are
    copied from the attributes populated by the ``set_*`` methods or straight from the EPC
    data.

    :param portfolio_id: ID of the portfolio the property belongs to
    :param rating_lookup: mapping handed to ``self._prepare_rating_field``
    :return: dict of EPC detail fields for the property
    """
    epc_record = self.data
    corridor = self.heat_loss_corridor
    energy = self.energy

    def rating_for(epc_field):
        # Translate the raw EPC efficiency text for one component into its stored rating
        return self._prepare_rating_field(epc_record[epc_field], rating_lookup)

    details = {
        "property_id": self.id,
        "portfolio_id": portfolio_id,
        "full_address": epc_record["address"],
        "total_floor_area": float(epc_record["total-floor-area"]),
        "walls": self.walls["clean_description"],
        "walls_rating": rating_for("walls-energy-eff"),
        "roof": self.roof["clean_description"],
        "roof_rating": rating_for("roof-energy-eff"),
        "floor": self.floor["clean_description"],
        "floor_rating": rating_for("floor-energy-eff"),
        "windows": self.windows["clean_description"],
        "windows_rating": rating_for("windows-energy-eff"),
        "heating": self.main_heating["clean_description"],
        "heating_rating": rating_for("mainheat-energy-eff"),
        "heating_controls": self.main_heating_controls["clean_description"],
        "heating_controls_rating": rating_for("mainheatc-energy-eff"),
        "hot_water": self.hotwater["clean_description"],
        "hot_water_rating": rating_for("hot-water-energy-eff"),
        "lighting": self.lighting["clean_description"],
        "lighting_rating": rating_for("lighting-energy-eff"),
        "mainfuel": self.main_fuel["clean_description"],
        "ventilation": self.ventilation["ventilation"],
        "solar_pv": self.solar_pv["solar_pv"],
        "solar_hot_water": self.solar_hot_water["solar_hot_water"],
        "wind_turbine": self.wind_turbine["wind_turbine"],
        "floor_height": self.floor_height,
        "heat_loss_corridor": corridor["heat_loss_corridor"],
        "unheated_corridor_length": corridor["length"],
        "number_of_open_fireplaces": self.number_of_open_fireplaces,
        "number_of_extensions": self.number_of_extensions,
        "number_of_storeys": self.number_of_storeys,
        "mains_gas": self.mains_gas,
        "energy_tariff": epc_record["energy-tariff"],
        "primary_energy_consumption": energy["primary_energy_consumption"],
        "co2_emissions": energy["co2_emissions"],
    }
    return details

View file

@ -2,8 +2,11 @@
# This script contains methods for interacting with the property table in the database
###
import datetime
import pytz
from sqlalchemy.orm import sessionmaker
from backend.app.db.models.portfolio import PropertyModel, PropertyCreationStatus, PortfolioStatus
from backend.app.db.models.portfolio import (
PropertyModel, PropertyCreationStatus, PortfolioStatus, PropertyTargetsModel, PropertyDetailsEpcModel
)
from backend.app.db.connection import db_engine
from sqlalchemy.orm.exc import NoResultFound
@ -20,8 +23,6 @@ def create_property(portfolio_id: int, address: str, postcode: str) -> (int, boo
Session = sessionmaker(bind=db_engine)
with Session() as session:
now = datetime.datetime.now()
try:
# Attempt to fetch the existing property
existing_property = session.query(PropertyModel).filter_by(
@ -29,7 +30,7 @@ def create_property(portfolio_id: int, address: str, postcode: str) -> (int, boo
).one()
# Update the 'updated_at' field
existing_property.updated_at = now
existing_property.updated_at = datetime.datetime.now(pytz.utc)
# Merge the updated property back into the session
session.merge(existing_property)
@ -43,8 +44,6 @@ def create_property(portfolio_id: int, address: str, postcode: str) -> (int, boo
address=address,
postcode=postcode,
portfolio_id=portfolio_id,
created_at=now,
updated_at=now,
creation_status=PropertyCreationStatus.LOADING,
status=PortfolioStatus.ASSESSMENT.value,
has_pre_condition_report=False,
@ -57,3 +56,67 @@ def create_property(portfolio_id: int, address: str, postcode: str) -> (int, boo
session.commit()
return new_property.id, True
def create_property_targets(property_id: int, portfolio_id: int, epc_target=None, heat_demand_target=None):
    """
    Insert a targets record for a property.

    A new ``PropertyTargetsModel`` row is always added and committed; no lookup for an
    existing record is performed here.
    :param property_id: The ID of the property the targets belong to
    :param portfolio_id: The ID of the portfolio the property belongs to
    :param epc_target: Goal EPC value for the property
    :param heat_demand_target: Heat demand target for the property in kwh/m^2/year
    :return: True once the record has been committed
    """
    session_factory = sessionmaker(bind=db_engine)

    with session_factory() as session:
        target_record = PropertyTargetsModel(
            property_id=property_id,
            portfolio_id=portfolio_id,
            epc=epc_target,
            heat_demand=heat_demand_target,
        )
        session.add(target_record)
        session.commit()

    return True
def update_property_data(property_id: int, portfolio_id: int, property_data: dict):
    """
    Apply a dict of field values to an existing property row.

    :param property_id: ID of the property to update
    :param portfolio_id: ID of the portfolio the property belongs to
    :param property_data: mapping of model attribute name to the new value
    :return: True once the update has been committed
    :raises Exception: when no property matches both property_id and portfolio_id
    """
    session_factory = sessionmaker(bind=db_engine)
    timestamp = datetime.datetime.now(pytz.utc)

    with session_factory() as session:
        try:
            # Fetch the row that matches both identifiers
            record = session.query(PropertyModel).filter_by(
                id=property_id, portfolio_id=portfolio_id
            ).one()

            # Copy every supplied field onto the row, then stamp the update time
            for field_name in property_data:
                setattr(record, field_name, property_data[field_name])
            record.updated_at = timestamp

            # Merge the updated row back into the session and commit
            session.merge(record)
            session.commit()
        except NoResultFound:
            raise Exception(f"Property with property_id {property_id} and portfolio_id {portfolio_id} not found")

    return True
def create_property_details_epc(property_details_epc: dict):
    """
    Insert a property details EPC record.

    :param property_details_epc: A dictionary of column values; it is unpacked directly into
        ``PropertyDetailsEpcModel``, so its keys must be valid model attributes.
    :return: True once the record has been committed. Failures surface as exceptions rather
        than a False return value.
    """
    session_factory = sessionmaker(bind=db_engine)

    with session_factory() as session:
        details_record = PropertyDetailsEpcModel(**property_details_epc)
        session.add(details_record)
        session.commit()

    return True

View file

@ -1,5 +1,7 @@
import enum
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey
import pytz
import datetime
from sqlalchemy import Column, Integer, Text, Boolean, Float, DateTime, Enum, ForeignKey, CheckConstraint
from sqlalchemy.ext.declarative import declarative_base
Base = declarative_base()
@ -40,8 +42,8 @@ class Portfolio(Base):
property_valuation_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
rental_yield_increase = Column(Float) # Unit is always £ so we don't need to store the unit for the moment
total_work_hours = Column(Float)
created_at = Column(DateTime, nullable=False)
updated_at = Column(DateTime, nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
class PropertyCreationStatus(enum.Enum):
@ -66,13 +68,14 @@ class PropertyModel(Base):
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
creation_status = Column(Enum(PropertyCreationStatus), nullable=False)
uprn = Column(Integer)
building_reference_number = Column(Integer)
status = Column(Enum(PortfolioStatus, values_callable=lambda x: [e.value for e in x]), nullable=False)
address = Column(Text)
postcode = Column(Text)
has_pre_condition_report = Column(Boolean)
has_recommendations = Column(Boolean)
created_at = Column(DateTime, nullable=False)
updated_at = Column(DateTime, nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
updated_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
property_type = Column(Text)
built_form = Column(Text)
local_authority = Column(Text)
@ -85,14 +88,29 @@ class PropertyModel(Base):
class FeatureRating(enum.Enum):
VERY_GOOD = "Very good"
GOOD = "Good"
POOR = "Poor"
VERY_POOR = "Very poor"
NA = "N/A"
VERY_GOOD = 5
GOOD = 4
AVERAGE = 3
POOR = 2
VERY_POOR = 1
NA = None
class PropertyDetailsEpc(Base):
# Maps the rating text to the matching FeatureRating member
rating_lookup = {
    "Very Good": FeatureRating.VERY_GOOD,
    "Good": FeatureRating.GOOD,
    "Average": FeatureRating.AVERAGE,
    "Poor": FeatureRating.POOR,
    "Very Poor": FeatureRating.VERY_POOR,
    "N/A": FeatureRating.NA,
}


def get_feature_rating_from_string(rating_str: str):
    """
    Look up the FeatureRating for a rating string.

    :param rating_str: rating text; matching against ``rating_lookup`` is exact and
        case-sensitive
    :return: the matching FeatureRating, or FeatureRating.NA when the text is not recognised
    """
    if rating_str in rating_lookup:
        return rating_lookup[rating_str]
    return FeatureRating.NA
class PropertyDetailsEpcModel(Base):
__tablename__ = 'property_details_epc'
id = Column(Integer, primary_key=True, autoincrement=True)
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
@ -100,21 +118,24 @@ class PropertyDetailsEpc(Base):
full_address = Column(Text)
total_floor_area = Column(Float)
walls = Column(Text)
walls_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
walls_rating = Column(Integer, CheckConstraint('walls_rating>=1 AND walls_rating<=5'))
roof = Column(Text)
roof_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
roof_rating = Column(Integer, CheckConstraint('roof_rating>=1 AND roof_rating<=5'))
floor = Column(Text)
floor_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
floor_rating = Column(Integer, CheckConstraint('floor_rating>=1 AND floor_rating<=5'))
windows = Column(Text)
windows_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
windows_rating = Column(Integer, CheckConstraint('windows_rating>=1 AND windows_rating<=5'))
heating = Column(Text)
heating_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
heating_contols = Column(Text)
heating_contols_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
heating_rating = Column(Integer, CheckConstraint('heating_rating>=1 AND heating_rating<=5'))
heating_controls = Column(Text)
heating_controls_rating = Column(
Integer, CheckConstraint('heating_controls_rating>=1 AND heating_controls_rating<=5')
)
hot_water = Column(Text)
hot_water_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
hot_water_rating = Column(Integer, CheckConstraint('hot_water_rating>=1 AND hot_water_rating<=5'))
lighting = Column(Text)
lighting_rating = Column(Enum(FeatureRating, values_callable=lambda x: [e.value for e in x]))
lighting_rating = Column(Integer, CheckConstraint('lighting_rating>=1 AND lighting_rating<=5'))
mainfuel = Column(Text)
ventilation = Column(Text)
solar_pv = Column(Text)
solar_hot_water = Column(Text)
@ -143,11 +164,11 @@ class PropertyDetailsMeter(Base):
meter_reading_gas = Column(Float)
class PropertyTargets(Base):
class PropertyTargetsModel(Base):
__tablename__ = 'property_targets'
id = Column(Integer, primary_key=True, autoincrement=True)
property_id = Column(Integer, ForeignKey('property.id'), nullable=False)
portfolio_id = Column(Integer, ForeignKey('portfolio.id'), nullable=False)
created_at = Column(DateTime, nullable=False)
created_at = Column(DateTime, nullable=False, default=datetime.datetime.now(pytz.utc))
epc = Column(Enum(Epc))
heat_demand = Column(Text)

View file

@ -1,4 +1,5 @@
from fastapi import APIRouter, Depends
from backend.app.db.models.portfolio import rating_lookup
from backend.app.dependencies import validate_token
from backend.app.plan.schemas import PlanTriggerRequest
from backend.app.utils import read_csv_from_s3
@ -9,12 +10,14 @@ from utils.logger import setup_logger
from recommendations.FloorRecommendations import FloorRecommendations
from recommendations.WallRecommendations import WallRecommendations
from utils.uvalue_estimates import classify_decile_newvalues
from model_data.EpcClean import EpcClean
# database interaction functions
from backend.app.db.functions.property_functions import create_property
from backend.app.db.functions.property_functions import (
create_property, create_property_targets, update_property_data, create_property_details_epc
)
# TODO: This is placeholder until data is stored in DB
from backend.app.plan.temp_cleaned_data import cleaned
from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls
from backend.app.plan.uvalue_estimates_floors import uvalue_estimates_floors
@ -69,6 +72,12 @@ walls_decile_data = {
'Decile 9', 'Decile 10'], 'decile_boundaries': [6., 49., 51., 55., 64., 71., 76., 83., 96.,
120., 2279.]}
# Hard-coded average "low-energy-lighting" value for each textual lighting description.
# The entries use the same keys ("lighting-description", "low-energy-lighting") as the list
# built by EpcClean._calculate_lighting_averages.
# NOTE(review): presumably meant to be passed to EpcClean via its `lighting_averages`
# argument - confirm against the EpcClean(...) call in trigger_plan.
lighting_averages = [
{'lighting-description': 'good lighting efficiency', 'low-energy-lighting': 99.26666666666667},
{'lighting-description': 'excellent lighting efficiency', 'low-energy-lighting': 100.0},
{'lighting-description': 'below average lighting efficiency', 'low-energy-lighting': 0.0}
]
@router.post("/trigger")
async def trigger_plan(body: PlanTriggerRequest):
@ -93,6 +102,14 @@ async def trigger_plan(body: PlanTriggerRequest):
if not is_new:
continue
# TODO: Need to add heat demand target
create_property_targets(
property_id=property_id,
portfolio_id=body.portfolio_id,
epc_target=body.goal_value,
heat_demand_target=None
)
input_properties.append(
Property(
postcode=config['postcode'],
@ -120,6 +137,10 @@ async def trigger_plan(body: PlanTriggerRequest):
)
p.set_is_in_conservation_area(in_conservation_area)
# TODO: This won't work perfectly as we need the table of lighting averages by constituency
cleaner = EpcClean(data=[x.data for x in input_properties])
cleaner.clean()
logger.info("Getting components and properties recommendations")
recommendations = []
for property_id, p in enumerate(input_properties):
@ -131,7 +152,7 @@ async def trigger_plan(body: PlanTriggerRequest):
)[0]
# Property recommendations
p.get_components(cleaned)
p.get_components(cleaner.cleaned)
# This is placeholder, until the full dataset is loaded into the database and we just make a read to the
# database
@ -187,4 +208,17 @@ async def trigger_plan(body: PlanTriggerRequest):
recommendations.extend(wall_recomendations.recommendations)
# Once we're done, we'll store:
# 1) the property data
# 2) the property details (epc)
# 3) the recommendations
# Upload property data
for p in input_properties:
property_details_epc = p.get_property_details_epc(portfolio_id=body.portfolio_id, rating_lookup=rating_lookup)
create_property_details_epc(property_details_epc)
property_data = p.get_full_property_data()
update_property_data(property_id=p.id, portfolio_id=body.portfolio_id, property_data=property_data)
return {"recommendations": recommendations}

File diff suppressed because it is too large Load diff

View file

@ -1,7 +1,6 @@
from typing import List, Dict, Any
from collections import Counter
import pandas as pd
from collections import defaultdict
from model_data.utils import correct_spelling
from model_data.epc_attributes.FloorAttributes import FloorAttributes
@ -32,7 +31,8 @@ class EpcClean:
"lighting-description"
]
def __init__(self, data: List[Dict[str, Any]]) -> None:
def __init__(self, data: List[Dict[str, Any]],
lighting_averages: List[Dict[str, str | float]] | None = None) -> None:
"""
EpcClean constructor.
@ -42,34 +42,51 @@ class EpcClean:
self.unique_vals: Dict[str, Any] = {}
self.cleaned: Dict[str, List[Any]] = {}
self.lighting_averages = self._calculate_lighting_averages()
if not lighting_averages:
self.lighting_averages = self._calculate_lighting_averages()
else:
self.lighting_averages = lighting_averages
def _calculate_lighting_averages(self):
    """
    Calculate the average low energy lighting proportion for a few textual lighting
    descriptions.

    This is only valid for a very tiny number of cases, so a very simple methodology is
    applied: rows whose "lighting-description" exactly matches one of the target descriptions
    are grouped by description and their "low-energy-lighting" values are averaged.

    This is done without pandas so it can be used inside of our lambdas. The rows in
    ``self.data`` are only read; the float conversion is not written back to them.

    :return: list of dicts with keys "lighting-description" (lower-cased and passed through
        ``correct_spelling``) and "low-energy-lighting" (the mean as a float), one per
        description found, in order of first appearance
    """
    # Matched verbatim against the raw data, hence the 'Excelent' spelling is kept
    # NOTE(review): presumably this misspelling is how the value appears in the EPC data - confirm
    target_descriptions = {
        'Below average lighting efficiency',
        'Good lighting efficiency',
        'Excelent lighting efficiency',
    }

    sums = defaultdict(float)
    counts = defaultdict(int)
    for row in self.data:
        description = row["lighting-description"]
        if description not in target_descriptions:
            continue
        # Convert locally rather than overwriting the value on the caller's row
        sums[description] += float(row["low-energy-lighting"])
        counts[description] += 1

    return [
        {
            "lighting-description": correct_spelling(description.lower()),
            "low-energy-lighting": total / counts[description],
        }
        for description, total in sums.items()
    ]
@ -103,9 +120,12 @@ class EpcClean:
def clean_wrapper(self, field, cleaning_cls, **kwargs):
    """
    Clean every unique description of a field and store the results.

    For each description in ``self.unique_vals[field]`` one ``cleaning_cls`` instance is
    built, and a record is appended to ``self.cleaned[field]`` containing the original
    description, the cleaner's capitalised ``description`` attribute and everything returned
    by its ``process()`` method.

    :param field: name of the field being cleaned; ``self.cleaned[field]`` must already be a list
    :param cleaning_cls: class (or factory) called as ``cleaning_cls(description, **kwargs)``
    :param kwargs: extra keyword arguments forwarded to ``cleaning_cls``
    """
    for description in self.unique_vals[field]:
        # Build the cleaner once and reuse it for both the clean description and process()
        cleaner = cleaning_cls(description, **kwargs)
        self.cleaned[field].append(
            {
                "original_description": description,
                "clean_description": cleaner.description.capitalize(),
                **cleaner.process(),
            }
        )

View file

@ -74,6 +74,10 @@ def app():
# Incorporate input data into cleaning
cleaner = EpcClean(data)
lighting_averages = cleaner.lighting_averages
# TODO: We need to store lighting_averages in a db
# We should also extend these averages so they're by more variables (property type, age band, constituency,
# etc)
cleaner.clean()
# TODO: cleaner.cleaned datasets to a db

View file

@ -22,10 +22,12 @@ class LightingAttributes:
if ('good lighting efficiency' in description) or ('excellent lighting efficiency' in description) or \
('below average lighting efficiency' in description):
average = [
x for x in self.averages if x["lighting-description"] == description
][0]["low-energy-lighting"]
return {
"low_energy_proportion": self.averages[
self.averages["lighting-description"] == description
]["low-energy-lighting"].values[0]
"low_energy_proportion": average
}
match = re.search(r'\d+', description)