Model/epc_data/tests/app.py
Khalim Conn-Kowlessar ec95fcf99c data cleaning wip
2023-06-07 23:00:59 +01:00

142 lines
4.6 KiB
Python

from tqdm import tqdm
from epc_data.temp_inputs import input_data
from epc_data.Property import Property
from epc_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from epc_data.downloader import pagenated_epc_download
def handler():
    """
    Look up each input property and download EPC rows for their local authorities.

    Builds a ``Property`` for every entry of ``input_data``, calls
    ``search_address_epc()`` on each one, collects the distinct
    ``data['local-authority']`` values and then downloads pages of EPC rows
    for every such local authority.
    """
    # To begin with, the input data is a list of dictionaries, however we would read this file in
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    # Construct every property first; the lookups are only started afterwards.
    input_properties = []
    for config in input_data:
        input_properties.append(
            Property(
                postcode=config["postcode"],
                address1=config["address1"],
                epc_client=epc_client,
            )
        )

    for prop in input_properties:
        prop.search_address_epc()

    # De-duplicate so each local authority is downloaded once.
    local_authorities = set()
    for prop in input_properties:
        local_authorities.add(prop.data["local-authority"])

    # NOTE(review): `data` is neither returned nor used in the visible body - confirm intent.
    data = []
    for la in tqdm(local_authorities):
        download = pagenated_epc_download(
            client=epc_client,
            params={"local-authority": la},
            page_size=5000,
            n_pages=10,
        )
        data.extend(download["rows"])
# TODO: Temp - pull in sample
from collections import Counter
import pickle
from pprint import pprint
# Load the pickled sample that stands in for a live download.
with open("./epc_data/test_epc_data.obj", "rb") as f:
    data = pickle.load(f)

# TODO: Fill this
# Description fields that are expected to need cleaning.
ClEANING_FIELDS = [
    "roof-description",
    "floor-description",
    "walls-description",
    "mainheat-description",
]

# Count how often each distinct value of the chosen field occurs and show it.
field = "roof-description"
unique_vals = Counter(record[field] for record in data)
pprint(unique_vals)
def search_description_options(desc):
    """
    Translate an insulation keyword into a qualitative insulation level.

    :param desc: keyword taken from a roof description
    :return: ``"average"`` when ``desc`` is ``"insulated"``
    :raises Exception: ``"Handle me"`` for every other keyword
    """
    # Only one keyword is recognised so far; everything else is flagged loudly.
    if desc != "insulated":
        raise Exception("Handle me")
    return "average"
def find_insulation_thickness(description_lower, is_pitched, is_roof_room, is_flat):
    """
    Extract the insulation level from a lower-cased roof description.

    The roof-type flags are consulted in priority order (pitched, roof room,
    flat) to choose the prefix after which the insulation details are read.

    :param description_lower: roof description text, already lower-cased
    :param is_pitched: truthy when the text should be parsed after ``"pitched,"``
    :param is_roof_room: truthy when the text should be parsed after ``"roof room(s),"``
    :param is_flat: truthy when the text should be parsed after ``"flat,"``
    :return: ``0`` when the text contains ``"no insulation"``; for pitched
        roofs the integer that precedes ``"mm"`` when one can be parsed;
        otherwise the result of ``search_description_options`` for the first
        word following the prefix
    :raises Exception: ``"Unhandled"`` when none of the flags is set; errors
        raised by ``search_description_options`` propagate unchanged
    """
    if "no insulation" in description_lower:
        return 0

    if is_pitched:
        prefix = "pitched,"
    elif is_roof_room:
        prefix = "roof room(s),"
    elif is_flat:
        prefix = "flat,"
    else:
        raise Exception("Unhandled")

    # Text after the last occurrence of the prefix (the whole text if the
    # prefix does not occur).
    details = description_lower.split(prefix)[-1]

    if is_pitched:
        try:
            return int(details.split("mm")[0].strip())
        except ValueError:
            # No leading number: fall through to the keyword lookup below. The
            # lookup is done outside this handler so that its own error is not
            # reported as chained to this ValueError.
            pass

    # Just search for specific phrases, keyed on the first word of the details.
    return search_description_options(details.lstrip().split(" ")[0])
def clean_roof(description):
    """
    Extract features from a free-text roof description so the roof can be characterised.

    The text is lower-cased and stripped, then checked for:
      - whether the roof is pitched
      - whether there is a roof room
      - whether there is a loft
      - whether the roof is flat
      - whether there is another dwelling above
      - whether the description is marked as assumed
      - the degree of insulation

    :param description: raw roof description string
    :return: dict with the keys ``is_pitched``, ``is_roof_room``, ``has_loft``,
        ``insulation_thickness``, ``has_dwelling_above``, ``assumed`` and
        ``is_flat``
    :raises Exception: ``"Implment me 2"`` when there is no dwelling above and
        the text mentions neither "insulation" nor "insulated"; errors from
        ``find_insulation_thickness`` propagate unchanged
    """
    description_lower = description.lower().strip()

    has_dwelling_above = "another dwelling above" in description_lower
    is_flat = "flat" in description_lower

    if has_dwelling_above:
        # With another dwelling above, the roof-specific features are all
        # reported as absent and the thickness as 0.
        is_pitched = False
        is_roof_room = False
        has_loft = False
        insulation_thickness = 0
    else:
        is_pitched = "pitched" in description_lower
        is_roof_room = "roof room" in description_lower
        has_loft = "loft" in description_lower

        if "insulation" not in description_lower and "insulated" not in description_lower:
            raise Exception("Implment me 2")

        insulation_thickness = find_insulation_thickness(
            description_lower, is_pitched, is_roof_room, is_flat
        )

    return {
        "is_pitched": is_pitched,
        "is_roof_room": is_roof_room,
        "has_loft": has_loft,
        "insulation_thickness": insulation_thickness,
        "has_dwelling_above": has_dwelling_above,
        "assumed": "assumed" in description_lower,
        "is_flat": is_flat,
    }
# Clean every distinct roof description, keeping the raw text next to the result.
cleaned_roof = []
for description in unique_vals:
    cleaned_roof.append(dict(original=description, cleaned=clean_roof(description)))