"""EPC data exploration: download EPC rows by local authority and parse roof descriptions."""
import pickle
from collections import Counter
from pprint import pprint

from tqdm import tqdm

from epc_data.temp_inputs import input_data
from epc_data.Property import Property
from epc_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from epc_data.downloader import pagenated_epc_download


def handler():
    """
    Download EPC rows for every local authority covered by the input properties.

    Each entry of ``input_data`` is wrapped in a ``Property``, ``search_address_epc()`` is
    called on it, and the distinct ``local-authority`` values found in ``p.data`` are then
    passed to ``pagenated_epc_download`` (``page_size=5000``, ``n_pages=10``).

    NOTE(review): the original indentation of this file was lost. This function is assumed
    to end after the download loop, in which case the accumulated rows are neither returned
    nor stored anywhere - confirm whether that is intended.

    :return: None
    """
    # For now the input data is a hard-coded list of dictionaries; eventually it would be
    # read in from a file.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    input_properties = [
        Property(
            postcode=config["postcode"],
            address1=config["address1"],
            epc_client=epc_client,
        )
        for config in input_data
    ]

    # presumably this populates ``p.data`` with the matched EPC record - verify in Property
    for p in input_properties:
        p.search_address_epc()

    # A set, so each local authority is downloaded only once.
    local_authorities = {p.data["local-authority"] for p in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )["rows"]
        )


def search_description_options(desc):
    """
    Map a free-text insulation descriptor onto a coarse insulation category.

    Only ``"insulated"`` is recognised so far; anything else raises so that new
    descriptors are noticed and handled explicitly rather than silently mis-classified.

    :param desc: single lower-cased word taken from an EPC roof description
    :return: ``"average"`` when ``desc`` is ``"insulated"``
    :raises NotImplementedError: for any descriptor that has no mapping yet
    """
    if desc == "insulated":
        return "average"

    raise NotImplementedError(
        "Handle me: unrecognised insulation descriptor {!r}".format(desc)
    )


def _first_word_after(text, marker):
    """Return the first space-delimited word following the last ``marker`` in ``text``."""
    return text.split(marker)[-1].lstrip().split(" ")[0]


def find_insulation_thickness(description_lower, is_pitched, is_roof_room, is_flat):
    """
    Extract the insulation level from a lower-cased roof description.

    :param description_lower: roof description, already lower-cased and stripped
    :param is_pitched: whether the description mentions a pitched roof
    :param is_roof_room: whether the description mentions a roof room
    :param is_flat: whether the description mentions a flat roof
    :return: ``0`` for "no insulation", the thickness in mm as an ``int`` when a pitched
        roof states one, otherwise the category from ``search_description_options``
    :raises NotImplementedError: when the description matches no known roof type or
        uses a descriptor that is not handled yet
    """
    if "no insulation" in description_lower:
        return 0

    if is_pitched:
        # Everything before the first "mm" after "pitched," is the thickness candidate,
        # e.g. " 100 " in "pitched, 100 mm loft insulation".
        candidate = description_lower.split("pitched,")[-1].split("mm")[0].strip()
        try:
            # A trailing "+" (a lower bound written like "300+ mm") is tolerated and the
            # bound itself is returned; plain numbers are unaffected by the rstrip.
            return int(candidate.rstrip("+ "))
        except ValueError:
            # No numeric thickness: fall back to the first descriptive word.
            return search_description_options(
                _first_word_after(description_lower, "pitched,")
            )

    if is_roof_room:
        # Just search for specific phrases
        return search_description_options(
            _first_word_after(description_lower, "roof room(s),")
        )

    if is_flat:
        # Just search for specific phrases
        return search_description_options(
            _first_word_after(description_lower, "flat,")
        )

    raise NotImplementedError(
        "Unhandled: no known roof type in description {!r}".format(description_lower)
    )


def clean_roof(description):
    """
    Extract structured features from a free-text EPC roof description.

    The following are checked:
        - if the roof is pitched
        - if there is a roof room
        - if there is a loft
        - if the roof is flat
        - if there is another dwelling above
        - if the description is marked as assumed
        - the degree of insulation (see ``find_insulation_thickness``)

    :param description: raw roof description string
    :return: dict with keys ``is_pitched``, ``is_roof_room``, ``has_loft``,
        ``insulation_thickness``, ``has_dwelling_above``, ``assumed`` and ``is_flat``
    :raises NotImplementedError: when the description carries no insulation wording or
        uses wording that is not handled yet
    """
    description_lower = description.lower().strip()
    assumed = "assumed" in description_lower
    is_flat = "flat" in description_lower

    if "another dwelling above" in description_lower:
        # With a dwelling above, the roof-specific attributes are all reported as
        # absent and the insulation thickness as 0.
        return {
            "is_pitched": False,
            "is_roof_room": False,
            "has_loft": False,
            "insulation_thickness": 0,
            "has_dwelling_above": True,
            "assumed": assumed,
            "is_flat": is_flat,
        }

    is_pitched = "pitched" in description_lower
    is_roof_room = "roof room" in description_lower
    has_loft = "loft" in description_lower

    if "insulation" not in description_lower and "insulated" not in description_lower:
        raise NotImplementedError(
            "Implement me 2: no insulation wording in description {!r}".format(
                description_lower
            )
        )

    insulation_thickness = find_insulation_thickness(
        description_lower, is_pitched, is_roof_room, is_flat
    )

    return {
        "is_pitched": is_pitched,
        "is_roof_room": is_roof_room,
        "has_loft": has_loft,
        "insulation_thickness": insulation_thickness,
        "has_dwelling_above": False,
        "assumed": assumed,
        "is_flat": is_flat,
    }


# NOTE(review): the original indentation of this file was lost; the statements below are
# assumed to run at module level (not inside handler()) - confirm.

# TODO: Temp - pull in sample
# NOTE(security): pickle.load can execute arbitrary code; only load files you trust.
with open("./epc_data/test_epc_data.obj", "rb") as f:
    data = pickle.load(f)

# TODO: Fill this
CLEANING_FIELDS = [
    "roof-description",
    "floor-description",
    "walls-description",
    "mainheat-description",
]
# Backward-compatible alias for the original (misspelt) name.
ClEANING_FIELDS = CLEANING_FIELDS

field = "roof-description"
# Frequency of every distinct raw value of the field across the sample rows.
unique_vals = Counter(v[field] for v in data)
pprint(unique_vals)

# Pair every distinct raw description with its parsed attributes.
cleaned_roof = [
    {"original": description, "cleaned": clean_roof(description)}
    for description in unique_vals
]