import re
from collections import Counter

from tqdm import tqdm

from epc_api.client import EpcClient
from epc_data.Property import Property
from epc_data.config import EPC_AUTH_TOKEN
from epc_data.downloader import pagenated_epc_download
from epc_data.temp_inputs import input_data

# TODO: Fill this. Only "roof-description" has a cleaner so far (see clean_roof).
CLEANING_FIELDS = [
    "roof-description",
    "floor-description",
    "walls-description",
    "mainheat-description",
]

# First word of an insulation phrase -> qualitative rating.
_DESCRIPTION_RATINGS = {
    "insulated": "average",
    "limited": "below average",
}

# Whole phrases following "roof room(s)," that map straight to a rating.
_ROOF_ROOM_PHRASE_RATINGS = {
    "ceiling insulated": "average",
    "thatched": "average",
}

# First decimal number in the description, e.g. "0.13".
_U_VALUE_RE = re.compile(r"(\d+\.\d+)")

# Unit of the U-value in a lower-cased description. "w/m-¦k" is the literal
# text the original pattern matched (presumably a mis-encoded "W/m²K" in the
# source data -- TODO confirm); the correctly encoded form is accepted too.
_U_VALUE_UNIT_RE = re.compile(r"(w/m-¦k|w/m²k)")


def search_description_options(desc):
    """Map a single insulation descriptor word to a qualitative rating.

    :param desc: lower-cased descriptor, e.g. ``"insulated"`` or ``"limited"``.
    :return: ``"average"`` for ``"insulated"``, ``"below average"`` for
        ``"limited"``.
    :raises NotImplementedError: for any other descriptor, so that unhandled
        wording is surfaced instead of being silently mis-classified.
    """
    try:
        return _DESCRIPTION_RATINGS[desc]
    except KeyError:
        raise NotImplementedError(
            f"Unhandled insulation descriptor: {desc!r}"
        ) from None


def find_insulation_thickness(description_lower, is_pitched, is_roof_room, is_flat):
    """Derive the insulation level from a lower-cased roof description.

    The result type depends on what the description contains:

    - ``0`` when the text contains ``"no insulation"``;
    - an ``int`` number of millimetres for pitched roofs such as
      ``"pitched, 250 mm loft insulation"``;
    - the raw string (e.g. ``"300+"``) when the pitched thickness has a ``+``;
    - a qualitative rating (``"average"`` / ``"below average"``) when only a
      descriptor word is available;
    - ``None`` when nothing can be determined.

    :param description_lower: lower-cased, stripped roof description.
    :param is_pitched: whether the description mentions a pitched roof.
    :param is_roof_room: whether the description mentions a roof room.
    :param is_flat: whether the description mentions a flat roof.
    :raises NotImplementedError: if the descriptor word is not recognised
        (propagated from :func:`search_description_options`).
    """
    if "no insulation" in description_lower:
        return 0

    if is_pitched:
        after_pitched = description_lower.split("pitched,")[-1]
        thickness = after_pitched.split("mm")[0].strip()
        if "+" in thickness:
            # Open-ended thickness such as "300+" cannot be an int; keep as-is.
            return thickness
        try:
            return int(thickness)
        except ValueError:
            if "invalid input" in description_lower:
                return None
            # No numeric thickness: fall back to the first descriptor word.
            return search_description_options(
                after_pitched.lstrip().split(" ")[0]
            )

    if is_roof_room:
        # Try a whole-phrase match first, then the first descriptor word.
        phrase = description_lower.split("roof room(s),")[-1].lstrip()
        rating = _ROOF_ROOM_PHRASE_RATINGS.get(phrase)
        if rating:
            return rating
        return search_description_options(phrase.split(" ")[0])

    if is_flat:
        first_word = description_lower.split("flat,")[-1].lstrip().split(" ")[0]
        return search_description_options(first_word)

    return None


def extract_thermal_transmittence(description_lower):
    """Extract the U-value and its unit from a lower-cased description.

    :param description_lower: lower-cased description text.
    :return: ``(u_value, unit)`` where ``u_value`` is the first decimal number
        in the text as a ``float`` and ``unit`` is the matched unit string;
        each element is ``None`` when it is not found.
    """
    value_match = _U_VALUE_RE.search(description_lower)
    unit_match = _U_VALUE_UNIT_RE.search(description_lower)

    u_value = float(value_match.group(1)) if value_match is not None else None
    unit = unit_match.group(1) if unit_match is not None else None
    return u_value, unit


def clean_roof(description):
    """Turn a free-text roof description into structured attributes.

    The description is lower-cased and stripped, then checked for: a dwelling
    or other premises above, pitched / flat / thatched roof, roof room, loft,
    the insulation level, and the thermal transmittance (U-value).

    :param description: raw roof description string.
    :return: dict with the keys ``is_pitched``, ``is_roof_room``, ``has_loft``,
        ``insulation_thickness``, ``has_dwelling_above``, ``assumed``,
        ``is_flat``, ``is_thatched``, ``thermal_transmittence`` and
        ``thermal_transmittence_unit``. The same keys are returned on every
        path.
    :raises NotImplementedError: if the description mentions neither
        insulation, thermal transmittance nor thatch, or uses an unrecognised
        insulation descriptor.
    """
    description_lower = description.lower().strip()
    assumed = "assumed" in description_lower
    is_flat = "flat" in description_lower

    if (
        "another dwelling above" in description_lower
        or "other premises above" in description_lower
    ):
        # Nothing about the roof itself can be inferred in this case.
        return {
            "is_pitched": False,
            "is_roof_room": False,
            "has_loft": False,
            "insulation_thickness": 0,
            "has_dwelling_above": True,
            "assumed": assumed,
            "is_flat": is_flat,
            "is_thatched": False,
            "thermal_transmittence": None,
            "thermal_transmittence_unit": None,
        }

    is_pitched = "pitched" in description_lower
    is_roof_room = "roof room" in description_lower
    has_loft = "loft" in description_lower
    is_thatched = "thatched" in description_lower

    thermal_transmittence = None
    thermal_transmittence_unit = None
    insulation_thickness = None

    if "insulation" in description_lower or "insulated" in description_lower:
        insulation_thickness = find_insulation_thickness(
            description_lower, is_pitched, is_roof_room, is_flat
        )
    elif "thermal transmittance" in description_lower:
        thermal_transmittence, thermal_transmittence_unit = (
            extract_thermal_transmittence(description_lower)
        )
    elif is_thatched:
        # Thatched descriptions may carry either kind of information.
        thermal_transmittence, thermal_transmittence_unit = (
            extract_thermal_transmittence(description_lower)
        )
        insulation_thickness = find_insulation_thickness(
            description_lower, is_pitched, is_roof_room, is_flat
        )
    else:
        raise NotImplementedError(
            f"Unhandled roof description: {description!r}"
        )

    return {
        "is_pitched": is_pitched,
        "is_roof_room": is_roof_room,
        "has_loft": has_loft,
        "insulation_thickness": insulation_thickness,
        "has_dwelling_above": False,
        "assumed": assumed,
        "is_flat": is_flat,
        "is_thatched": is_thatched,
        "thermal_transmittence": thermal_transmittence,
        "thermal_transmittence_unit": thermal_transmittence_unit,
    }


def handler():
    """Download EPC records for the input properties' areas and clean roofs.

    Steps:

    1. Build a ``Property`` for every entry of ``input_data`` and call
       ``search_address_epc()`` on each.
    2. Collect the distinct ``'local-authority'`` values from the properties'
       ``data`` and download records for each via ``pagenated_epc_download``.
    3. Run :func:`clean_roof` over every distinct ``"roof-description"`` value.

    :return: list of ``{"original": <description>, "cleaned": <attributes>}``
        dicts, one per distinct roof description.
    :raises NotImplementedError: if a roof description is not handled yet.
    """
    # To begin with, the input data is a list of dictionaries; eventually this
    # would be read from a file instead.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)

    input_properties = [
        Property(
            postcode=config['postcode'],
            address1=config['address1'],
            epc_client=epc_client,
        )
        for config in input_data
    ]

    for p in input_properties:
        # Presumably populates p.data from the EPC API -- verify in Property.
        p.search_address_epc()

    local_authorities = {p.data['local-authority'] for p in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    field = "roof-description"
    # Count distinct descriptions so each one is cleaned only once.
    unique_vals = Counter(v[field] for v in data)

    cleaned_roof = [
        {"original": description, "cleaned": clean_roof(description)}
        for description in unique_vals
    ]
    return cleaned_roof