# Model/epc_data/app.py
# 2023-06-14 18:53:37 +01:00
#
# 75 lines
# 2.3 KiB
# Python

import pandas as pd
from tqdm import tqdm
from epc_data.temp_inputs import input_data
from epc_data.Property import Property
from epc_data.config import EPC_AUTH_TOKEN
from epc_api.client import EpcClient
from epc_data.downloader import pagenated_epc_download
from epc_data.EpcClean import EpcClean
def handler():
    """Download EPC records for the input properties' areas and inspect main-fuel parsing.

    Steps, in order:

    1. Build a ``Property`` for every entry of ``input_data`` and call
       ``search_address_epc()`` on each of them.
    2. Collect the distinct ``'local-authority'`` values from the properties'
       ``data`` and call ``pagenated_epc_download`` once per local authority
       (``page_size=5000``, ``n_pages=10``), accumulating all records.
    3. Run ``EpcClean(data).clean()``.
    4. Testing aid: run ``MainFuelAttributes(description).process()`` on every
       distinct ``"main-fuel"`` value, tabulate the results in a DataFrame and
       pretty-print the row at position ``idx`` with its uninformative
       (``None`` / NaN / ``False``) fields removed.

    Returns:
        None. The function only performs downloads and prints to stdout.

    Raises:
        KeyError: if an input entry lacks ``'postcode'`` / ``'address1'``, a
            property's ``data`` lacks ``'local-authority'``, or a downloaded
            record lacks ``"main-fuel"``.
    """
    # Imports needed only by the testing section below. They stay local to the
    # function so that they do not become module-level dependencies.
    from pprint import pprint

    import numpy as np

    from epc_data.attributes.MainFuelAttributes import MainFuelAttributes

    def _is_informative(value):
        """Return False for None, NaN and boolean False; True otherwise."""
        if value is None:
            return False
        if isinstance(value, (bool, np.bool_)):
            return bool(value)
        # A membership test against ``[None, np.nan, False]`` is not reliable:
        # NaN never compares equal to itself, so only the ``np.nan`` singleton
        # would match (by identity), and ``0 == False`` would discard zeros.
        if isinstance(value, (float, np.floating)) and np.isnan(value):
            return False
        return True

    # To begin with, the input data is a list of dictionaries; eventually this
    # would be read in from a file instead.
    epc_client = EpcClient(auth_token=EPC_AUTH_TOKEN)
    input_properties = [
        Property(
            postcode=config["postcode"],
            address1=config["address1"],
            epc_client=epc_client,
        )
        for config in input_data
    ]
    for prop in input_properties:
        prop.search_address_epc()

    # A set, so that each local authority is downloaded only once.
    local_authorities = {prop.data["local-authority"] for prop in input_properties}

    data = []
    for la in tqdm(local_authorities):
        data.extend(
            pagenated_epc_download(
                client=epc_client,
                params={"local-authority": la},
                page_size=5000,
                n_pages=10,
            )
        )

    # NOTE(review): ``data`` is read again below after ``clean()``; presumably
    # the cleaner works on the records in place - confirm against EpcClean.
    cleaner = EpcClean(data)
    cleaner.clean()

    # For testing:
    descriptions = {x["main-fuel"] for x in data}
    out = [
        {
            "original_description": description,
            **MainFuelAttributes(description).process(),
        }
        for description in descriptions
    ]
    if not out:
        # Nothing was downloaded: an empty DataFrame has no
        # "original_description" column to sort by, so stop here.
        return

    df = pd.DataFrame(out)
    df = df.sort_values("original_description")
    df = df.reset_index(drop=True)

    idx = 1
    # After reset_index the label equals the position; guard against having
    # fewer rows than the hard-coded position being inspected.
    if idx < len(df):
        record = df.iloc[[idx]].to_dict("records")[0]
        record = {k: v for k, v in record.items() if _is_informative(v)}
        pprint(record)

    # Issues:
    # 1) '2207 Time and temperature zone control' - we do not pick up that this
    #    is a *time and* temperature zone control; at the moment only
    #    "temperature zone control" is captured. Can we capture the time part
    #    too?
    # 2) 'Charging system linked to use of community heating, programmer and at
    #    least two room stats' - what are room stats, and how should we capture
    #    them?

    # NOTE(review): the result of this call is discarded. It looks like a
    # leftover from interactive inspection - either return it or remove it.
    df.to_dict("records")