Added date filtering on data download, but there are still lots of cases to debug

This commit is contained in:
Khalim Conn-Kowlessar 2023-07-01 16:44:25 +01:00
parent 7724c216a8
commit 0f12f9cae4

View file

@ -76,18 +76,30 @@ def handler():
# We pull properties from local authorities, by property type. This will allow us to build
# a dataset of up to 10k properties per local authority/property type combination
# For particularly old EPC data, we have inconsistent records, so we'll only include EPCs that were
# conducted after 2010, since SAP09 was introduced in 2009 and SAP12 was later introduced in England
# and Wales from 31 July 2014
# Download data from August 2014 onwards
data = []
for c in tqdm(constituencies):
for pt in property_types:
data.extend(
pagenated_epc_download(
client=epc_client,
params={"constituency": c, "property-type": pt},
params={
"constituency": c,
"property-type": pt,
"from-month": 8,
"from-year": 2014,
},
page_size=5000,
n_pages=10,
)
)
test = [x for x in data if "Conservatory" in x["floor-description"]]
test = pd.DataFrame(test)
# Incorporate input data into cleaning
cleaner = EpcClean(data + [p.data for p in input_properties])
cleaner.clean()