mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Added date filtering on data download but got lots of cases to debug
This commit is contained in:
parent
7724c216a8
commit
0f12f9cae4
1 changed files with 13 additions and 1 deletions
|
|
@ -76,18 +76,30 @@ def handler():
|
|||
|
||||
# We pull properties from local authorities, by property type. This will allow us to build
|
||||
# a dataset of up to 10k properties per local authority/property type combination
|
||||
# For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were
|
||||
# conducted after 2010, since SAP09 was introduced in 2009 and later SAP12 was introduced in England
|
||||
# and Wales from 31 July 2014
|
||||
# Download data from August 2014 onwards
|
||||
data = []
|
||||
for c in tqdm(constituencies):
|
||||
for pt in property_types:
|
||||
data.extend(
|
||||
pagenated_epc_download(
|
||||
client=epc_client,
|
||||
params={"constituency": c, "property-type": pt},
|
||||
params={
|
||||
"constituency": c,
|
||||
"property-type": pt,
|
||||
"from-month": 8,
|
||||
"from-year": 2014,
|
||||
},
|
||||
page_size=5000,
|
||||
n_pages=10,
|
||||
)
|
||||
)
|
||||
|
||||
test = [x for x in data if "Conservatory" in x["floor-description"]]
|
||||
test = pd.DataFrame(test)
|
||||
|
||||
# Incorporate input data into cleaning
|
||||
cleaner = EpcClean(data + [p.data for p in input_properties])
|
||||
cleaner.clean()
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue