Model/model_data/downloader.py
2023-06-15 15:05:50 +01:00

29 lines
928 B
Python

import time
def pagenated_epc_download(client, params, page_size, n_pages, verbose=0, slowdown=0.1):
    """Download consecutive pages of domestic EPC search results.

    Pages are requested one after another through
    ``client.domestic.search(params=..., offset_from=..., size=...)`` and the
    ``"rows"`` entry of every response is appended to a single list.

    Args:
        client: Object exposing ``domestic.search`` with the keyword
            arguments ``params``, ``offset_from`` and ``size``.
        params: Search parameters, passed through to the client unchanged.
        page_size: Number of rows requested per page; also the step by which
            the offset advances between requests.
        n_pages: Maximum number of pages to request. Zero or a negative
            value requests nothing. ``None`` keeps requesting until the
            client returns ``None``.
        verbose: When truthy, print the 1-based page number before each
            request.
        slowdown: Seconds to sleep before every request.

    Returns:
        A list with the rows of all downloaded pages, in request order.

    Raises:
        KeyError: If a non-``None`` response has no ``"rows"`` entry
            (assuming the response is dict-like).
    """
    results = []
    page_index = 0
    # Bounding the loop with "<" (rather than waiting for the counter to
    # equal n_pages) guarantees termination for zero, negative or
    # non-integer page budgets.
    while n_pages is None or page_index < n_pages:
        if verbose:
            print("Pulling for page %s" % str(page_index + 1))
        time.sleep(slowdown)
        search_resp = client.domestic.search(
            params=params,
            offset_from=page_index * page_size,
            size=page_size,
        )
        # Note: We can only make 10k queries for a single set of search
        # parameters. It might make sense to download data via zip for
        # machine learning since we don't need this data to be perfectly up
        # to date.
        if search_resp is None:
            # The client signals "no more data" with None; stop early.
            break
        results.extend(search_resp["rows"])
        page_index += 1
    return results