From 0f12f9cae426f21bd61642c3fbb7d03f6e210db5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Sat, 1 Jul 2023 16:44:25 +0100 Subject: [PATCH] Added date filtering on data download but got lots of cases to debug --- model_data/app.py | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/model_data/app.py b/model_data/app.py index 0a6055f1..913ff546 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -76,18 +76,30 @@ def handler(): # We pull properties from local authorities, by property type. This will allow us to build # a dataset of up to 10k properties per local authority/property type combination + # For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were + # conducted after 2010, since SAP09 was introduced in 2009 and later SAP12 was introduced in England + # and Wales from 31 July 2014 + # Download data from August 2014 onwards data = [] for c in tqdm(constituencies): for pt in property_types: data.extend( pagenated_epc_download( client=epc_client, - params={"constituency": c, "property-type": pt}, + params={ + "constituency": c, + "property-type": pt, + "from-month": 8, + "from-year": 2014, + }, page_size=5000, n_pages=10, ) ) + test = [x for x in data if "Conservatory" in x["floor-description"]] + test = pd.DataFrame(test) + # Incorporate input data into cleaning cleaner = EpcClean(data + [p.data for p in input_properties]) cleaner.clean()