From c5b15be5c363ea33a3269c60c64fb2efe89797af Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Thu, 20 Jul 2023 13:48:59 +0100 Subject: [PATCH] restructuring wip --- .idea/Model.iml | 2 +- .idea/misc.xml | 2 +- backend/app/plan/router.py | 2 ++ model_data/app.py | 37 +++++++++++++++++++++++- model_data/requirements/requirements.txt | 6 +++- model_data/requirements/static.txt | 3 -- 6 files changed, 45 insertions(+), 7 deletions(-) diff --git a/.idea/Model.iml b/.idea/Model.iml index 091102ce..80d3522c 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -6,7 +6,7 @@ - + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 987d6c57..ca0e1cd9 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,6 @@ - + diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index 1d57c323..1bb270ae 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -81,4 +81,6 @@ async def trigger_plan(body: PlanTriggerRequest): ) p.set_is_in_conservation_area(in_conservation_area) + logger.info() + return {"message": "Plan complete"} diff --git a/model_data/app.py b/model_data/app.py index 076f3095..ae76f7d5 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -2,7 +2,6 @@ from tqdm import tqdm import os from model_data.BoreholeClient import BoreholeClient from model_data.LandRegistryClient import LandRegistryClient -from conservation_areas.ConservationAreaClient import ConservationAreaClient from model_data.temp_inputs import input_data from model_data.Property import Property @@ -299,3 +298,39 @@ def handler(): result = correct_spelling("excelent lighting in this hosehold") print(result) 'excellent lighting in this household' + + +def app(): + """ + For a pre-defined list of constituencies and property types, we'll download EPC data from the API + and produce a dataset of cleaned fields so that when we get new properties, we can quickly + sanitise any description data + :return: + """ + + # We pull properties 
from local authorities, by property type. This will allow us to build + # a dataset of up to 10k properties per local authority/property type combination + # For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were + # conducted after 2010, since SAP09 was introduced in 2009 and later SAP12 was introduced in England + # and Wales from 31 July 2014 + # Download data from August 2014 onwards + data = [] + for c in tqdm(constituencies): + for pt in property_types: + data.extend( + pagenated_epc_download( + client=epc_client, + params={ + "constituency": c, + "property-type": pt, + "from-month": 8, + "from-year": 2014, + }, + page_size=5000, + n_pages=10, + ) + ) + + # Incorporate input data into cleaning + cleaner = EpcClean(data + [p.data for p in input_properties]) + cleaner.clean() diff --git a/model_data/requirements/requirements.txt b/model_data/requirements/requirements.txt index 06b4804e..72d31100 100644 --- a/model_data/requirements/requirements.txt +++ b/model_data/requirements/requirements.txt @@ -10,4 +10,8 @@ pyproj pint mip pyspellchecker -textblob \ No newline at end of file +textblob +pandas==2.0.3 +numpy==1.25.1 +python-dateutil==2.8.2 +six==1.16.0 \ No newline at end of file diff --git a/model_data/requirements/static.txt b/model_data/requirements/static.txt index e2ffa2ce..95a6a6dd 100644 --- a/model_data/requirements/static.txt +++ b/model_data/requirements/static.txt @@ -1,6 +1,3 @@ -geopandas xgboost statsmodels scikit-learn -pandas==2.0.3 -numpy==1.25.1 \ No newline at end of file