From c5b15be5c363ea33a3269c60c64fb2efe89797af Mon Sep 17 00:00:00 2001
From: Khalim Conn-Kowlessar <kconnkowlessar@gmail.com>
Date: Thu, 20 Jul 2023 13:48:59 +0100
Subject: [PATCH] restructuring wip

---
 .idea/Model.iml                          |  2 +-
 .idea/misc.xml                           |  2 +-
 backend/app/plan/router.py               |  2 ++
 model_data/app.py                        | 37 +++++++++++++++++++++++-
 model_data/requirements/requirements.txt |  6 +++-
 model_data/requirements/static.txt       |  3 --
 6 files changed, 45 insertions(+), 7 deletions(-)
diff --git a/.idea/Model.iml b/.idea/Model.iml
index 091102ce..80d3522c 100644
--- a/.idea/Model.iml
+++ b/.idea/Model.iml
@@ -6,7 +6,7 @@
       <sourceFolder url="file://$MODULE_DIR$/model_data" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
     </content>
-    <orderEntry type="jdk" jdkName="Python 3.10 Model-fastapi" jdkType="Python SDK" />
+    <orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
     <orderEntry type="sourceFolder" forTests="false" />
   </component>
 </module>
\ No newline at end of file
diff --git a/.idea/misc.xml b/.idea/misc.xml
index 987d6c57..ca0e1cd9 100644
--- a/.idea/misc.xml
+++ b/.idea/misc.xml
@@ -1,6 +1,6 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <project version="4">
-  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 Model-fastapi" project-jdk-type="Python SDK" />
+  <component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
   <component name="PythonCompatibilityInspectionAdvertiser">
     <option name="version" value="3" />
   </component>
diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py
index 1d57c323..1bb270ae 100644
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -81,4 +81,6 @@ async def trigger_plan(body: PlanTriggerRequest):
         )
         p.set_is_in_conservation_area(in_conservation_area)
 
+    logger.info()
+
     return {"message": "Plan complete"}
diff --git a/model_data/app.py b/model_data/app.py
index 076f3095..ae76f7d5 100644
--- a/model_data/app.py
+++ b/model_data/app.py
@@ -2,7 +2,6 @@ from tqdm import tqdm
 import os
 from model_data.BoreholeClient import BoreholeClient
 from model_data.LandRegistryClient import LandRegistryClient
-from conservation_areas.ConservationAreaClient import ConservationAreaClient
 
 from model_data.temp_inputs import input_data
 from model_data.Property import Property
@@ -299,3 +298,39 @@ def handler():
     result = correct_spelling("excelent lighting in this hosehold")
     print(result)
     'excellent lighting in this household'
+
+
+def app():
+    """
+    For a pre-defined list of constituencies and property types, we'll download EPC data from the API
+    and produce a dataset of cleaned fields so that when we get new properties, we can quickly
+    sanitise any description data
+    :return:
+    """
+
+    # We pull properties from local authorities, by property type. This will allow us to build
+    # a dataset of up to 10k properties per local authority/property type combination
+    # For particularly old EPC data, we have inconsistent records so we'll only include EPCs that were
+    # conducted after 2010, since SAP09 was introduced in 2009 and later SAP12 was introduced in England
+    # and Wales from 31 July 2014
+    # Download data from August 2014 onwards
+    data = []
+    for c in tqdm(constituencies):
+        for pt in property_types:
+            data.extend(
+                pagenated_epc_download(
+                    client=epc_client,
+                    params={
+                        "constituency": c,
+                        "property-type": pt,
+                        "from-month": 8,
+                        "from-year": 2014,
+                    },
+                    page_size=5000,
+                    n_pages=10,
+                )
+            )
+
+    # Incorporate input data into cleaning
+    cleaner = EpcClean(data + [p.data for p in input_properties])
+    cleaner.clean()
diff --git a/model_data/requirements/requirements.txt b/model_data/requirements/requirements.txt
index 06b4804e..72d31100 100644
--- a/model_data/requirements/requirements.txt
+++ b/model_data/requirements/requirements.txt
@@ -10,4 +10,8 @@ pyproj
 pint
 mip
 pyspellchecker
-textblob
\ No newline at end of file
+textblob
+pandas==2.0.3
+numpy==1.25.1
+python-dateutil==2.8.2
+six==1.16.0
\ No newline at end of file
diff --git a/model_data/requirements/static.txt b/model_data/requirements/static.txt
index e2ffa2ce..95a6a6dd 100644
--- a/model_data/requirements/static.txt
+++ b/model_data/requirements/static.txt
@@ -1,6 +1,3 @@
-geopandas
 xgboost
 statsmodels
 scikit-learn
-pandas==2.0.3
-numpy==1.25.1
\ No newline at end of file