restructuring wip

This commit is contained in:
Khalim Conn-Kowlessar 2023-07-20 13:48:59 +01:00
parent 14ba7581d9
commit c5b15be5c3
6 changed files with 45 additions and 7 deletions

2
.idea/Model.iml generated
View file

@ -6,7 +6,7 @@
<sourceFolder url="file://$MODULE_DIR$/model_data" isTestSource="false" />
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
</content>
<orderEntry type="jdk" jdkName="Python 3.10 Model-fastapi" jdkType="Python SDK" />
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
<orderEntry type="sourceFolder" forTests="false" />
</component>
</module>

2
.idea/misc.xml generated
View file

@ -1,6 +1,6 @@
<?xml version="1.0" encoding="UTF-8"?>
<project version="4">
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 Model-fastapi" project-jdk-type="Python SDK" />
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
<component name="PythonCompatibilityInspectionAdvertiser">
<option name="version" value="3" />
</component>

View file

@ -81,4 +81,6 @@ async def trigger_plan(body: PlanTriggerRequest):
)
p.set_is_in_conservation_area(in_conservation_area)
logger.info()
return {"message": "Plan complete"}

View file

@ -2,7 +2,6 @@ from tqdm import tqdm
import os
from model_data.BoreholeClient import BoreholeClient
from model_data.LandRegistryClient import LandRegistryClient
from conservation_areas.ConservationAreaClient import ConservationAreaClient
from model_data.temp_inputs import input_data
from model_data.Property import Property
@ -299,3 +298,39 @@ def handler():
result = correct_spelling("excelent lighting in this hosehold")
print(result)
'excellent lighting in this household'
def app():
    """
    Build the cleaned EPC dataset used to sanitise description data.

    For a pre-defined list of constituencies and property types, EPC data is
    downloaded from the API, pooled with the data of the input properties and
    run through ``EpcClean`` so that description fields of newly received
    properties can be sanitised quickly.
    :return:
    """
    # Records are pulled one constituency / property type combination at a
    # time. The original note states this yields up to 10k properties per
    # combination.
    # NOTE(review): the request below asks for page_size=5000 and n_pages=10;
    # confirm the effective cap against pagenated_epc_download.
    #
    # Particularly old EPC data has inconsistent records. SAP09 was introduced
    # in 2009 and SAP12 later followed in England and Wales from 31 July 2014,
    # so the download only covers August 2014 onwards.
    # NOTE(review): the original note also mentions keeping EPCs conducted
    # after 2010, but the filter actually applied is from-year=2014.
    downloaded = []
    for constituency in tqdm(constituencies):
        for property_type in property_types:
            query = {
                "constituency": constituency,
                "property-type": property_type,
                "from-month": 8,
                "from-year": 2014,
            }
            records = pagenated_epc_download(
                client=epc_client,
                params=query,
                page_size=5000,
                n_pages=10,
            )
            downloaded.extend(records)

    # The input properties' own data is cleaned together with the download.
    input_records = [prop.data for prop in input_properties]
    cleaner = EpcClean(downloaded + input_records)
    cleaner.clean()

View file

@ -10,4 +10,8 @@ pyproj
pint
mip
pyspellchecker
textblob
textblob
pandas==2.0.3
numpy==1.25.1
python-dateutil==2.8.2
six==1.16.0

View file

@ -1,6 +1,3 @@
geopandas
xgboost
statsmodels
scikit-learn
pandas==2.0.3
numpy==1.25.1