restructured backend api, moving epc clean back to how it was

This commit is contained in:
Khalim Conn-Kowlessar 2023-08-02 10:25:07 +01:00
parent 9af1a0f6a8
commit 3a48fa1993
6 changed files with 5515 additions and 26 deletions

View file

@ -11,7 +11,4 @@ db_string = connection_string.format(
dbname=get_settings().DB_NAME,
)
db_engine = create_engine(
db_string,
echo=True
)
db_engine = create_engine(db_string)

View file

@ -10,7 +10,6 @@ from utils.logger import setup_logger
from recommendations.FloorRecommendations import FloorRecommendations
from recommendations.WallRecommendations import WallRecommendations
from utils.uvalue_estimates import classify_decile_newvalues
from model_data.EpcClean import EpcClean
# database interaction functions
from backend.app.db.functions.property_functions import (
@ -20,6 +19,7 @@ from backend.app.db.functions.property_functions import (
# TODO: This is a placeholder until the data is stored in the DB
from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls
from backend.app.plan.uvalue_estimates_floors import uvalue_estimates_floors
from backend.app.plan.temp_cleaned_data import cleaned
logger = setup_logger()
@ -137,10 +137,6 @@ async def trigger_plan(body: PlanTriggerRequest):
)
p.set_is_in_conservation_area(in_conservation_area)
# TODO: This won't work perfectly as we need the table of lighting averages by constituency
cleaner = EpcClean(data=[x.data for x in input_properties])
cleaner.clean()
logger.info("Getting components and properties recommendations")
recommendations = []
for property_id, p in enumerate(input_properties):
@ -152,7 +148,7 @@ async def trigger_plan(body: PlanTriggerRequest):
)[0]
# Property recommendations
p.get_components(cleaner.cleaned)
p.get_components(cleaned)
# This is a placeholder until the full dataset is loaded into the database, at which point we can
# just read from the database instead

File diff suppressed because it is too large Load diff

View file

@ -30,4 +30,3 @@ websockets==11.0.3
sqlalchemy==2.0.19
psycopg2-binary
pytz==2023.3
pyspellchecker==0.7.2

View file

@ -32,6 +32,7 @@ def app():
constituencies = {'E14000555', 'E14000726', 'E14000720', 'E14000721', 'E14000553', 'E14000752'}
property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
floor_areas = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]
# We pull properties from local authorities, by property type. This will allow us to build
# a dataset of up to 10k properties per local authority/property type combination
@ -42,19 +43,21 @@ def app():
data = []
for c in tqdm(constituencies):
for pt in property_types:
data.extend(
pagenated_epc_download(
client=epc_client,
params={
"constituency": c,
"property-type": pt,
"from-month": 8,
"from-year": 2014,
},
page_size=5000,
n_pages=10,
for fa in floor_areas:
data.extend(
pagenated_epc_download(
client=epc_client,
params={
"constituency": c,
"property-type": pt,
"from-month": 8,
"from-year": 2014,
"floor-area": fa,
},
page_size=5000,
n_pages=10,
)
)
)
# Production of sample data for land registry
# address_meta = [

View file

@ -1,5 +1,5 @@
import re
from spellchecker import SpellChecker
from textblob import TextBlob
# Pre-compile the regular expression
PERCENTAGE_PATTERN = re.compile(r'^\d+%?$')
@ -18,8 +18,8 @@ def correct_spelling(text):
if is_percentage_or_number(word):
corrected_words.append(word)
else:
spell = SpellChecker()
corrected_word = spell.correction(text)
blob = TextBlob(word) # create a TextBlob object
corrected_word = blob.correct() # use the correct method to correct spelling
corrected_words.append(str(corrected_word)) # convert corrected word back to string
corrected_text = ' '.join(corrected_words)