restructured backend api, moving epc clean back to how it was

2026-07-27 23:35:01 +00:00 · 2023-08-02 10:25:07 +01:00 · 2023-08-02 10:25:07 +01:00 · 3a48fa1993
commit 3a48fa1993
parent 9af1a0f6a8
6 changed files with 5515 additions and 26 deletions
--- a/backend/app/db/connection.py
+++ b/backend/app/db/connection.py
@@ -11,7 +11,4 @@ db_string = connection_string.format(
    dbname=get_settings().DB_NAME,
 )

-db_engine = create_engine(
-    db_string,
-    echo=True
-)
+db_engine = create_engine(db_string)
--- a/backend/app/plan/router.py
+++ b/backend/app/plan/router.py
@@ -10,7 +10,6 @@ from utils.logger import setup_logger
 from recommendations.FloorRecommendations import FloorRecommendations
 from recommendations.WallRecommendations import WallRecommendations
 from utils.uvalue_estimates import classify_decile_newvalues
-from model_data.EpcClean import EpcClean

 # database interaction functions
 from backend.app.db.functions.property_functions import (
@@ -20,6 +19,7 @@ from backend.app.db.functions.property_functions import (
 # TODO: This is placeholder until data is stored in DB
 from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls
 from backend.app.plan.uvalue_estimates_floors import uvalue_estimates_floors
+from backend.app.plan.temp_cleaned_data import cleaned

 logger = setup_logger()

@@ -137,10 +137,6 @@ async def trigger_plan(body: PlanTriggerRequest):
        )
        p.set_is_in_conservation_area(in_conservation_area)

-    # TODO: This won't work perfectly as we need the table of lighting averages by constituency
-    cleaner = EpcClean(data=[x.data for x in input_properties])
-    cleaner.clean()
-
    logger.info("Getting components and properties recommendations")
    recommendations = []
    for property_id, p in enumerate(input_properties):
@@ -152,7 +148,7 @@ async def trigger_plan(body: PlanTriggerRequest):
        )[0]

        # Property recommendations
-        p.get_components(cleaner.cleaned)
+        p.get_components(cleaned)

        # This is placeholder, until the full dataset is loaded into the database and we just make a read to the
        # database
--- a/backend/app/plan/temp_cleaned_data.py
+++ b/backend/app/plan/temp_cleaned_data.py
--- a/backend/requirements/base.txt
+++ b/backend/requirements/base.txt
@@ -30,4 +30,3 @@ websockets==11.0.3
 sqlalchemy==2.0.19
 psycopg2-binary
 pytz==2023.3
-pyspellchecker==0.7.2
--- a/model_data/app.py
+++ b/model_data/app.py
@@ -32,6 +32,7 @@ def app():

    constituencies = {'E14000555', 'E14000726', 'E14000720', 'E14000721', 'E14000553', 'E14000752'}
    property_types = ["bungalow", "flat", "house", "maisonette", "park home"]
+    floor_areas = ["unknown", "s", "m", "l", "xl", "xxl", "xxxl"]

    # We pull properties from local authorities, by property type. This will allow us to build
    # a dataset of up to 10k properties per local authority/property type combination
@@ -42,19 +43,21 @@ def app():
    data = []
    for c in tqdm(constituencies):
        for pt in property_types:
-            data.extend(
-                pagenated_epc_download(
-                    client=epc_client,
-                    params={
-                        "constituency": c,
-                        "property-type": pt,
-                        "from-month": 8,
-                        "from-year": 2014,
-                    },
-                    page_size=5000,
-                    n_pages=10,
+            for fa in floor_areas:
+                data.extend(
+                    pagenated_epc_download(
+                        client=epc_client,
+                        params={
+                            "constituency": c,
+                            "property-type": pt,
+                            "from-month": 8,
+                            "from-year": 2014,
+                            "floor-area": fa,
+                        },
+                        page_size=5000,
+                        n_pages=10,
+                    )
                )
-            )

    # Production of sample data for land registry
    # address_meta = [
--- a/model_data/utils.py
+++ b/model_data/utils.py
@@ -1,5 +1,5 @@
 import re
-from spellchecker import SpellChecker
+from textblob import TextBlob

 # Pre-compile the regular expression
 PERCENTAGE_PATTERN = re.compile(r'^\d+%?$')
@@ -18,8 +18,8 @@ def correct_spelling(text):
        if is_percentage_or_number(word):
            corrected_words.append(word)
        else:
-            spell = SpellChecker()
-            corrected_word = spell.correction(text)
+            blob = TextBlob(word)  # create a TextBlob object
+            corrected_word = blob.correct()  # use the correct method to correct spelling
            corrected_words.append(str(corrected_word))  # convert corrected word back to string

    corrected_text = ' '.join(corrected_words)