From 7a091842c9cca562a23f2e5248a84627913cf9c5 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Tue, 18 Jul 2023 16:45:11 +0100 Subject: [PATCH 1/3] Trying to get /backend and /model_data working together --- .idea/Model.iml | 7 ++-- .idea/misc.xml | 2 +- backend/__init__.py | 0 backend/app/config.py | 3 +- backend/app/dependencies.py | 6 ++-- backend/app/main.py | 11 +++--- backend/app/plan/router.py | 35 +++++++++++++------ backend/app/portfolio/router.py | 2 +- backend/app/utils.py | 34 ++++++++++++++++-- backend/requirements/lambda.txt | 1 - backend/requirements/local.txt | 28 +++++++++++++++ model_data/requirements/dev.txt | 5 +++ .../{ => requirements}/requirements.txt | 10 +++--- 13 files changed, 111 insertions(+), 33 deletions(-) create mode 100644 backend/__init__.py delete mode 100644 backend/requirements/lambda.txt create mode 100644 backend/requirements/local.txt create mode 100644 model_data/requirements/dev.txt rename model_data/{ => requirements}/requirements.txt (74%) diff --git a/.idea/Model.iml b/.idea/Model.iml index a7ea3cf1..a940294e 100644 --- a/.idea/Model.iml +++ b/.idea/Model.iml @@ -1,8 +1,11 @@ - - + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml index 242c02bb..ba1153d7 100644 --- a/.idea/misc.xml +++ b/.idea/misc.xml @@ -1,6 +1,6 @@ - + diff --git a/backend/__init__.py b/backend/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/backend/app/config.py b/backend/app/config.py index 07f5505f..cfd87ec4 100644 --- a/backend/app/config.py +++ b/backend/app/config.py @@ -8,9 +8,10 @@ class Settings(BaseSettings): SECRET_KEY: str ENVIRONMENT: str PLAN_TRIGGER_BUCKET: str + EPC_AUTH_TOKEN: str class Config: - env_file = ".env" + env_file = "backend/.env" @lru_cache() diff --git a/backend/app/dependencies.py b/backend/app/dependencies.py index 078f36a5..c78426bd 100644 --- a/backend/app/dependencies.py +++ b/backend/app/dependencies.py @@ -6,9 +6,10 @@ from cryptography.hazmat.primitives import hashes from cryptography.hazmat.backends import default_backend from typing import Any import json -from app.config import get_settings -from app.utils import logger +from backend.app.config import get_settings +from backend.app.utils import setup_logger +logger = setup_logger() api_key_header = APIKeyHeader(name=get_settings().API_KEY_NAME, auto_error=False) oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") @@ -94,7 +95,6 @@ def validate_jwt_token(token: str = Depends(oauth2_scheme)): async def validate_token(token: str = Depends(oauth2_scheme), request: Request = None): - print("VALIDATING - PRINT") logger.info("Validating token") logger.info(token) logger.info("Secret") diff --git a/backend/app/main.py b/backend/app/main.py index ce7f2ad4..a7dfe279 100644 --- a/backend/app/main.py +++ b/backend/app/main.py @@ -1,19 +1,18 @@ from fastapi import FastAPI, Depends from mangum import Mangum -from app.portfolio import router as portfolio_router -from app.plan import router as plan_router -from app.dependencies import validate_api_key -from app.config import get_settings - +from backend.app.portfolio import router as portfolio_router +from backend.app.plan import router as plan_router +from backend.app.dependencies import validate_api_key +from backend.app.config import get_settings app = FastAPI(dependencies=[Depends(validate_api_key)]) - app.include_router(portfolio_router.router, prefix="/v1") app.include_router(plan_router.router, prefix="/v1") if get_settings().ENVIRONMENT == "local": from app.local import router as local_router + app.include_router(local_router.router) handler = Mangum(app) diff --git a/backend/app/plan/router.py b/backend/app/plan/router.py index e0c975b3..d8e3003d 100644 --- a/backend/app/plan/router.py +++ b/backend/app/plan/router.py @@ -1,9 +1,12 @@ from fastapi import APIRouter, Depends -from app.dependencies import validate_token -from app.plan.schemas import PlanTriggerRequest -from app.utils import read_csv_from_s3, logger -from app.config import get_settings +from backend.app.dependencies import validate_token +from backend.app.plan.schemas import PlanTriggerRequest +from backend.app.utils import read_csv_from_s3, setup_logger +from backend.app.config import get_settings +from model_data.Property import Property +from epc_api.client import EpcClient +logger = setup_logger() router = APIRouter( prefix="/plan", @@ -18,13 +21,25 @@ async def trigger_plan(body: PlanTriggerRequest): logger.info("Getting the inputs") # Read in the trigger file from s3 bucket_name = get_settings().PLAN_TRIGGER_BUCKET - logger.info("bucket_name: ", bucket_name) - logger.info("body.trigger_file_path: ", body.trigger_file_path) plan_input = read_csv_from_s3(bucket_name=bucket_name, filepath=body.trigger_file_path) - logger.info("Got the inputs") - logger.info(plan_input) + print(plan_input) - # TODO: Parse the file - # TODO: Put messages on the queue + epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN) + input_properties = [ + Property(postcode=config['postcode'], address1=config['address'], epc_client=epc_client) + for config in plan_input + ] + + logger.info("Getting EPC data") + for p in input_properties: + p.search_address_epc() + p.set_year_built() + + logger.info("Parsing and validating the file") + # TODO: Add validation + logger.info("properties") + logger.info(input_properties) + + logger.info("Reading in EPC data") return {"message": "Plan triggered"} diff --git a/backend/app/portfolio/router.py b/backend/app/portfolio/router.py index d1675d14..bc3d8879 100644 --- a/backend/app/portfolio/router.py +++ b/backend/app/portfolio/router.py @@ -1,5 +1,5 @@ from fastapi import APIRouter, Depends -from app.dependencies import validate_token +from backend.app.dependencies import validate_token router = APIRouter( prefix="/portfolio", diff --git a/backend/app/utils.py b/backend/app/utils.py index 77d220ae..a3eac3f2 100644 --- a/backend/app/utils.py +++ b/backend/app/utils.py @@ -6,8 +6,38 @@ import secrets import logging -logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) +def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False): + # Create a logger and set the logging level + logger = logging.getLogger() + logger.setLevel(level) + + # if logger already has handlers, just return it + if logger.hasHandlers() and not overwrite_handler: + return logger + + # Define the log message format + log_format = "%(asctime)s [%(levelname)s] %(message)s" + date_format = "%Y-%m-%d %H:%M:%S" + formatter = logging.Formatter(log_format, datefmt=date_format) + + # Create a file handler and set the file path and format + if log_file: + file_handler = logging.FileHandler(log_file) + file_handler.setLevel(level) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + # Create a console handler and set the format + console_handler = logging.StreamHandler() + console_handler.setLevel(level) + + # Set the formatter for the handlers + console_handler.setFormatter(formatter) + + # Add the handlers to the logger + logger.addHandler(console_handler) + + return logger def read_csv_from_s3(bucket_name, filepath): diff --git a/backend/requirements/lambda.txt b/backend/requirements/lambda.txt deleted file mode 100644 index bd86ef9b..00000000 --- a/backend/requirements/lambda.txt +++ /dev/null @@ -1 +0,0 @@ -cryptography==41.0.2 \ No newline at end of file diff --git a/backend/requirements/local.txt b/backend/requirements/local.txt new file mode 100644 index 00000000..5a1693c4 --- /dev/null +++ b/backend/requirements/local.txt @@ -0,0 +1,28 @@ +anyio==3.7.1 +cffi==1.15.1 +click==8.1.3 +cryptography==37.0.4 +ecdsa==0.18.0 +exceptiongroup==1.1.2 +fastapi==0.99.1 +h11==0.14.0 +httptools==0.5.0 +idna==3.4 +mangum==0.17.0 +pyasn1==0.5.0 +pycparser==2.21 +pydantic==1.10.11 +PyJWT==2.7.0 +python-dotenv==1.0.0 +python-jose==3.3.0 +PyYAML==6.0 +rsa==4.9 +six==1.16.0 +sniffio==1.3.0 +starlette==0.27.0 +typing_extensions==4.7.1 +uvicorn==0.22.0 +uvloop==0.17.0 +watchfiles==0.19.0 +websockets==11.0.3 +boto3 \ No newline at end of file diff --git a/model_data/requirements/dev.txt b/model_data/requirements/dev.txt new file mode 100644 index 00000000..1bfe0872 --- /dev/null +++ b/model_data/requirements/dev.txt @@ -0,0 +1,5 @@ +python-dotenv +pytest +mock +pytest-cov +pytest-mock diff --git a/model_data/requirements.txt b/model_data/requirements/requirements.txt similarity index 74% rename from model_data/requirements.txt rename to model_data/requirements/requirements.txt index 13012d8a..e12edb31 100644 --- a/model_data/requirements.txt +++ b/model_data/requirements/requirements.txt @@ -1,12 +1,10 @@ +pandas==2.0.3 +numpy==1.25.1 +pytz==2023.3 +tzdata==2023.3 epc-api-python==1.0.2 -python-dotenv tqdm -pandas mypy -pytest -mock -pytest-cov -pytest-mock fuzzywuzzy python-Levenshtein dbfread From 8c609d282ac0851d62ab0ea13561bd19927d7653 Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 19 Jul 2023 09:45:23 +0100 Subject: [PATCH 2/3] refactoring backend to incorporate model_data parts --- .dockerignore | 1 + backend/docker/Dockerfile | 15 ++++++----- backend/requirements/base.txt | 12 ++++++++- model_data/ConservationAreaClient.py | 33 +++++++++++++++++++++++- model_data/Property.py | 29 +++++---------------- model_data/app.py | 3 ++- model_data/requirements/dev.txt | 1 + model_data/requirements/requirements.txt | 1 - model_data/requirements/static.txt | 1 + 9 files changed, 63 insertions(+), 33 deletions(-) create mode 100644 .dockerignore create mode 100644 model_data/requirements/static.txt diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..f5c7b106 --- /dev/null +++ b/.dockerignore @@ -0,0 +1 @@ +model_data/local_data/* diff --git a/backend/docker/Dockerfile b/backend/docker/Dockerfile index 16aa9b4b..7d35a53a 100644 --- a/backend/docker/Dockerfile +++ b/backend/docker/Dockerfile @@ -5,19 +5,22 @@ FROM python:3.10.12-slim-buster ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 -# Set work directory -WORKDIR /app +# Set work directory to the root of your project +WORKDIR /Model # Install system dependencies RUN apt-get update && apt-get install -y netcat-openbsd # Install python dependencies -COPY ./requirements/base.txt ./requirements/base.txt +COPY ./backend/requirements/base.txt ./backend/requirements/base.txt +COPY ./model_data/requirements/requirements.txt ./model_data/requirements/requirements.txt RUN pip install --upgrade pip -RUN pip install -r requirements/base.txt +RUN pip install -r backend/requirements/base.txt +RUN pip install -r model_data/requirements/requirements.txt # Copy project -COPY . . +COPY ./backend ./backend +COPY ./model_data ./model_data # command to run on container start -CMD ["uvicorn", "app.main:app", "--host", "0.0.0.0", "--port", "8000"] \ No newline at end of file +CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/backend/requirements/base.txt b/backend/requirements/base.txt index 33748cea..5a1693c4 100644 --- a/backend/requirements/base.txt +++ b/backend/requirements/base.txt @@ -1,6 +1,7 @@ anyio==3.7.1 cffi==1.15.1 click==8.1.3 +cryptography==37.0.4 ecdsa==0.18.0 exceptiongroup==1.1.2 fastapi==0.99.1 @@ -15,4 +16,13 @@ PyJWT==2.7.0 python-dotenv==1.0.0 python-jose==3.3.0 PyYAML==6.0 -cryptography==37.0.4 \ No newline at end of file +rsa==4.9 +six==1.16.0 +sniffio==1.3.0 +starlette==0.27.0 +typing_extensions==4.7.1 +uvicorn==0.22.0 +uvloop==0.17.0 +watchfiles==0.19.0 +websockets==11.0.3 +boto3 \ No newline at end of file diff --git a/model_data/ConservationAreaClient.py b/model_data/ConservationAreaClient.py index a7f1063a..7df8c69f 100644 --- a/model_data/ConservationAreaClient.py +++ b/model_data/ConservationAreaClient.py @@ -1,4 +1,4 @@ -import pandas as pd +from enum import Enum import geopandas as gpd from shapely.geometry import Point from model_data.utils import setup_logger @@ -39,6 +39,31 @@ class ConservationAreaClient: self.gov_data = gpd.read_file(self.gov_path) self.gov_data = self.gov_data.drop(columns=["dataset"]) + def is_in_conservation_area(self, coordinates: dict): + + if not coordinates: + raise ValueError("Coordinates have not been set, run get_coordinates() first") + + is_in_conservation_area = self.is_in_conservation_area_historic_england( + x_bng=coordinates["x_coordinate"], + y_bng=coordinates["y_coordinate"] + ) + + if is_in_conservation_area != "unknown": + return is_in_conservation_area + + if is_in_conservation_area == "unknown": + # We double check the secondary data source + backup = self.is_in_conservation_area_historic_gov( + longitude=coordinates["longitude"], + latitude=coordinates["latitude"] + ) + + if backup: + return ConservationAreaClient.IN_CONSERVATION_AREA + else: + return ConservationAreaClient.UNKNOWN + def is_in_conservation_area_historic_england(self, x_bng: float, y_bng: float) -> str: """ Check if a property is in a conservation area @@ -103,3 +128,9 @@ class ConservationAreaClient: distance_meters = distances.min() return distance_meters + + +class InConservationArea(Enum): + IN_CONSERVATION_AREA = ConservationAreaClient.IN_CONSERVATION_AREA + NOT_IN_CONSERVATION_AREA = ConservationAreaClient.NOT_IN_CONSERVATION_AREA + UNKNOWN = ConservationAreaClient.UNKNOWN diff --git a/model_data/Property.py b/model_data/Property.py index 3627a1d1..98b7a0b0 100644 --- a/model_data/Property.py +++ b/model_data/Property.py @@ -5,7 +5,6 @@ from model_data.config import EPC_AUTH_TOKEN from model_data.OpenUprnClient import OpenUprnClient from model_data.EpcClean import EpcClean from model_data.BaseUtility import BaseUtility -from model_data.ConservationAreaClient import ConservationAreaClient class Property(BaseUtility): @@ -117,28 +116,12 @@ class Property(BaseUtility): raise ValueError("Either No attributes or multiple found for %s" % description) setattr(self, self.ATTRIBUTE_MAP[description], attributes[0]) - def set_is_in_conservation_area(self, conservation_area_client: ConservationAreaClient): - - if not self.coordinates: - raise ValueError("Coordinates have not been set, run get_coordinates() first") - - is_in_conservation_area = conservation_area_client.is_in_conservation_area_historic_england( - x_bng=self.coordinates["x_coordinate"], - y_bng=self.coordinates["y_coordinate"] - ) - - self.in_conservation_area = is_in_conservation_area - if is_in_conservation_area == "unknown": - # We double check the secondary data source - backup = conservation_area_client.is_in_conservation_area_historic_gov( - longitude=self.coordinates["longitude"], - latitude=self.coordinates["latitude"] - ) - - if backup: - self.in_conservation_area = ConservationAreaClient.IN_CONSERVATION_AREA - else: - self.in_conservation_area = ConservationAreaClient.UNKNOWN + def set_is_in_conservation_area(self, in_conservation_area): + """ + Sets whether the property is in a conservation area given the output of the ConservationAreaClient + :param in_conservation_area: string value, indicating whether the property is in a conservation area + """ + self.in_conservation_area = in_conservation_area def set_year_built(self): """ diff --git a/model_data/app.py b/model_data/app.py index 8e340e9e..7b4f057e 100644 --- a/model_data/app.py +++ b/model_data/app.py @@ -67,7 +67,8 @@ def handler(): # Check if the property is in a conversation area for p in input_properties: - p.set_is_in_conservation_area(conservation_area_client) + in_conservation_area = conservation_area_client.is_in_conservation_area(p.coordinates) + p.set_is_in_conservation_area(in_conservation_area) local_authorities = {p.data['local-authority'] for p in input_properties} # TODO: Do this at a constituency level diff --git a/model_data/requirements/dev.txt b/model_data/requirements/dev.txt index 1bfe0872..c48188c6 100644 --- a/model_data/requirements/dev.txt +++ b/model_data/requirements/dev.txt @@ -3,3 +3,4 @@ pytest mock pytest-cov pytest-mock +pip-check-reqs diff --git a/model_data/requirements/requirements.txt b/model_data/requirements/requirements.txt index e12edb31..42e3e369 100644 --- a/model_data/requirements/requirements.txt +++ b/model_data/requirements/requirements.txt @@ -10,7 +10,6 @@ python-Levenshtein dbfread pyproj pint -geopandas mip seaborn statsmodels diff --git a/model_data/requirements/static.txt b/model_data/requirements/static.txt new file mode 100644 index 00000000..b9b8e7c5 --- /dev/null +++ b/model_data/requirements/static.txt @@ -0,0 +1 @@ +geopandas \ No newline at end of file From 1aeec976ad6d79fd9108569823ce173085843e5b Mon Sep 17 00:00:00 2001 From: Khalim Conn-Kowlessar Date: Wed, 19 Jul 2023 09:46:10 +0100 Subject: [PATCH 3/3] including model data in serverless --- backend/serverless.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/backend/serverless.yml b/backend/serverless.yml index 3aa1aa95..0ffab0ad 100644 --- a/backend/serverless.yml +++ b/backend/serverless.yml @@ -17,6 +17,8 @@ package: individually: true include: - Model/backend/** + # Might need to refine the paths that are included + - Model/model_data/** plugins: - serverless-python-requirements