mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge pull request #95 from Hestia-Homes/main
Restructuring repo and trying to complete the fastapi plan trigger endpoint
This commit is contained in:
commit
ab932eb1cc
28 changed files with 7886 additions and 93 deletions
9
.gitignore
vendored
9
.gitignore
vendored
|
|
@ -170,7 +170,7 @@ cython_debug/
|
|||
.idea/**/shelf
|
||||
|
||||
# AWS User-specific
|
||||
.idea/**/aws.xml
|
||||
.idea/misc.xml
|
||||
|
||||
# Generated files
|
||||
.idea/**/contentModel.xml
|
||||
|
|
@ -245,3 +245,10 @@ infrastructure/terraform/.terraform*
|
|||
|
||||
# Don't commit packaged-up serverless packages
|
||||
.serverless
|
||||
|
||||
backend/node_modules
|
||||
node_modules/
|
||||
backend/.idea
|
||||
open_uprn/.idea/
|
||||
conservation_areas/.idea/
|
||||
|
||||
|
|
|
|||
3
.idea/Model.iml
generated
3
.idea/Model.iml
generated
|
|
@ -4,8 +4,9 @@
|
|||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/backend" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/model_data" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/open_uprn" isTestSource="false" />
|
||||
</content>
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 fastapi" jdkType="Python SDK" />
|
||||
<orderEntry type="jdk" jdkName="Python 3.10 (model_data)" jdkType="Python SDK" />
|
||||
<orderEntry type="sourceFolder" forTests="false" />
|
||||
</component>
|
||||
</module>
|
||||
2
.idea/misc.xml
generated
2
.idea/misc.xml
generated
|
|
@ -1,6 +1,6 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project version="4">
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 fastapi" project-jdk-type="Python SDK" />
|
||||
<component name="ProjectRootManager" version="2" project-jdk-name="Python 3.10 (model_data)" project-jdk-type="Python SDK" />
|
||||
<component name="PythonCompatibilityInspectionAdvertiser">
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
|
|
|
|||
|
|
@ -1,10 +1,11 @@
|
|||
from fastapi import APIRouter, Depends
|
||||
from backend.app.dependencies import validate_token
|
||||
from backend.app.plan.schemas import PlanTriggerRequest
|
||||
from backend.app.utils import read_csv_from_s3, setup_logger
|
||||
from backend.app.utils import read_csv_from_s3
|
||||
from backend.app.config import get_settings
|
||||
from model_data.Property import Property
|
||||
from epc_api.client import EpcClient
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -15,6 +16,37 @@ router = APIRouter(
|
|||
responses={404: {"description": "Not found"}}
|
||||
)
|
||||
|
||||
# TODO: Load this data from db
|
||||
open_uprn_data = [
|
||||
{'UPRN': 6032920, 'X_COORDINATE': 535110.0, 'Y_COORDINATE': 181819.0, 'LATITUDE': 51.5191407,
|
||||
'LONGITUDE': -0.0540506},
|
||||
{'UPRN': 6038625, 'X_COORDINATE': 535374.0, 'Y_COORDINATE': 182784.0, 'LATITUDE': 51.5277492,
|
||||
'LONGITUDE': -0.0498772},
|
||||
{'UPRN': 34153991, 'X_COORDINATE': 523238.74, 'Y_COORDINATE': 178003.02, 'LATITUDE': 51.4875579,
|
||||
'LONGITUDE': -0.226392},
|
||||
{'UPRN': 10008299676, 'X_COORDINATE': 533285.0, 'Y_COORDINATE': 184711.0, 'LATITUDE': 51.5455629,
|
||||
'LONGITUDE': -0.0792445},
|
||||
{'UPRN': 10008299677, 'X_COORDINATE': 533285.0, 'Y_COORDINATE': 184711.0, 'LATITUDE': 51.5455629,
|
||||
'LONGITUDE': -0.0792445},
|
||||
{'UPRN': 100021039066, 'X_COORDINATE': 535506.0, 'Y_COORDINATE': 185624.0, 'LATITUDE': 51.5532385,
|
||||
'LONGITUDE': -0.0468833},
|
||||
{'UPRN': 100021226060, 'X_COORDINATE': 529247.0, 'Y_COORDINATE': 187959.0, 'LATITUDE': 51.5756908,
|
||||
'LONGITUDE': -0.1362513},
|
||||
{'UPRN': 200003489276, 'X_COORDINATE': 533210.0, 'Y_COORDINATE': 179442.0, 'LATITUDE': 51.4982309,
|
||||
'LONGITUDE': -0.0823165}
|
||||
]
|
||||
|
||||
in_conservation_area_data = [
|
||||
{'uprn': 6032920, 'is_in_conservation_area': 'not_in_conservation_area'},
|
||||
{'uprn': 6038625, 'is_in_conservation_area': 'not_in_conservation_area'},
|
||||
{'uprn': 34153991, 'is_in_conservation_area': 'unknown'},
|
||||
{'uprn': 10008299676, 'is_in_conservation_area': 'in_conservation_area'},
|
||||
{'uprn': 10008299677, 'is_in_conservation_area': 'in_conservation_area'},
|
||||
{'uprn': 100021039066, 'is_in_conservation_area': 'not_in_conservation_area'},
|
||||
{'uprn': 100021226060, 'is_in_conservation_area': 'in_conservation_area'},
|
||||
{'uprn': 200003489276, 'is_in_conservation_area': 'in_conservation_area'}
|
||||
]
|
||||
|
||||
|
||||
@router.post("/trigger")
|
||||
async def trigger_plan(body: PlanTriggerRequest):
|
||||
|
|
@ -22,30 +54,33 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# Read in the trigger file from s3
|
||||
bucket_name = get_settings().PLAN_TRIGGER_BUCKET
|
||||
plan_input = read_csv_from_s3(bucket_name=bucket_name, filepath=body.trigger_file_path)
|
||||
print(plan_input)
|
||||
|
||||
# TODO: Add validation to the file
|
||||
|
||||
print("What's the token")
|
||||
print(get_settings().EPC_AUTH_TOKEN)
|
||||
|
||||
logger.info("Getting EPC data")
|
||||
epc_client = EpcClient(auth_token=get_settings().EPC_AUTH_TOKEN)
|
||||
input_properties = [
|
||||
Property(postcode=config['postcode'], address1=config['address'], epc_client=epc_client)
|
||||
for config in plan_input
|
||||
]
|
||||
|
||||
logger.info("Getting EPC data")
|
||||
for p in input_properties:
|
||||
p.search_address_epc()
|
||||
p.set_year_built()
|
||||
|
||||
logger.info("Parsing and validating the file")
|
||||
logger.info("Getting coordinates")
|
||||
# This is placeholder, until the full dataset is loaded into the database
|
||||
for p in input_properties:
|
||||
coordinate_data = [x for x in open_uprn_data if x['UPRN'] == int(p.data['uprn'])][0]
|
||||
p.set_coordinates(coordinate_data)
|
||||
|
||||
logger.info("properties")
|
||||
logger.info(input_properties)
|
||||
logger.info("Check if property is in conservation area")
|
||||
for p in input_properties:
|
||||
in_conservation_area = [x for x in in_conservation_area_data if x['uprn'] == int(p.data['uprn'])][0].get(
|
||||
"is_in_conservation_area"
|
||||
)
|
||||
p.set_is_in_conservation_area(in_conservation_area)
|
||||
|
||||
# TODO: get co-ordinates
|
||||
logger.info()
|
||||
|
||||
logger.info("Reading in EPC data")
|
||||
|
||||
return {"message": "Plan triggered"}
|
||||
return {"message": "Plan complete"}
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
from enum import Enum
|
||||
import geopandas as gpd
|
||||
from shapely.geometry import Point
|
||||
from model_data.utils import setup_logger
|
||||
from utils.logger import setup_logger
|
||||
from datatypes.datatypes import OpenUprnCoordinateData
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -39,14 +40,20 @@ class ConservationAreaClient:
|
|||
self.gov_data = gpd.read_file(self.gov_path)
|
||||
self.gov_data = self.gov_data.drop(columns=["dataset"])
|
||||
|
||||
def is_in_conservation_area(self, coordinates: dict):
|
||||
def is_in_conservation_area(self, coordinates: OpenUprnCoordinateData):
|
||||
|
||||
"""
|
||||
Check if a property is in a conservation area
|
||||
:param coordinates: dictionary, which should have the OpenUprnCoordinateData format
|
||||
:return:
|
||||
"""
|
||||
|
||||
if not coordinates:
|
||||
raise ValueError("Coordinates have not been set, run get_coordinates() first")
|
||||
|
||||
is_in_conservation_area = self.is_in_conservation_area_historic_england(
|
||||
x_bng=coordinates["x_coordinate"],
|
||||
y_bng=coordinates["y_coordinate"]
|
||||
x_bng=coordinates.X_COORDINATE,
|
||||
y_bng=coordinates.Y_COORDINATE
|
||||
)
|
||||
|
||||
if is_in_conservation_area != "unknown":
|
||||
|
|
@ -55,8 +62,8 @@ class ConservationAreaClient:
|
|||
if is_in_conservation_area == "unknown":
|
||||
# We double check the secondary data source
|
||||
backup = self.is_in_conservation_area_historic_gov(
|
||||
longitude=coordinates["longitude"],
|
||||
latitude=coordinates["latitude"]
|
||||
longitude=coordinates.LONGITUDE,
|
||||
latitude=coordinates.LATITUDE
|
||||
)
|
||||
|
||||
if backup:
|
||||
51
conservation_areas/app.py
Normal file
51
conservation_areas/app.py
Normal file
|
|
@ -0,0 +1,51 @@
|
|||
"""
|
||||
This application checks whether each open uprn coordinate record lies in a conservation area,
|
||||
so that the results can be loaded into our database for querying from other services
|
||||
"""
|
||||
|
||||
import os
|
||||
from conservation_areas.ConservationAreaClient import ConservationAreaClient
|
||||
from datatypes.datatypes import OpenUprnCoordinateData
|
||||
|
||||
|
||||
def app():
    """
    Classify each sample open uprn record by its conservation-area status

    A ConservationAreaClient is built from the Historic England shapefile and the gov geojson
    (both paths are resolved relative to this file's directory), its data is read, and every
    record in the hard-coded open uprn sample is checked against it
    """
    module_dir = os.path.abspath(os.path.dirname(__file__))
    client = ConservationAreaClient(
        historic_england_path=(
            module_dir + "/model_data/local_data/Historic_Eng_Conservation_Areas/Conservation_Areas.shp"
        ),
        gov_path=module_dir + "/model_data/local_data/gov-conservation-area.geojson",
    )
    client.read()

    # Sample open uprn records; each one is checked against the conservation area data below
    open_uprn_data = [
        {'UPRN': 6032920, 'X_COORDINATE': 535110.0, 'Y_COORDINATE': 181819.0, 'LATITUDE': 51.5191407,
         'LONGITUDE': -0.0540506},
        {'UPRN': 6038625, 'X_COORDINATE': 535374.0, 'Y_COORDINATE': 182784.0, 'LATITUDE': 51.5277492,
         'LONGITUDE': -0.0498772},
        {'UPRN': 34153991, 'X_COORDINATE': 523238.74, 'Y_COORDINATE': 178003.02, 'LATITUDE': 51.4875579,
         'LONGITUDE': -0.226392},
        {'UPRN': 10008299676, 'X_COORDINATE': 533285.0, 'Y_COORDINATE': 184711.0, 'LATITUDE': 51.5455629,
         'LONGITUDE': -0.0792445},
        {'UPRN': 10008299677, 'X_COORDINATE': 533285.0, 'Y_COORDINATE': 184711.0, 'LATITUDE': 51.5455629,
         'LONGITUDE': -0.0792445},
        {'UPRN': 100021039066, 'X_COORDINATE': 535506.0, 'Y_COORDINATE': 185624.0, 'LATITUDE': 51.5532385,
         'LONGITUDE': -0.0468833},
        {'UPRN': 100021226060, 'X_COORDINATE': 529247.0, 'Y_COORDINATE': 187959.0, 'LATITUDE': 51.5756908,
         'LONGITUDE': -0.1362513},
        {'UPRN': 200003489276, 'X_COORDINATE': 533210.0, 'Y_COORDINATE': 179442.0, 'LATITUDE': 51.4982309,
         'LONGITUDE': -0.0823165}
    ]

    # One entry per record: the uprn alongside the status reported by the client
    result = []
    for record in open_uprn_data:
        uprn = record["UPRN"]
        status = client.is_in_conservation_area(OpenUprnCoordinateData(**record))
        result.append({"uprn": uprn, "is_in_conservation_area": status})

    # TODO: Add a method to write to the database
|
||||
16
conservation_areas/requirements.txt
Normal file
16
conservation_areas/requirements.txt
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
attrs==23.1.0
|
||||
certifi==2023.5.7
|
||||
click==8.1.6
|
||||
click-plugins==1.1.1
|
||||
cligj==0.7.2
|
||||
fiona==1.9.4.post1
|
||||
geopandas==0.13.2
|
||||
numpy==1.25.1
|
||||
packaging==23.1
|
||||
pandas==2.0.3
|
||||
pyproj==3.6.0
|
||||
python-dateutil==2.8.2
|
||||
pytz==2023.3
|
||||
shapely==2.0.1
|
||||
six==1.16.0
|
||||
tzdata==2023.3
|
||||
0
datatypes/__init__.py
Normal file
0
datatypes/__init__.py
Normal file
10
datatypes/datatypes.py
Normal file
10
datatypes/datatypes.py
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
from dataclasses import dataclass
|
||||
|
||||
|
||||
@dataclass
class OpenUprnCoordinateData:
    """
    Coordinate record for a single property, keyed by its UPRN

    Field names are upper case so that an open uprn record (a dict with these keys)
    can be unpacked straight into the constructor
    """

    # Unique property reference number
    UPRN: int
    # Passed as x_bng / y_bng to the Historic England lookup in ConservationAreaClient
    X_COORDINATE: float
    Y_COORDINATE: float
    # Passed as latitude / longitude to the gov lookup in ConservationAreaClient
    LATITUDE: float
    LONGITUDE: float
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import math
|
||||
from tqdm import tqdm
|
||||
from dbfread import DBF
|
||||
from model_data.utils import setup_logger
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ from typing import List, Dict
|
|||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import string
|
||||
from model_data.utils import setup_logger
|
||||
from utils.logger import setup_logger
|
||||
from fuzzywuzzy import fuzz
|
||||
import numpy as np
|
||||
|
||||
|
|
|
|||
|
|
@ -68,24 +68,12 @@ class Property(BaseUtility):
|
|||
|
||||
self.data = response["rows"][0]
|
||||
|
||||
def get_coordinates(self, open_uprn_client):
|
||||
def set_coordinates(self, coordinates):
|
||||
"""
|
||||
This method utilises the OpenUprnClient to get the coordinates of the property
|
||||
The OpenUprnClient interfaces with the Ordnance Survey Open UPRN database to extract
|
||||
property coordinates. This database holds lookups between UPRN and coordinates.
|
||||
:param open_uprn_client: Instance of OpenUprnClient. This method expects the client to have already read
|
||||
the data
|
||||
This method sets the coordinates of the property, given the open uprn data
|
||||
:param coordinates: dictionary
|
||||
"""
|
||||
|
||||
if open_uprn_client.data is None:
|
||||
raise ValueError("OpenUprnClient has not read data")
|
||||
|
||||
self.coordinates = (
|
||||
open_uprn_client.data[open_uprn_client.data["UPRN"] == int(self.data["uprn"])]
|
||||
.to_dict("records")[0]
|
||||
)
|
||||
|
||||
self.coordinates = {key.lower(): value for key, value in self.coordinates.items()}
|
||||
self.coordinates = {key.lower(): value for key, value in coordinates.items()}
|
||||
|
||||
def get_components(self, cleaner):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ from model_data.EpcClean import EpcClean
|
|||
|
||||
from statsmodels.stats.outliers_influence import variance_inflation_factor
|
||||
from tqdm import tqdm
|
||||
from model_data.utils import setup_logger
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@ from tqdm import tqdm
|
|||
import os
|
||||
from model_data.BoreholeClient import BoreholeClient
|
||||
from model_data.LandRegistryClient import LandRegistryClient
|
||||
from model_data.ConservationAreaClient import ConservationAreaClient
|
||||
|
||||
from model_data.temp_inputs import input_data
|
||||
from model_data.Property import Property
|
||||
|
|
@ -10,7 +9,7 @@ from model_data.config import EPC_AUTH_TOKEN
|
|||
from epc_api.client import EpcClient
|
||||
from model_data.downloader import pagenated_epc_download
|
||||
from model_data.EpcClean import EpcClean
|
||||
from model_data.OpenUprnClient import OpenUprnClient
|
||||
from open_uprn.OpenUprnClient import OpenUprnClient
|
||||
from model_data.analysis.UvalueEstimations import UvalueEstimations
|
||||
|
||||
LAND_REGISTRY_PATHS = [
|
||||
|
|
@ -245,11 +244,6 @@ def handler():
|
|||
|
||||
print(results.summary())
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
|
||||
import numpy as np
|
||||
|
||||
grouped_error = []
|
||||
groupby = ["mainheat-description"]
|
||||
for group, data in model_data.groupby(groupby, observed=True):
|
||||
|
|
@ -304,3 +298,39 @@ def handler():
|
|||
result = correct_spelling("excelent lighting in this hosehold")
|
||||
print(result)
|
||||
'excellent lighting in this household'
|
||||
|
||||
|
||||
def app():
    """
    Download EPC data for every constituency / property type pair and run the cleaner over it

    The downloaded rows are combined with the data of the input properties, giving a dataset of
    cleaned fields that can be used to quickly sanitise the description data of new properties
    :return:
    """

    # Properties are pulled per constituency and per property type, one paginated download
    # for each combination.
    # Particularly old EPC data has inconsistent records. SAP09 was introduced in 2009 and
    # later SAP12 was introduced in England and Wales from 31 July 2014, so only data from
    # August 2014 onwards is downloaded
    data = []
    for constituency in tqdm(constituencies):
        for property_type in property_types:
            query = {
                "constituency": constituency,
                "property-type": property_type,
                "from-month": 8,
                "from-year": 2014,
            }
            data += pagenated_epc_download(
                client=epc_client,
                params=query,
                page_size=5000,
                n_pages=10,
            )

    # Incorporate input data into cleaning
    input_rows = [p.data for p in input_properties]
    cleaner = EpcClean(data + input_rows)
    cleaner.clean()
|
||||
|
|
|
|||
|
|
@ -10,4 +10,8 @@ pyproj
|
|||
pint
|
||||
mip
|
||||
pyspellchecker
|
||||
textblob
|
||||
textblob
|
||||
pandas==2.0.3
|
||||
numpy==1.25.1
|
||||
python-dateutil==2.8.2
|
||||
six==1.16.0
|
||||
|
|
@ -1,6 +1,3 @@
|
|||
geopandas
|
||||
xgboost
|
||||
statsmodels
|
||||
scikit-learn
|
||||
pandas==2.0.3
|
||||
numpy==1.25.1
|
||||
|
|
@ -3,7 +3,7 @@ import pandas as pd
|
|||
from unittest.mock import Mock
|
||||
from epc_api.client import EpcClient
|
||||
from model_data.Property import Property
|
||||
from model_data.OpenUprnClient import OpenUprnClient
|
||||
from open_uprn.OpenUprnClient import OpenUprnClient
|
||||
from model_data.EpcClean import EpcClean
|
||||
|
||||
# Define some test data
|
||||
|
|
|
|||
|
|
@ -1,7 +1,8 @@
|
|||
import logging
|
||||
from io import StringIO
|
||||
from unittest.mock import patch
|
||||
from model_data.utils import setup_logger, is_percentage_or_number, correct_spelling
|
||||
from model_data.utils import is_percentage_or_number, correct_spelling
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
class TestLogger:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,3 @@
|
|||
import logging
|
||||
import re
|
||||
from textblob import TextBlob
|
||||
|
||||
|
|
@ -6,40 +5,6 @@ from textblob import TextBlob
|
|||
PERCENTAGE_PATTERN = re.compile(r'^\d+%?$')
|
||||
|
||||
|
||||
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
|
||||
# Create a logger and set the logging level
|
||||
logger = logging.getLogger()
|
||||
logger.setLevel(level)
|
||||
|
||||
# if logger already has handlers, just return it
|
||||
if logger.hasHandlers() and not overwrite_handler:
|
||||
return logger
|
||||
|
||||
# Define the log message format
|
||||
log_format = "%(asctime)s [%(levelname)s] %(message)s"
|
||||
date_format = "%Y-%m-%d %H:%M:%S"
|
||||
formatter = logging.Formatter(log_format, datefmt=date_format)
|
||||
|
||||
# Create a file handler and set the file path and format
|
||||
if log_file:
|
||||
file_handler = logging.FileHandler(log_file)
|
||||
file_handler.setLevel(level)
|
||||
file_handler.setFormatter(formatter)
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
# Create a console handler and set the format
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(level)
|
||||
|
||||
# Set the formatter for the handlers
|
||||
console_handler.setFormatter(formatter)
|
||||
|
||||
# Add the handlers to the logger
|
||||
logger.addHandler(console_handler)
|
||||
|
||||
return logger
|
||||
|
||||
|
||||
def is_percentage_or_number(s):
|
||||
# re.match returns None if the string does not match the pattern
|
||||
return PERCENTAGE_PATTERN.match(s) is not None
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
import pandas as pd
|
||||
from model_data.utils import setup_logger
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -12,9 +12,9 @@ class OpenUprnClient:
|
|||
|
||||
# TODO: Document this
|
||||
|
||||
def __init__(self, path, uprns):
|
||||
def __init__(self, path, uprns=None):
|
||||
self.path = path
|
||||
self.uprns = [int(x) for x in uprns]
|
||||
self.uprns = [int(x) for x in uprns] if uprns else None
|
||||
self.data = None
|
||||
|
||||
def read(self):
|
||||
|
|
@ -25,6 +25,7 @@ class OpenUprnClient:
|
|||
logger.info("Reading in open uprn data")
|
||||
|
||||
df = pd.read_csv(self.path)
|
||||
df = df[df["UPRN"].isin(self.uprns)]
|
||||
if self.uprns:
|
||||
df = df[df["UPRN"].isin(self.uprns)]
|
||||
|
||||
self.data = df
|
||||
0
open_uprn/__init__.py
Normal file
0
open_uprn/__init__.py
Normal file
18
open_uprn/app.py
Normal file
18
open_uprn/app.py
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
"""
|
||||
This application reads in the open uprn data from a static location and loads it into
|
||||
our database for querying from other services
|
||||
"""
|
||||
|
||||
import os
|
||||
from open_uprn.OpenUprnClient import OpenUprnClient
|
||||
|
||||
|
||||
def app():
    """
    Read the open uprn csv through an OpenUprnClient

    The csv path is resolved relative to the directory containing this file
    """
    module_dir = os.path.abspath(os.path.dirname(__file__))
    csv_path = module_dir + "/model_data/local_data/osopenuprn_202306_csv/osopenuprn_202305.csv"

    client = OpenUprnClient(path=csv_path)
    client.read()

    # TODO: Add a method to write to the database
|
||||
13
open_uprn/requirements.txt
Normal file
13
open_uprn/requirements.txt
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
numpy==1.25.1
|
||||
pandas==2.0.3
|
||||
python-dateutil==2.8.2
|
||||
pytz==2023.3
|
||||
six==1.16.0
|
||||
tzdata==2023.3
|
||||
click==8.1.6
|
||||
joblib==1.3.1
|
||||
nltk==3.8.1
|
||||
regex==2023.6.3
|
||||
textblob==0.17.1
|
||||
tqdm==4.65.0
|
||||
|
||||
7606
package-lock.json
generated
Normal file
7606
package-lock.json
generated
Normal file
File diff suppressed because it is too large
Load diff
6
package.json
Normal file
6
package.json
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"devDependencies": {
|
||||
"serverless-domain-manager": "^7.1.0",
|
||||
"serverless-python-requirements": "^6.0.0"
|
||||
}
|
||||
}
|
||||
|
|
@ -38,6 +38,8 @@ package:
|
|||
- infrastructure/**
|
||||
- data_collection/**
|
||||
- node_modules/**
|
||||
- conservation_areas/**
|
||||
- open_uprn/**
|
||||
|
||||
plugins:
|
||||
- serverless-python-requirements
|
||||
|
|
|
|||
0
utils/__init__.py
Normal file
0
utils/__init__.py
Normal file
35
utils/logger.py
Normal file
35
utils/logger.py
Normal file
|
|
@ -0,0 +1,35 @@
|
|||
import logging
|
||||
|
||||
|
||||
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
    """
    Configure and return the root logger

    :param log_file: optional path; when given, records are also written to this file
    :param level: logging level applied to the logger and to every handler created here
    :param overwrite_handler: when False (default) and the logger already has handlers, the
        logger is returned untouched apart from its level. When True, the existing handlers
        are removed and closed before the new ones are installed
    :return: the root logger
    """
    # Create a logger and set the logging level
    logger = logging.getLogger()
    logger.setLevel(level)

    # if logger already has handlers, just return it
    if logger.hasHandlers() and not overwrite_handler:
        return logger

    # Overwriting means replacing: without this, every call would stack another handler on
    # the logger, duplicating each log line and leaking open log files
    if overwrite_handler:
        for stale_handler in list(logger.handlers):
            logger.removeHandler(stale_handler)
            stale_handler.close()

    # Define the log message format
    log_format = "%(asctime)s [%(levelname)s] %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    formatter = logging.Formatter(log_format, datefmt=date_format)

    # The file handler (if requested) is installed first, followed by the console handler
    new_handlers = []
    if log_file:
        new_handlers.append(logging.FileHandler(log_file))
    new_handlers.append(logging.StreamHandler())

    for handler in new_handlers:
        handler.setLevel(level)
        handler.setFormatter(formatter)
        logger.addHandler(handler)

    return logger
|
||||
Loading…
Add table
Reference in a new issue