Model/backend/app/utils.py
2024-01-16 17:11:35 +00:00

140 lines
3.7 KiB
Python

import boto3
import csv
from io import StringIO
import string
import secrets
import logging
from io import BytesIO
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
    """
    Configure the root logger with a console handler and, optionally, a file handler.

    :param log_file: optional path of a file to log to in addition to the console
    :param level: logging level applied to the logger and to the handlers created here
    :param overwrite_handler: when False (default) and the logger already has handlers,
        only the level is updated and the logger is returned as-is; when True, the
        handlers currently attached are removed and closed before new ones are attached
    :return: the root logger
    """
    # Create a logger and set the logging level
    logger = logging.getLogger()
    logger.setLevel(level)

    if logger.hasHandlers():
        # if logger already has handlers, just return it
        if not overwrite_handler:
            return logger
        # Overwriting means replacing: drop the old handlers first, otherwise every
        # call stacks another handler and each record is emitted multiple times.
        # Iterate over a copy because removeHandler mutates logger.handlers.
        for old_handler in list(logger.handlers):
            logger.removeHandler(old_handler)
            old_handler.close()

    # Define the log message format
    log_format = "%(asctime)s [%(levelname)s] %(message)s"
    date_format = "%Y-%m-%d %H:%M:%S"
    formatter = logging.Formatter(log_format, datefmt=date_format)

    # Create a file handler only when a log file path was supplied
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # The console handler is always attached
    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    return logger
def read_csv_from_s3(bucket_name, filepath):
    """
    Download a CSV object from S3 and parse it into a list of row dictionaries.

    :param bucket_name: name of the S3 bucket
    :param filepath: key of the CSV object within the bucket
    :return: list of dicts, one per data row, keyed by the values of the header row
    """
    s3 = boto3.client('s3')
    # Get the object from s3
    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)
    # Read the raw CSV bytes from the s3 object
    body = s3_object['Body'].read()
    # 'utf-8-sig' decodes plain UTF-8 as well, but also strips a leading BOM
    # so the BOM does not end up glued to the first column name.
    text = body.decode('utf-8-sig')
    # newline='' leaves line endings untouched so the csv module can handle
    # \r, \n and \r\n itself, as the csv documentation recommends.
    csv_data = StringIO(text, newline='')
    # Use csv library to read it into a list of dictionaries
    return list(csv.DictReader(csv_data))
def generate_api_key():
    """
    Build a random API key of 40 characters.

    Each character is picked independently with ``secrets.choice`` from the
    ASCII letters (both cases) and the decimal digits.
    :return: the generated key as a string
    """
    alphabet = string.ascii_letters + string.digits
    key_length = 40
    picked = []
    for _ in range(key_length):
        picked.append(secrets.choice(alphabet))
    return ''.join(picked)
def sap_to_epc(sap_points: int | float):
"""
Simple utility function to convert SAP points to EPC rating.
:param sap_points: numerical value of SAP points, typically between 0 and 100
:return:
"""
if sap_points <= 0 or sap_points > 100:
raise ValueError("SAP points should be between 1 and 100.")
if sap_points >= 92:
return "A"
elif sap_points >= 81:
return "B"
elif sap_points >= 69:
return "C"
elif sap_points >= 55:
return "D"
elif sap_points >= 39:
return "E"
elif sap_points >= 21:
return "F"
else:
return "G"
def epc_to_sap_lower_bound(epc: str):
    """
    Look up the minimum SAP score that earns a given EPC rating.
    :param epc: EPC rating, one of the upper-case letters "A" to "G"
    :return: the lowest SAP score that falls in that rating band
    :raises ValueError: if epc is not one of the seven rating letters
    """
    lower_bounds = (
        ("A", 92),
        ("B", 81),
        ("C", 69),
        ("D", 55),
        ("E", 39),
        ("F", 21),
        ("G", 1),
    )
    # Compare with == instead of a dict lookup so that any input which matches
    # no letter, hashable or not, ends up in the ValueError below.
    for letter, minimum in lower_bounds:
        if epc == letter:
            return minimum
    raise ValueError("EPC rating should be between A and G")
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    """
    Serialise a pandas DataFrame to Parquet in memory and upload it to S3.
    :param df: The pandas DataFrame to store.
    :param bucket_name: Name of the destination S3 bucket.
    :param file_key: Key of the object to write (including directory path within the bucket)
    """
    # Write the Parquet bytes into an in-memory buffer
    buffer = BytesIO()
    df.to_parquet(buffer)

    # Address the destination object through the boto3 S3 resource API
    s3_resource = boto3.resource('s3')
    destination = s3_resource.Object(bucket_name, file_key)

    # Upload the buffered bytes as the object body
    payload = buffer.getvalue()
    destination.put(Body=payload)