mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
141 lines
3.7 KiB
Python
141 lines
3.7 KiB
Python
import boto3
|
|
import csv
|
|
from io import StringIO
|
|
import string
|
|
import secrets
|
|
import logging
|
|
import pandas as pd
|
|
from io import BytesIO
|
|
|
|
|
|
def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
    """
    Configure and return the root logger.

    The root logger's level is always set to ``level``. Handlers are only
    installed when the logger has none yet, or when ``overwrite_handler`` is
    true. In the latter case the handlers already attached are removed and
    closed first, so repeated calls replace the configuration instead of
    stacking duplicate handlers (which would emit every record several times).

    :param log_file: optional path of a file to log to, in addition to the console
    :param level: logging level applied to the logger and to the handlers created here
    :param overwrite_handler: when true, replace the handlers already attached
        to the root logger instead of returning early
    :return: the configured root logger
    """
    logger = logging.getLogger()
    logger.setLevel(level)

    if logger.hasHandlers():
        if not overwrite_handler:
            # Already configured and the caller did not ask for a reset.
            return logger
        # Iterate over a copy: removeHandler() mutates logger.handlers.
        for old_handler in list(logger.handlers):
            logger.removeHandler(old_handler)
            # Release any file/stream resources held by the replaced handler.
            old_handler.close()

    formatter = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    # Optional file output, attached before the console handler.
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Console output is always enabled.
    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    return logger
|
|
|
|
|
|
def read_csv_from_s3(bucket_name, filepath):
    """
    Download a CSV object from S3 and parse it into a list of row dictionaries.

    :param bucket_name: name of the S3 bucket holding the object
    :param filepath: key of the CSV object within the bucket
    :return: list with one dictionary per CSV row, as produced by csv.DictReader
    """
    client = boto3.client('s3')
    response = client.get_object(Bucket=bucket_name, Key=filepath)

    # The whole object is read into memory and decoded as UTF-8 text.
    text = response['Body'].read().decode('utf-8')

    # DictReader takes the column names from the first row of the text.
    return [row for row in csv.DictReader(StringIO(text))]
|
|
|
|
|
|
def generate_api_key(length=40):
    """
    Generate a random alphanumeric API key.

    Characters are drawn from ASCII letters and digits using the ``secrets``
    module.

    :param length: number of characters in the key; defaults to 40
    :return: the generated key as a string of ``length`` characters
    :raises ValueError: if ``length`` is smaller than 1
    """
    if length < 1:
        raise ValueError("API key length should be at least 1.")

    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(length))
|
|
|
|
|
|
def sap_to_epc(sap_points: int | float):
|
|
"""
|
|
Simple utility function to convert SAP points to EPC rating.
|
|
:param sap_points: numerical value of SAP points, typically between 0 and 100
|
|
:return:
|
|
"""
|
|
|
|
if sap_points <= 0 or sap_points > 100:
|
|
raise ValueError("SAP points should be between 1 and 100.")
|
|
|
|
if sap_points >= 92:
|
|
return "A"
|
|
elif sap_points >= 81:
|
|
return "B"
|
|
elif sap_points >= 69:
|
|
return "C"
|
|
elif sap_points >= 55:
|
|
return "D"
|
|
elif sap_points >= 39:
|
|
return "E"
|
|
elif sap_points >= 21:
|
|
return "F"
|
|
else:
|
|
return "G"
|
|
|
|
|
|
def epc_to_sap_lower_bound(epc: str):
    """
    Return the lowest SAP score that still achieves the given EPC rating.

    :param epc: EPC rating, one of the upper-case letters "A" to "G"
    :return: the lower-bound SAP score for that rating
    :raises ValueError: if ``epc`` is not one of "A" to "G"
    """
    # (band letter, lowest SAP score in that band)
    lower_bounds = (
        ("A", 92),
        ("B", 81),
        ("C", 69),
        ("D", 55),
        ("E", 39),
        ("F", 21),
        ("G", 1),
    )
    for rating, lowest_score in lower_bounds:
        if epc == rating:
            return lowest_score

    raise ValueError("EPC rating should be between A and G")
|
|
|
|
|
|
def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    """
    Serialise a pandas DataFrame to Parquet and upload it to S3.

    :param df: The pandas DataFrame to store.
    :param bucket_name: Name of the destination S3 bucket.
    :param file_key: Key of the object (including directory path within the bucket)
    """
    # Serialise entirely in memory; nothing is written to local disk.
    buffer = BytesIO()
    df.to_parquet(buffer)
    payload = buffer.getvalue()

    # Upload through the boto3 S3 resource API.
    target = boto3.resource('s3').Object(bucket_name, file_key)
    target.put(Body=payload)
|