import csv
import logging
import secrets
import string
from io import BytesIO, StringIO

import boto3
import pandas as pd

# EPC bands paired with the lowest SAP score that earns them, ordered from the
# best band to the worst. Both conversion helpers below read this table so the
# thresholds are defined in exactly one place.
_EPC_LOWER_BOUNDS = (
    ("A", 92),
    ("B", 81),
    ("C", 69),
    ("D", 55),
    ("E", 39),
    ("F", 21),
    ("G", 1),
)


def setup_logger(log_file=None, level=logging.INFO, overwrite_handler=False):
    """
    Configure the root logger with a console handler and an optional file handler.

    :param log_file: Path of a log file to write to. When falsy, only the
        console handler is attached.
    :param level: Logging level applied to the logger and to every handler
        created here.
    :param overwrite_handler: When False and the logger already has handlers,
        the logger is returned without attaching anything new (its level is
        still updated). When True, the existing handlers are removed and
        closed before the new ones are attached.
    :return: The configured root logger.
    """
    logger = logging.getLogger()
    logger.setLevel(level)

    if logger.hasHandlers():
        if not overwrite_handler:
            # Already configured; attaching more handlers would duplicate output.
            return logger
        # "Overwrite" means replace: drop the old handlers first, otherwise
        # every record would be emitted once per stacked handler. Iterate over
        # a copy because removeHandler mutates logger.handlers.
        for existing_handler in list(logger.handlers):
            logger.removeHandler(existing_handler)
            existing_handler.close()

    formatter = logging.Formatter(
        "%(asctime)s [%(levelname)s] %(message)s",
        datefmt="%Y-%m-%d %H:%M:%S",
    )

    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    console_handler = logging.StreamHandler()
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    return logger


def read_csv_from_s3(bucket_name, filepath):
    """
    Download a CSV object from S3 and parse it into a list of row dictionaries.

    The object body is decoded as UTF-8 and the first CSV row is used as the
    header (``csv.DictReader`` semantics).

    :param bucket_name: Name of the S3 bucket.
    :param filepath: Key of the CSV object within the bucket.
    :return: A list with one dictionary per data row, keyed by column header.
    """
    s3 = boto3.client('s3')
    s3_object = s3.get_object(Bucket=bucket_name, Key=filepath)

    # The whole object is read into memory before parsing.
    body = s3_object['Body'].read()
    csv_data = StringIO(body.decode('utf-8'))

    return list(csv.DictReader(csv_data))


def generate_api_key(length=40):
    """
    Generate a random API key made of ASCII letters and digits.

    Characters are drawn with the ``secrets`` module.

    :param length: Number of characters in the key. Defaults to 40.
    :return: The generated key.
    """
    characters = string.ascii_letters + string.digits
    return ''.join(secrets.choice(characters) for _ in range(length))


def sap_to_epc(sap_points: int):
    """
    Convert a SAP score to its EPC rating band.

    :param sap_points: Numerical value of SAP points; must be greater than 0
        and no greater than 100.
    :return: The EPC rating, a single letter from "A" (best) to "G" (worst).
    :raises ValueError: If ``sap_points`` is less than or equal to 0 or
        greater than 100.
    """
    if sap_points <= 0 or sap_points > 100:
        raise ValueError("SAP points should be between 1 and 100.")

    # The table is ordered best-first, so the first threshold met is the band.
    for rating, lower_bound in _EPC_LOWER_BOUNDS:
        if sap_points >= lower_bound:
            return rating

    # Only reachable for values that pass the range check but sit below the
    # lowest threshold (e.g. a fractional score under 1); these rate as "G".
    return "G"


def epc_to_sap_lower_bound(epc: str):
    """
    Return the lowest SAP score required to achieve the given EPC rating.

    :param epc: EPC rating, an upper-case letter between "A" and "G".
    :return: The lower-bound SAP score for that rating.
    :raises ValueError: If ``epc`` is not one of "A" to "G".
    """
    for rating, lower_bound in _EPC_LOWER_BOUNDS:
        if epc == rating:
            return lower_bound

    raise ValueError("EPC rating should be between A and G")


def read_parquet_from_s3(bucket_name, file_key):
    """
    Download a Parquet object from S3 and load it into a pandas DataFrame.

    :param bucket_name: Name of the S3 bucket.
    :param file_key: Key of the Parquet object within the bucket.
    :return: The DataFrame read from the object.
    """
    client = boto3.client('s3')
    s3_object = client.get_object(Bucket=bucket_name, Key=file_key)

    # The whole object is read into memory and parsed from an in-memory buffer.
    parquet_bytes = s3_object["Body"].read()
    return pd.read_parquet(BytesIO(parquet_bytes))


def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
    """
    Save a pandas DataFrame to S3 as a Parquet file.

    :param df: The pandas DataFrame.
    :param bucket_name: Name of the S3 bucket.
    :param file_key: Key of the file (including directory path within the bucket)
    """
    # Serialize the DataFrame to Parquet in memory.
    parquet_buffer = BytesIO()
    df.to_parquet(parquet_buffer)

    # Upload the serialized bytes through the boto3 resource API.
    s3 = boto3.resource('s3')
    s3.Object(bucket_name, file_key).put(Body=parquet_buffer.getvalue())