mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Adding in get_cleaned function
This commit is contained in:
parent
98b6261fe1
commit
dd90065874
6 changed files with 69 additions and 5540 deletions
|
|
@ -8,6 +8,7 @@ from backend.app.config import get_settings
|
|||
from backend.Property import Property
|
||||
from epc_api.client import EpcClient
|
||||
from utils.logger import setup_logger
|
||||
from utils.s3 import read_from_s3
|
||||
from recommendations.FloorRecommendations import FloorRecommendations
|
||||
from recommendations.WallRecommendations import WallRecommendations
|
||||
from utils.uvalue_estimates import classify_decile_newvalues
|
||||
|
|
@ -17,6 +18,7 @@ from sqlalchemy.orm import sessionmaker
|
|||
from sqlalchemy.exc import IntegrityError, OperationalError
|
||||
from datetime import datetime
|
||||
import pandas as pd
|
||||
import msgpack
|
||||
|
||||
# model apis
|
||||
from backend.ml_models.sap_change_model.api import SAPChangeModelAPI
|
||||
|
|
@ -45,7 +47,6 @@ from model_data.simulation_system.core.Settings import (
|
|||
# TODO: This is a placeholder until the data is stored in the DB
|
||||
from backend.app.plan.uvalue_estimates_walls import uvalue_estimates_walls
|
||||
from backend.app.plan.uvalue_estimates_floors import uvalue_estimates_floors
|
||||
from backend.app.plan.temp_cleaned_data import cleaned
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
|
@ -138,6 +139,25 @@ def insert_temp_recommendation_id(property_recommendations):
|
|||
return property_recommendations
|
||||
|
||||
|
||||
def get_cleaned():
    """
    Fetch the cleaned EPC descriptions dataset from s3 and decode it.

    The object is stored in MessagePack format, so the raw payload returned by
    read_from_s3 is unpacked before it is handed back to the caller.

    :return: the unpacked contents of the stored object
    """
    # The bucket is environment specific, so resolve its name from the app settings
    bucket = "retrofit-data-{environment}".format(environment=get_settings().ENVIRONMENT)

    packed = read_from_s3(
        bucket_name=bucket,
        s3_file_name="cleaned_epc_data/cleaned.bson",
    )

    # raw=False asks msgpack to decode packed strings to str rather than leaving them as bytes
    return msgpack.unpackb(packed, raw=False)
|
||||
|
||||
|
||||
def score_measures():
|
||||
"""
|
||||
This wrapper function prepares data to be passed to the sap model api
|
||||
|
|
@ -220,8 +240,10 @@ async def trigger_plan(body: PlanTriggerRequest):
|
|||
# table probably won't be very large and won't be updated that often. It might be better to
|
||||
# store this data in s3 and load it into memory when the app starts up. We will test this
|
||||
|
||||
logger.info("Reading in materials and cleaned datasets")
|
||||
materials = get_materials(session)
|
||||
materials_by_type = filter_materials(materials)
|
||||
cleaned = get_cleaned()
|
||||
|
||||
logger.info("Getting components and properties recommendations")
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -1,3 +1,4 @@
|
|||
msgpack==1.0.5
|
||||
anyio==3.7.1
|
||||
cffi==1.15.1
|
||||
click==8.1.3
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ from model_data.EpcClean import EpcClean
|
|||
from model_data.analysis.UvalueEstimations import UvalueEstimations
|
||||
from model_data.simulation_system.core.Settings import EARLIEST_EPC_DATE
|
||||
from pathlib import Path
|
||||
from model_data.utils import save_data_to_s3
|
||||
from utils.s3 import save_data_to_s3
|
||||
|
||||
LAND_REGISTRY_PATHS = [
|
||||
os.path.abspath(os.path.dirname(__file__)) + "/model_data/local_data/pp-monthly-update-new-version.csv",
|
||||
|
|
|
|||
|
|
@ -1,6 +1,4 @@
|
|||
import boto3
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
import pandas as pd
|
||||
from io import BytesIO
|
||||
import re
|
||||
from textblob import TextBlob
|
||||
|
|
@ -48,45 +46,3 @@ def save_dataframe_to_s3_parquet(df, bucket_name, file_key):
|
|||
|
||||
# Upload the Parquet file to S3
|
||||
client.put_object(Bucket=bucket_name, Key=file_key, Body=parquet_buffer.getvalue())
|
||||
|
||||
|
||||
def save_data_to_s3(data, bucket_name, s3_file_name):
    """
    Upload a payload to an S3 bucket on a best-effort basis.

    Outcomes are reported with print() only; nothing is raised for upload
    failures and the function always returns None.

    :param data: The data to save
    :param bucket_name: The name of the S3 bucket
    :param s3_file_name: The file name (key) to store the data under in S3
    """
    # AWS credentials must already be configured - environment variables, AWS CLI, or IAM roles
    try:
        client = boto3.client('s3')
    except NoCredentialsError:
        print("Credentials not available.")
        return None
    except PartialCredentialsError:
        print("Incomplete credentials provided.")
        return None

    upload_args = {"Bucket": bucket_name, "Key": s3_file_name, "Body": data}
    try:
        client.put_object(**upload_args)
        print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}')
    except Exception as exc:
        # Any failure is reported and swallowed so callers are never interrupted
        print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(exc)}')
|
||||
|
||||
|
||||
def read_from_s3(bucket_name, s3_file_name):
    """
    Fetch an object from s3 and return its contents exactly as read.

    No decoding is performed here; that is left to the caller.

    :param bucket_name: The name of the S3 bucket
    :param s3_file_name: The file name (key) of the object to read from S3
    :return: the result of reading the object's response body
    """
    resource = boto3.resource('s3')

    # Request the object and pull the whole response body in one read
    response = resource.Object(bucket_name, s3_file_name).get()
    body = response['Body']

    return body.read()
|
||||
|
|
|
|||
44
utils/s3.py
Normal file
44
utils/s3.py
Normal file
|
|
@ -0,0 +1,44 @@
|
|||
import boto3
|
||||
from botocore.exceptions import NoCredentialsError, PartialCredentialsError
|
||||
|
||||
|
||||
def read_from_s3(bucket_name, s3_file_name):
    """
    Read an object from s3 and return its raw contents.

    Decoding of the data (e.g. MessagePack, JSON) is left to the caller.
    Errors raised by boto3 while fetching the object are not handled here and
    propagate to the caller.

    :param bucket_name: The name of the S3 bucket
    :param s3_file_name: The file name (key) of the object to read from S3
    :return: The undecoded contents of the object, as read from the response body
    """
    s3 = boto3.resource('s3')

    # Fetch the object; the payload is exposed as a stream under 'Body'
    obj = s3.Object(bucket_name, s3_file_name)
    body = obj.get()['Body']

    try:
        data = body.read()
    finally:
        # Close the underlying HTTP response stream even if the read fails part way through
        body.close()

    return data
|
||||
|
||||
|
||||
def save_data_to_s3(data, bucket_name, s3_file_name):
    """
    Save an object to an S3 bucket on a best-effort basis.

    Upload failures are reported with print() rather than raised. The return
    value tells the caller whether the upload actually happened.

    :param data: The data to save
    :param bucket_name: The name of the S3 bucket
    :param s3_file_name: The file name (key) to use for the saved data in S3
    :return: True if the object was uploaded, False otherwise
    """
    # Ensure you have AWS credentials set up - either via environment variables, AWS CLI, or IAM roles
    try:
        s3 = boto3.client('s3')
    except NoCredentialsError:
        print("Credentials not available.")
        return False
    except PartialCredentialsError:
        print("Incomplete credentials provided.")
        return False

    try:
        s3.put_object(Bucket=bucket_name, Key=s3_file_name, Body=data)
    except NoCredentialsError:
        # Missing credentials are normally only detected when the request is signed,
        # i.e. here rather than when the client is created
        print("Credentials not available.")
        return False
    except PartialCredentialsError:
        print("Incomplete credentials provided.")
        return False
    except Exception as e:
        # Deliberately broad: the upload is best-effort, so report and carry on
        print(f'Failed to upload data to {bucket_name}/{s3_file_name}: {str(e)}')
        return False

    print(f'Successfully uploaded data to {bucket_name}/{s3_file_name}')
    return True
|
||||
Loading…
Add table
Reference in a new issue