mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
53 lines
2.1 KiB
Python
53 lines
2.1 KiB
Python
from utils.s3 import read_from_s3, list_files_and_subfolders_in_s3_folder, list_xmls_in_s3_folder
|
|
from utils.logger import setup_logger
|
|
from etl.xml_survey_extraction.XmlParser import XmlParser
|
|
import os
|
|
from io import BytesIO
|
|
|
|
# Module-wide logger, created once at import time via the project's logging helper.
logger = setup_logger()

# --- Run configuration -------------------------------------------------------
# SURVEYORS and PROJECT_CODE together form the S3 prefix "{SURVEYORS}/{PROJECT_CODE}/"
# that main() lists; BUCKET is the S3 bucket that prefix lives in.
SURVEYORS: str = "JAFFERSONS ENERGY CONSULTANTS"
PROJECT_CODE: str = "VDE001"
BUCKET: str = "retrofit-energy-assessments-dev"

# Not read anywhere in this file yet; see the portfolio TODO at the end of main().
PORTFOLIO_ID = None
|
|
|
|
|
|
def main() -> None:
    """
    Run the XML survey extraction for the configured surveyor and project.

    Steps:

    1. List every entry under ``{SURVEYORS}/{PROJECT_CODE}/`` in ``BUCKET``.
    2. Treat the last path segment of each entry as the property's UPRN and list the XML files
       in that entry's ``docs & plans`` folder.
    3. Download each XML and pass it to ``XmlParser`` together with its S3 key and UPRN.

    Entries whose last path segment is not an integer (for example a stray file sitting next to
    the UPRN folders) are logged with a warning and skipped instead of aborting the whole run.

    NOTE(review): extraction and persistence happen inside ``XmlParser.run()``, which is not
    visible from this file - confirm there that the parsed fields are written to the database.

    :return: None
    """
    # S3 keys always use "/" as the separator, regardless of the OS this script runs on, so the
    # key is built with posixpath rather than os.path (which would emit "\\" on Windows).
    import posixpath

    # TODO: Build solution to get this data from OneDrive and store what we need in S3
    # In S3, we have a bucket called retrofit-energy-assessments-{stage} which contains the data we need
    # The data is stored in a folder called {surveyors}/{project_code}/{uprn}
    # We'll need to get the uprn from the folder name, which we can do with EpcSearcher class
    energy_assessments = list_files_and_subfolders_in_s3_folder(
        bucket_name=BUCKET, folder_name=f"{SURVEYORS}/{PROJECT_CODE}/"
    )

    logger.info(f"Found {len(energy_assessments)} energy assessments for {SURVEYORS} and {PROJECT_CODE}")

    # Map of UPRN -> S3 keys of the XML files uploaded for that property.
    assessments_map = {}
    for assessment in energy_assessments:
        # The UPRN is the final path segment, e.g. ".../VDE001/12345/" -> 12345.
        folder_name = assessment.rstrip("/").split("/")[-1]
        try:
            uprn = int(folder_name)
        except ValueError:
            # Validate before listing so no S3 call is wasted on an entry we cannot key by UPRN.
            logger.warning(f"Skipping {assessment}: {folder_name!r} is not a numeric UPRN")
            continue

        assessments_map[uprn] = list_xmls_in_s3_folder(
            bucket_name=BUCKET, folder_name=posixpath.join(assessment, "docs & plans")
        )

    logger.info("Exatracted XMLS for the energy assessments")

    # For each property, we download the xmls and extract the data
    for uprn, xmls in assessments_map.items():
        for xml in xmls:
            xml_data = read_from_s3(bucket_name=BUCKET, s3_file_name=xml)
            # XmlParser is given a file-like object, so wrap the downloaded payload in memory.
            xml_parser = XmlParser(file=BytesIO(xml_data), filekey=xml, uprn=uprn)
            xml_parser.run()
            logger.info(f"Extracted data from {xml}")

    # TODO: Set a portfolio ID, Target and Automatically upload the asset list and create the event for the portfolio
|