added pre_site_note

This commit is contained in:
Jun-te Kim 2025-04-28 14:06:47 +00:00
parent 9049704888
commit 581b86a89e
7 changed files with 52 additions and 6 deletions

View file

@ -1,6 +1,8 @@
from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
from etl.surveyPrice.surveyPrice import SurveyPrice
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.scraper.scraper import SharePointScraper, SharePointInstaller
from urllib.parse import unquote
@ -36,8 +38,52 @@ class HubspotTodb():
return final_path
else:
return decoded_path.strip('/')
def get_sharepoint_scraper(self, installer):
    """Return the SharePoint scraper that serves the given installer.

    The installer name is matched case-insensitively and surrounding
    whitespace is ignored.

    Args:
        installer: Installer name taken from the deal row. Values that are
            not strings (for example ``None`` or a float NaN coming from a
            blank cell) are treated as an unknown installer.

    Returns:
        A ``SharePointScraper`` for "J & J CRUMP" or "SCIS", or ``None``
        when the installer is not one of the supported names.
    """
    if not isinstance(installer, str):
        # A blank cell would otherwise fail on ``.upper()``.
        return None

    normalized = installer.strip().upper()
    if normalized == "J & J CRUMP":
        return SharePointScraper(SharePointInstaller.JJC)
    if normalized == "SCIS":
        return SharePointScraper(SharePointInstaller.SOUTH_COAST_INSULATION)
    return None
def create_files_locally(self, sp, path, address):
    """Download the non-media files of a SharePoint folder into temp files.

    Args:
        sp: Scraper object providing ``get_folders_in_path``,
            ``get_file_content`` and ``create_temp_file``.
        path: Folder path handed to ``sp.get_folders_in_path``.
        address: Deal address; used as the directory prefix of every temp
            file name and as the single key of the returned mapping.

    Returns:
        ``{address: [path, ...]}`` where each path is the value returned by
        ``sp.create_temp_file``. The list is empty when the folder holds no
        downloadable file.
    """
    # Photo/video extensions to skip. Names are lower-cased before the
    # comparison so mixed-case variants such as ".Jpg" are skipped as well.
    media_extensions = (".jpg", ".jpeg", ".png", ".heic", ".mov", ".mp4")

    listing = sp.get_folders_in_path(path)
    # Only entries carrying a "file" key are files; the rest (e.g. folders)
    # are ignored.
    download_urls = {
        item["name"]: item["@microsoft.graph.downloadUrl"]
        for item in listing["value"]
        if "file" in item
        and not item["name"].lower().endswith(media_extensions)
    }

    local_paths = []
    for file_name, url in download_urls.items():
        content = sp.get_file_content(url)
        local_paths.append(sp.create_temp_file(content, f"{address}/{file_name}"))
    return {address: local_paths}
def gather_data_from_each_sharepoint(self):
    """Download each deal's SharePoint files and queue them for processing.

    Calls ``self.get_all_deals()`` and then walks ``self.deals_in_hubspot``
    row by row. For every deal whose installer has a scraper, the files are
    downloaded via ``self.create_files_locally`` and one
    ``surveyedDataProcessor`` per address is appended to
    ``self.data_in_sharepoint``. Deals whose installer has no scraper are
    reported and skipped instead of failing on a ``None`` scraper.
    """
    self.get_all_deals()
    for _, row in self.deals_in_hubspot.iterrows():
        # Resolve the path once; it is both printed and used for the download.
        path = self.get_sharepoint_path(row["HUBSPOT_SHAREPOINT_PATH"])
        print(path)

        sp = self.get_sharepoint_scraper(row["HUBSPOT_INSTALLER"])
        if sp is None:
            # get_sharepoint_scraper returns None for unsupported installers.
            print(f"No SharePoint scraper for installer {row['HUBSPOT_INSTALLER']!r}; skipping deal")
            continue

        data_loc = self.create_files_locally(sp, path, row["HUBSPOT_DEAL_ADDRESS"])
        for add, file_loc in data_loc.items():
            self.data_in_sharepoint.append(surveyedDataProcessor(add, file_loc))
def print_address(self):
    """Gather the SharePoint data, then print the address of every entry.

    Runs ``self.gather_data_from_each_sharepoint()`` first and afterwards
    prints the ``address`` attribute of each item in
    ``self.data_in_sharepoint``, one per line.
    """
    self.gather_data_from_each_sharepoint()
    addresses = (processed.address for processed in self.data_in_sharepoint)
    for address in addresses:
        print(address)

View file

@ -10,4 +10,4 @@ from etl.db.load import HubspotTodb
# print_address() runs gather_data_from_each_sharepoint() itself, so the data
# is gathered exactly once here; a separate gather call beforehand would
# download every folder twice and append duplicate entries.
dbLoader = HubspotTodb()
dbLoader.print_address()

View file

@ -1,5 +1,5 @@
from etl.pdfReader.reportType import ReportType
from etl.transform.types import (
from etl.transform.preSiteNoteTypes import (
CompanyInfo, SurverySummaryInfo, AssessorInfo,
PropertyDescription, PropertyDetail, Dimension,
Walls, Roofs, Floors, Door, VentilationAndCooling,

View file

@ -2,7 +2,7 @@
import os
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.db.db import get_db_session, init_db
from etl.transform.types import AssessorInfo
from etl.transform.preSiteNoteTypes import AssessorInfo
# Location of the example pre-site-note PDF: "example_data/pre_site_note.pdf"
# one directory above the current working directory, so the result depends on
# where the process is started from.
pre_site_note_path = os.path.join(os.getcwd(), "..", "example_data", "pre_site_note.pdf")

View file

@ -2,7 +2,7 @@
import os
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.db.db import get_db_session, init_db
from etl.transform.types import AssessorInfo
from etl.transform.preSiteNoteTypes import AssessorInfo
import pytest
from etl.jjc_old_lewis_manual_way_ import work_out_total_floor_area

View file

@ -2,7 +2,7 @@
import os
from etl.surveyedData.surveryedData import surveyedDataProcessor
from etl.db.db import get_db_session, init_db
from etl.transform.types import AssessorInfo
from etl.transform.preSiteNoteTypes import AssessorInfo
import pytest
from etl.jjc_old_lewis_manual_way_ import work_out_total_floor_area