From ae4ddf49ebb3e7fcc5f30640158f8a3c6a2d8262 Mon Sep 17 00:00:00 2001
From: Jun-te Kim
Date: Mon, 14 Apr 2025 15:10:39 +0000
Subject: [PATCH] save current work

---
Review notes (free text between the "---" marker and the first "diff --git"
line; it is not part of the commit message or of any hunk):

* etl/surveyPrice/surveyPrice.py
  - get_all_surveyed_data_from_sharepoint() returns whatever
    sharepoint_data_for_jjc() returns, because etl/hubspot_to_invoice.py
    assigns the result of that call to `df`.
  - sharepoint_data_for_jjc() appends every per-survey `info` dict to
    `all_survey_info`. The method still returns `jjc_surveys` (the list of
    surveyedDataProcessor objects); `all_survey_info` is not returned yet.
* etl/surveyedData/surveryedData.py
  - work_out_total_floor_area() sums `floor_area_m2` over the main property
    and extensions 1-4, rounds half up, and returns (banding label, area).
    Its error message reads the address from `survey_information`, the same
    attribute __init__ and get_current_sap_score() use.
  - NOTE(review): `ex4_proprerty` is spelled differently from
    `ex1_property` .. `ex3_property`; confirm the attribute name on the
    pre-site-note reader before relying on the extension-4 branch.
* etl/scraper/scraper.py
  - NOTE(review): previous_monday() returns the literal "W.C. 31.03.2025" and
    the computed value is commented out, so the default for WEEK_COMMENCING
    no longer follows the calendar; confirm this is a temporary development
    override (the WEEK_COMMENCING environment variable already allows one).
* Indentation and whitespace-only lines below are inferred from the code's
  syntax and the hunk line counts; verify against the target files.

 etl/hubSpotClient/hubspot.py      |  4 ++-
 etl/hubspot_to_invoice.py         |  3 +-
 etl/scraper/scraper.py            |  7 +++--
 etl/surveyPrice/surveyPrice.py    | 46 +++++++++++++++++++++++++++-
 etl/surveyedData/surveryedData.py | 51 +++++++++++++++++++++++++++++++
 5 files changed, 105 insertions(+), 6 deletions(-)

diff --git a/etl/hubSpotClient/hubspot.py b/etl/hubSpotClient/hubspot.py
index 353ecd2..a4a6de5 100644
--- a/etl/hubSpotClient/hubspot.py
+++ b/etl/hubSpotClient/hubspot.py
@@ -67,4 +67,6 @@ class HubSpotClient():
             print(f"Pipeline: {pipeline.label}")
             for stage in pipeline.stages:
                 print(f" - Label: {stage.label}")
-                print(f" ID: {stage.id}") #
+                print(f" ID: {stage.id}")
+
+
diff --git a/etl/hubspot_to_invoice.py b/etl/hubspot_to_invoice.py
index 4b6fc4c..9bee806 100644
--- a/etl/hubspot_to_invoice.py
+++ b/etl/hubspot_to_invoice.py
@@ -12,5 +12,6 @@ from etl.surveyPrice.surveyPrice import SurveyPrice
 
 sp = SurveyPrice()
 sp.get_cavity_pricing_table("JJC - EMPTIES")
+
 df = sp.get_all_surveys_from_hubspot()
-df
\ No newline at end of file
+df = sp.get_all_surveyed_data_from_sharepoint()
\ No newline at end of file
diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py
index 21a76f5..f3ac352 100644
--- a/etl/scraper/scraper.py
+++ b/etl/scraper/scraper.py
@@ -14,8 +14,8 @@ from datetime import datetime, timedelta
 def previous_monday():
     today = datetime.today()
     last_monday = today - timedelta(days=today.weekday() + 7) # Go back to last week's Monday
-    # return f"W.C. 31.03.2025"
-    return f"W.C. {last_monday.strftime('%d.%m.%Y')}"
+    return f"W.C. 31.03.2025"
+    # return f"W.C. {last_monday.strftime('%d.%m.%Y')}"
 
 WEEK_COMMENCING = os.getenv("WEEK_COMMENCING", previous_monday())
 
@@ -309,4 +309,5 @@ class SharePointScraper():
             temp_file.write(content.getvalue())
 
         self.logger.info(f"Temporary file created at: {path}")
-        return path
\ No newline at end of file
+        return path
+        
\ No newline at end of file
diff --git a/etl/surveyPrice/surveyPrice.py b/etl/surveyPrice/surveyPrice.py
index 10fb18f..7e437f0 100644
--- a/etl/surveyPrice/surveyPrice.py
+++ b/etl/surveyPrice/surveyPrice.py
@@ -1,5 +1,6 @@
 from etl.scraper.scraper import SharePointScraper, SharePointInstaller
 from etl.hubSpotClient.hubspot import HubSpotClient, DealStage
+from etl.surveyedData.surveryedData import surveyedDataProcessor
 
 import pandas as pd
 
@@ -100,7 +101,50 @@ class SurveyPrice():
 
         self.all_hubspot_submissions = pd.DataFrame(all_deals)
         return self.all_hubspot_submissions
-    
+
+    def get_all_surveyed_data_from_sharepoint(self):
+        return self.sharepoint_data_for_jjc()
+
+    def sharepoint_data_for_jjc(self):
+        jjc_sp = SharePointScraper(SharePointInstaller.JJC, development=True)
+        file_paths = jjc_sp.download_file_for_each_address()
+        jjc_surveys = []
+
+        for eachAddress in file_paths:
+            for address, files in eachAddress.items():
+                jjc_surveys.append(surveyedDataProcessor(address, files))
+
+        all_survey_info = []
+        for surveyInfo in jjc_surveys:
+            info = {
+                "SHAREPOINT INSTALLER": "J & J Crumps",
+                "SHAREPOINT PRE_SITE_NOTES FOUND": True if surveyInfo.pre_site_note else False,
+                "SHAREPOINT CSR FOUND": True if surveyInfo.csr else False,
+                "SHAREPOINT TOTAL_FLOOR_AREA": "NO PRE SITE NOTES FOUND",
+                "SHAREPOINT FLOOR_AREA_BANDING": "NO PRE SITE NOTES FOUND",
+                "SHAREPOINT PRE_INSTALL_SAP_SCORE": "NO PRE SITE NOTES FOUND",
+                "SHAREPOINT INSULATION MATERIAL": None,
+            }
+
+            if surveyInfo.pre_site_note:
+                floor_banding, total_floor_area = surveyInfo.work_out_total_floor_area()
+                info.update({
+                    "SHAREPOINT TOTAL_FLOOR_AREA": total_floor_area,
+                    "SHAREPOINT FLOOR_AREA_BANDING": floor_banding,
+                    "SHAREPOINT PRE_INSTALL_SAP_SCORE": surveyInfo.get_current_sap_score(),
+                })
+
+            if surveyInfo.csr:
+                info.update({
+                    "SHAREPOINT INSULATION MATERIAL": surveyInfo.get_insulation_info(),
+                })
+
+            all_survey_info.append(info)
+
+        return jjc_surveys
+
+
+
+
 
 
 
diff --git a/etl/surveyedData/surveryedData.py b/etl/surveyedData/surveryedData.py
index bc3054a..d83f186 100644
--- a/etl/surveyedData/surveryedData.py
+++ b/etl/surveyedData/surveryedData.py
@@ -1,5 +1,6 @@
 from etl.pdfReader.pdfReaderToText import pdfReaderToText
 from etl.pdfReader.reportType import ReportType
+import math
 
 class surveyedDataProcessor():
     def __init__(self, address, files):
@@ -17,5 +18,55 @@ class surveyedDataProcessor():
             if pdf:
                 if pdf.type == ReportType.QUIDOS_PRESITE_NOTE:
                     self.pre_site_note = pdf.get_reader()
+                    self.address = self.pre_site_note.survey_information.address
                 elif pdf.type == ReportType.CHARTED_SURVEYOR_REPORT:
                     self.csr = pdf.get_reader()
+
+    def get_insulation_info(self):
+        if self.csr:
+            if self.csr.insulation_info:
+                insulation = self.csr.insulation_info.type.upper()
+                return insulation
+        return None
+
+
+    def work_out_total_floor_area(self):
+
+        total = 0
+        def add_all_floors(floor_list):
+            total = 0
+            for floor in floor_list:
+                total += floor.floor_area_m2
+
+            return total
+
+        main = True if self.pre_site_note.property_description.no_of_main_property > 0 else False
+        ext1 = True if self.pre_site_note.property_description.no_of_extension_1 > 0 else False
+        ext2 = True if self.pre_site_note.property_description.no_of_extension_2 > 0 else False
+        ext3 = True if self.pre_site_note.property_description.no_of_extension_3 > 0 else False
+        ext4 = True if self.pre_site_note.property_description.no_of_extension_4 > 0 else False
+
+        total += add_all_floors(self.pre_site_note.property_description.main_property.dimensions) if main is True else 0
+        total += add_all_floors(self.pre_site_note.property_description.ex1_property.dimensions) if ext1 is True else 0
+        total += add_all_floors(self.pre_site_note.property_description.ex2_property.dimensions) if ext2 is True else 0
+        total += add_all_floors(self.pre_site_note.property_description.ex3_property.dimensions) if ext3 is True else 0
+        total += add_all_floors(self.pre_site_note.property_description.ex4_proprerty.dimensions) if ext4 is True else 0
+
+
+        floor_area = math.ceil(total) if total%1 >=0.5 else math.floor(total)
+        if 0 <= floor_area <= 72:
+            return '0-72m', floor_area
+        elif 72 < floor_area <= 97:
+            return '73-97m', floor_area
+        elif 97 < floor_area <= 199:
+            return '98-199m', floor_area
+        elif 199 < floor_area:
+            return 'over 200m', floor_area
+        else:
+            raise NotImplementedError(f"unknown floor area {floor_area} {self.pre_site_note.survey_information.address}")
+
+    def get_current_sap_score(self):
+        return self.pre_site_note.survey_information.current_sap.split(" ")[1]
+
+
+