From bb66102dc941fff10ac3605d0bc3e2863f5ff09d Mon Sep 17 00:00:00 2001 From: Jun-te Kim Date: Wed, 21 May 2025 14:52:45 +0000 Subject: [PATCH] script is so muhc faster --- .github/workflows/hubspot_to_invoice.yml | 2 +- etl/db/hubSpotLoad.py | 13 +++++---- etl/hubspot_to_invoice_rewrite.py | 35 +++++++++++++++++------- 3 files changed, 34 insertions(+), 16 deletions(-) diff --git a/.github/workflows/hubspot_to_invoice.yml b/.github/workflows/hubspot_to_invoice.yml index 951c082..f2c78c8 100644 --- a/.github/workflows/hubspot_to_invoice.yml +++ b/.github/workflows/hubspot_to_invoice.yml @@ -24,7 +24,7 @@ jobs: run: | pwd ls -la - poetry run python etl/hubspot_to_invoice.py + poetry run python etl/hubspot_to_invoice_rewrite.py env: PYTHONPATH: ${{ github.workspace }} DATABASE_URL: postgresql://postgres:makingwarmhomes@terraform-20250331175522503500000002.cdgzupxvdyp0.eu-west-2.rds.amazonaws.com:5432/surveyDB diff --git a/etl/db/hubSpotLoad.py b/etl/db/hubSpotLoad.py index db82946..f4989a0 100644 --- a/etl/db/hubSpotLoad.py +++ b/etl/db/hubSpotLoad.py @@ -4,6 +4,7 @@ from etl.surveyedData.surveryedData import surveyedDataProcessor from etl.scraper.scraper import SharePointScraper, SharePointInstaller from etl.db.db import get_db_session, init_db import pandas as pd +from etl.db.db import get_db_session, init_db from urllib.parse import unquote @@ -102,6 +103,8 @@ class HubspotTodb(): for add, file_loc in data_loc.items(): sdp = surveyedDataProcessor(add, file_loc) sdp.hubspot_deal_id = row["HUBSPOT_DEAL_ID"] + with get_db_session() as session: + self.load_one_pre_site_note(session, sdp, row) return sdp @@ -125,8 +128,10 @@ class HubspotTodb(): self.load_all_pre_site_note(session) session.commit() - def load_one_pre_site_note(self, db_session, surveyedData): + def load_one_pre_site_note(self, db_session, surveyedData, hubspot_data): + df = hubspot_data assessor = surveyedData.load_assessor_table(db_session) + # Loads the pre site summary information summary_info = 
surveyedData.load_pre_site_notes_summary_table(db_session) @@ -135,12 +140,10 @@ class HubspotTodb(): # Creates the a final pre site note table that links all information presitenote = surveyedData.create_pre_site_note_table(db_session, assessor, summary_info, property_description) - df = self.deals_in_hubspot - df = df[df["HUBSPOT_DEAL_ID"] == str(surveyedData.hubspot_deal_id)] building_table = surveyedData.create_buildings_table( db_session, - df["HUBSPOT_LANDLORD_ID"].values[0], - df["HUBSPOT_DOMNA_ID"].values[0], + df["HUBSPOT_LANDLORD_ID"], + df["HUBSPOT_DOMNA_ID"], ) documents = surveyedData.create_document_table_via_pre_site_note(db_session, presitenote, assessor, building_table) diff --git a/etl/hubspot_to_invoice_rewrite.py b/etl/hubspot_to_invoice_rewrite.py index ffe7edc..0a616a5 100644 --- a/etl/hubspot_to_invoice_rewrite.py +++ b/etl/hubspot_to_invoice_rewrite.py @@ -7,7 +7,7 @@ os.environ["SOUTH_COAST_INSULATION_SERVICE_SHAREPOINT_ID"] = "b5a51507-9427-4ee0 os.environ["JJC_SERVICE_SHAREPOINT_ID"] = "7fdd0485-bbf3-4b29-b30f-98c81c2a6284" # Local development -os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres" +# os.environ["DATABASE_URL"] = "postgresql://postgres:makingwarmhomes@db:5432/postgres" from etl.surveyPrice.surveyPrice import SurveyPrice from etl.db.hubSpotLoad import HubspotTodb @@ -16,6 +16,7 @@ from etl.db.hubSpotLoad import HubspotTodb dbLoader = HubspotTodb() df = dbLoader.get_all_deals() +deal_ids = df["HUBSPOT_DEAL_ID"].tolist() panda_final = [] for index, rows in df.iterrows(): @@ -23,12 +24,26 @@ for index, rows in df.iterrows(): panda_final.append(invoice_row) panda_final = pd.concat(panda_final, ignore_index=True) -panda_final -# For each deal -# if not Validate: -# move to a different stage, with option to add a note to this to state what the error was -# work out price and add to grand list -# load to db and move deal correctly to correct position -# -# Once grand list is finsihed for price -# 
uploaad to sharepoint \ No newline at end of file + + +df = panda_final +sp = SurveyPrice() + +verbose_file = "verbose_invoice_score.xlsx" +df.to_excel(verbose_file, index=False) +output_path = os.path.abspath(verbose_file) +sp.upload_to_sharepoint(output_path, verbose_file) + +lewis_view = "FOR_LEWIS.xlsx" +selected_columns = ["HUBSPOT_INSTALLER", "HUBSPOT_DEAL_ADDRESS", "PRICE"] +minimal_df = df[selected_columns] +minimal_df.to_excel(lewis_view, index=False) +output_path = os.path.abspath(lewis_view) +sp.upload_to_sharepoint(output_path, lewis_view) + +sp.upload_to_sharepoint(dbLoader.sp.get_master_rate_card_path(), "COPY_OF_RATE_CARD_USED.xlsx") + + +# NOTE(review): an earlier note here said this step was commented out to avoid syncing hubspot_to_db just yet, but the call below is active - confirm that moving deals to completed is intended +sp.move_deals_to_completed(deal_ids) +