mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
updated script to do live tracking and folder generation better
This commit is contained in:
parent
7eef0aca44
commit
1031cf63b1
6 changed files with 130 additions and 64 deletions
2
.github/workflows/hubspot_sync.yml
vendored
2
.github/workflows/hubspot_sync.yml
vendored
|
|
@ -3,7 +3,7 @@ name: Hubspot Sync
|
|||
on:
|
||||
schedule:
|
||||
# Every 15 minutes, 07:00–18:59, Monday–Friday (UTC)
|
||||
- cron: '*/15 7-18 * * 1-5'
|
||||
- cron: '0 7-18/2 * * 1-5'
|
||||
|
||||
# Once on Saturday at 09:00 UTC
|
||||
- cron: '0 9 * * 6'
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ hubspot = HubSpotClient()
|
|||
|
||||
companies = [
|
||||
Companies.THE_GUINESS_PARTNERSHIP,
|
||||
Companies.SOUTHERN_HOUSING_GROUP,
|
||||
]
|
||||
# All deals from a pipeline_id via filter
|
||||
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value
|
|||
|
||||
valuable_companies = [
|
||||
Companies.THE_GUINESS_PARTNERSHIP.value,
|
||||
Companies.SOUTHERN_HOUSING_GROUP.value,
|
||||
]
|
||||
|
||||
deals_to_add = []
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ db = HubspotTodb()
|
|||
|
||||
companies = [
|
||||
Companies.THE_GUINESS_PARTNERSHIP,
|
||||
Companies.SOUTHERN_HOUSING_GROUP,
|
||||
]
|
||||
|
||||
# Global trackers
|
||||
|
|
|
|||
|
|
@ -1,71 +1,126 @@
|
|||
import os
|
||||
# osmsis keys
|
||||
os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
|
||||
os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
|
||||
os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"
|
||||
os.environ["SHAREPOINT_CLIENT_ID"] = "0e28c4f9-3e77-4571-8d63-df1857f4266a"
|
||||
os.environ["SHAREPOINT_CLIENT_SECRET"] = "2s48Q~t8.pI-~rbtQaSCFcYY97Z3LiMYhuo0GaOb"
|
||||
os.environ["SHAREPOINT_TENANT_ID"] = "6f080c63-8a66-4bbc-9d72-b85d5df30555"
|
||||
|
||||
from etl.scraper.scraper import SharePointInstaller
|
||||
from etl.scraper.scraper import SharePointScraper
|
||||
import time
|
||||
import pandas as pd
|
||||
from tqdm import tqdm
|
||||
import time
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from functools import wraps
|
||||
from typing import Callable
|
||||
|
||||
from etl.scraper.scraper import SharePointInstaller, SharePointScraper
|
||||
|
||||
osmosis = SharePointScraper(SharePointInstaller.PRIVATE_PAY)
|
||||
|
||||
parent_folder = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
|
||||
|
||||
excel_path = "/workspaces/survey-extractor/example_data/SH-SURV-26-001-monday.com.xlsx"
|
||||
asset_list = pd.read_excel(excel_path, sheet_name="SH-SURV-26-001-monday.com")
|
||||
|
||||
# --------------------------------------------------
|
||||
# Retry Decorator (3 attempts + exponential backoff)
|
||||
# --------------------------------------------------
|
||||
|
||||
def retry(max_attempts: int = 3, base_delay: float = 1.0):
|
||||
def decorator(func: Callable):
|
||||
@wraps(func)
|
||||
def wrapper(*args, **kwargs):
|
||||
attempt = 0
|
||||
while attempt < max_attempts:
|
||||
try:
|
||||
return func(*args, **kwargs)
|
||||
except Exception as e:
|
||||
attempt += 1
|
||||
if attempt >= max_attempts:
|
||||
print(f"❌ Failed after {max_attempts} attempts: {e}")
|
||||
raise
|
||||
sleep_time = base_delay * (2 ** (attempt - 1))
|
||||
print(f"⚠ Retry {attempt}/{max_attempts} after error: {e}")
|
||||
time.sleep(sleep_time)
|
||||
return wrapper
|
||||
return decorator
|
||||
|
||||
|
||||
osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_3)
|
||||
# --------------------------------------------------
|
||||
# Folder Creation Logic (Wrapped with retry)
|
||||
# --------------------------------------------------
|
||||
|
||||
@retry(max_attempts=3)
|
||||
def process_asset(address: pd.Series):
|
||||
folder_name = f"{address['Name']} {address['Postcode']}"
|
||||
print(f"\n📁 Processing {folder_name}")
|
||||
|
||||
web_url = osmosis.create_dir(folder_name, parent_folder)
|
||||
time.sleep(0.5) # small throttle
|
||||
|
||||
base_path = f"{parent_folder}/{folder_name}"
|
||||
|
||||
# -----------------------
|
||||
# Folder Structure
|
||||
# -----------------------
|
||||
|
||||
osmosis.create_dir("1. Retrofit Assessment", base_path)
|
||||
osmosis.create_dir("A. Assessment", f"{base_path}/1. Retrofit Assessment")
|
||||
osmosis.create_dir("B. Air Tightness Tests", f"{base_path}/1. Retrofit Assessment")
|
||||
|
||||
osmosis.create_dir("2. RC Mid-Term Plan", base_path)
|
||||
osmosis.create_dir("SAP", f"{base_path}/2. RC Mid-Term Plan")
|
||||
|
||||
osmosis.create_dir("3. Retrofit Design", base_path)
|
||||
|
||||
osmosis.create_dir("4. Post EPC", base_path)
|
||||
osmosis.create_dir(
|
||||
f"{address['Name']} - POST EPC Photos",
|
||||
f"{base_path}/4. Post EPC"
|
||||
)
|
||||
|
||||
osmosis.create_dir("5. Trustmark Lodgement", base_path)
|
||||
trust_path = f"{base_path}/5. Trustmark Lodgement"
|
||||
osmosis.create_dir("1. Works", trust_path)
|
||||
osmosis.create_dir("2. Required Documents", trust_path)
|
||||
osmosis.create_dir("3. Additional Documents", trust_path)
|
||||
|
||||
return {
|
||||
"Name": address["Name"],
|
||||
"Postcode": address["Postcode"],
|
||||
"Sharepoint": web_url,
|
||||
}
|
||||
|
||||
|
||||
parent_folder = "/Osmosis-ACD Projects/NCHA WHSHF Wave 3/Property Folders"
|
||||
# --------------------------------------------------
|
||||
# Parallel Execution
|
||||
# --------------------------------------------------
|
||||
|
||||
asset_list = pd.read_excel("osmosis_data/asset_list.xlsx", sheet_name="Sheet 1")
|
||||
results = []
|
||||
failed_rows = []
|
||||
|
||||
MAX_WORKERS = 5
|
||||
|
||||
new_asset_list = []
|
||||
# Create asset list and location
|
||||
for index, address in tqdm(asset_list.iterrows()):
|
||||
if index > 39:
|
||||
folder_name = address['Name'] + " " + address['Postcode']
|
||||
webUrl = osmosis.create_dir(folder_name, parent_folder)
|
||||
time.sleep(1)
|
||||
print(f"building folders insidea {folder_name}")
|
||||
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
|
||||
futures = [
|
||||
executor.submit(process_asset, row)
|
||||
for _, row in asset_list.iterrows()
|
||||
]
|
||||
|
||||
print("building retrofit assessment")
|
||||
first_folder = "1. Retrofit Assessment"
|
||||
osmosis.create_dir(first_folder, parent_folder + f"/{folder_name}")
|
||||
osmosis.create_dir("A. Assessment", parent_folder + f"/{folder_name}/{first_folder}")
|
||||
osmosis.create_dir("B. Air Tightness Tests", parent_folder + f"/{folder_name}/{first_folder}")
|
||||
# ✅ Progress bar tied to completed futures
|
||||
for future in tqdm(as_completed(futures), total=len(futures), desc="Processing Assets"):
|
||||
try:
|
||||
result = future.result()
|
||||
results.append(result)
|
||||
except Exception as e:
|
||||
failed_rows.append(e)
|
||||
|
||||
print("building RC MID Term plan")
|
||||
second_folder = "2. RC Mid-Term Plan"
|
||||
osmosis.create_dir(second_folder, parent_folder + f"/{folder_name}")
|
||||
osmosis.create_dir("SAP", parent_folder + f"/{folder_name}/{second_folder}")
|
||||
|
||||
print("building Retrofit Design")
|
||||
third_folder = "3. Retrofit Design"
|
||||
osmosis.create_dir(third_folder, parent_folder + f"/{folder_name}")
|
||||
|
||||
print("building post epc")
|
||||
fourth_folder = "4. Post EPC"
|
||||
osmosis.create_dir(fourth_folder, parent_folder + f"/{folder_name}")
|
||||
osmosis.create_dir(f"{address['Name']} - POST EPC Photos", parent_folder + f"/{folder_name}/{fourth_folder}")
|
||||
|
||||
print("Building Trust mark Lodgement")
|
||||
fifth_folder = "5. Trustmark Lodgement"
|
||||
osmosis.create_dir(fifth_folder, parent_folder + f"/{folder_name}")
|
||||
osmosis.create_dir("1. Works", parent_folder + f"/{folder_name}/{fifth_folder}")
|
||||
|
||||
osmosis.create_dir("2. Required Documents", parent_folder + f"/{folder_name}/{fifth_folder}")
|
||||
osmosis.create_dir("3. Additional Documents", parent_folder + f"/{folder_name}/{fifth_folder}")
|
||||
|
||||
asset_data = {
|
||||
"Name": address['Name'],
|
||||
"Postcode": address['Postcode'],
|
||||
"Sharepoint": webUrl,
|
||||
}
|
||||
print(asset_data)
|
||||
|
||||
new_asset_list.append(asset_data)
|
||||
print("\n✅ Finished")
|
||||
|
||||
print(f"✔ Successful: {len(results)}")
|
||||
print(f"❌ Failed: {len(failed_rows)}")
|
||||
|
||||
if failed_rows:
|
||||
pd.DataFrame(failed_rows).to_excel("failed_rows.xlsx", index=False)
|
||||
print("📄 Saved failed rows to failed_rows.xlsx")
|
||||
# # Run this is you just want to get url
|
||||
# def just_url(asset_list):
|
||||
# new_asset_list = []
|
||||
|
|
@ -82,9 +137,10 @@ for index, address in tqdm(asset_list.iterrows()):
|
|||
# new_asset_list.append(asset_data)
|
||||
# return new_asset_list
|
||||
|
||||
# new_asset_list = just_url(asset_list=asset_list)
|
||||
df = pd.DataFrame(new_asset_list)
|
||||
df.to_csv("output.csv", index=False)
|
||||
# # new_asset_list = just_url(asset_list=asset_list)
|
||||
|
||||
# df = pd.DataFrame(new_asset_list)
|
||||
# df.to_csv("output.csv", index=False)
|
||||
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -30,6 +30,7 @@ class SharePointInstaller(Enum):
|
|||
# WARMFRONT = os.getenv("WARMFRONT_SHARPOINT_ID", "bea71c30-d366-454c-a484-ae4d6fd95bc4")
|
||||
# NEW_JJC = os.getenv("NEW JJC", "10d96eba-b4f9-4e30-804f-05a8b60507b0")
|
||||
OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID", "931c4361-681b-44e4-86f6-1a54aba3ae8a")
|
||||
PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID", "16812ae4-5898-4fec-a6f6-382d1435586f")
|
||||
|
||||
class SharePointScraper():
|
||||
"""
|
||||
|
|
@ -122,7 +123,6 @@ class SharePointScraper():
|
|||
return False
|
||||
|
||||
def create_dir(self, file_name, at_path="/"):
|
||||
|
||||
sharepoint_client = SharePointClient(
|
||||
tenant_id=self.sharepoint_tenant_id,
|
||||
client_id=self.sharepoint_client_id,
|
||||
|
|
@ -130,13 +130,20 @@ class SharePointScraper():
|
|||
site_id=self.sharepoint_drive.value,
|
||||
)
|
||||
|
||||
if self.does_folder_exists_at(file_name, at_path) is False:
|
||||
return sharepoint_client.create_folder(file_name, at_path)['webUrl']
|
||||
else:
|
||||
for folders in self.get_folders_in_path(at_path)['value']:
|
||||
if file_name.upper() in folders["name"].upper():
|
||||
return folders["webUrl"]
|
||||
|
||||
folders = self.get_folders_in_path(at_path)
|
||||
|
||||
# Check if folder already exists (case-insensitive match)
|
||||
if "value" in folders:
|
||||
for folder in folders["value"]:
|
||||
if "name" in folder and folder["name"].lower() == file_name.lower():
|
||||
self.logger.info(f"Folder already exists: {file_name} at {at_path}")
|
||||
return folder["webUrl"] # ✅ return existing folder
|
||||
|
||||
# Folder does NOT exist → create it
|
||||
self.logger.info(f"Creating folder: {file_name} at {at_path}")
|
||||
created = sharepoint_client.create_folder(file_name, at_path)
|
||||
|
||||
return created["webUrl"]
|
||||
def upload_file(self, file_path, sharepoint_path, file_name):
|
||||
sharepoint_client = SharePointClient(
|
||||
tenant_id=self.sharepoint_tenant_id,
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue