Merge pull request #131 from Hestia-Homes/feature/live_tracker_documentation

updated script to do live tracking and folder generation better
This commit is contained in:
Jun-te Kim 2026-03-03 14:53:39 +00:00 committed by GitHub
commit 062cb76829
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 130 additions and 64 deletions

View file

@ -3,7 +3,7 @@ name: Hubspot Sync
on:
schedule:
# Every 15 minutes, 07:0018:59, MondayFriday (UTC)
- cron: '*/15 7-18 * * 1-5'
- cron: '0 7-18/2 * * 1-5'
# Once on Saturday at 09:00 UTC
- cron: '0 9 * * 6'

View file

@ -6,6 +6,7 @@ hubspot = HubSpotClient()
companies = [
Companies.THE_GUINESS_PARTNERSHIP,
Companies.SOUTHERN_HOUSING_GROUP,
]
# All deals from a pipeline_id via filter

View file

@ -9,6 +9,7 @@ PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value
valuable_companies = [
Companies.THE_GUINESS_PARTNERSHIP.value,
Companies.SOUTHERN_HOUSING_GROUP.value,
]
deals_to_add = []

View file

@ -8,6 +8,7 @@ db = HubspotTodb()
companies = [
Companies.THE_GUINESS_PARTNERSHIP,
Companies.SOUTHERN_HOUSING_GROUP,
]
# Global trackers

View file

@ -1,71 +1,126 @@
import os
# osmsis keys
os.environ["SHAREPOINT_CLIENT_ID"] = "6832a4c5-fb8c-4082-a746-4f51e1020f0d"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "xpC8Q~Frww48SM1V-D8lGy5iOY7P_cJ7FF3jgarQ"
os.environ["SHAREPOINT_TENANT_ID"] = "10d5af8b-2cfd-4882-9ccd-b96e4812dacf"
os.environ["SHAREPOINT_CLIENT_ID"] = "0e28c4f9-3e77-4571-8d63-df1857f4266a"
os.environ["SHAREPOINT_CLIENT_SECRET"] = "2s48Q~t8.pI-~rbtQaSCFcYY97Z3LiMYhuo0GaOb"
os.environ["SHAREPOINT_TENANT_ID"] = "6f080c63-8a66-4bbc-9d72-b85d5df30555"
from etl.scraper.scraper import SharePointInstaller
from etl.scraper.scraper import SharePointScraper
import time
import pandas as pd
from tqdm import tqdm
import time
from concurrent.futures import ThreadPoolExecutor, as_completed
from functools import wraps
from typing import Callable
from etl.scraper.scraper import SharePointInstaller, SharePointScraper
osmosis = SharePointScraper(SharePointInstaller.PRIVATE_PAY)
parent_folder = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
excel_path = "/workspaces/survey-extractor/example_data/SH-SURV-26-001-monday.com.xlsx"
asset_list = pd.read_excel(excel_path, sheet_name="SH-SURV-26-001-monday.com")
# --------------------------------------------------
# Retry Decorator (3 attempts + exponential backoff)
# --------------------------------------------------
def retry(max_attempts: int = 3, base_delay: float = 1.0):
def decorator(func: Callable):
@wraps(func)
def wrapper(*args, **kwargs):
attempt = 0
while attempt < max_attempts:
try:
return func(*args, **kwargs)
except Exception as e:
attempt += 1
if attempt >= max_attempts:
print(f"❌ Failed after {max_attempts} attempts: {e}")
raise
sleep_time = base_delay * (2 ** (attempt - 1))
print(f"⚠ Retry {attempt}/{max_attempts} after error: {e}")
time.sleep(sleep_time)
return wrapper
return decorator
osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_WAVE_3)
# --------------------------------------------------
# Folder Creation Logic (Wrapped with retry)
# --------------------------------------------------
@retry(max_attempts=3)
def process_asset(address: pd.Series):
folder_name = f"{address['Name']} {address['Postcode']}"
print(f"\n📁 Processing {folder_name}")
web_url = osmosis.create_dir(folder_name, parent_folder)
time.sleep(0.5) # small throttle
base_path = f"{parent_folder}/{folder_name}"
# -----------------------
# Folder Structure
# -----------------------
osmosis.create_dir("1. Retrofit Assessment", base_path)
osmosis.create_dir("A. Assessment", f"{base_path}/1. Retrofit Assessment")
osmosis.create_dir("B. Air Tightness Tests", f"{base_path}/1. Retrofit Assessment")
osmosis.create_dir("2. RC Mid-Term Plan", base_path)
osmosis.create_dir("SAP", f"{base_path}/2. RC Mid-Term Plan")
osmosis.create_dir("3. Retrofit Design", base_path)
osmosis.create_dir("4. Post EPC", base_path)
osmosis.create_dir(
f"{address['Name']} - POST EPC Photos",
f"{base_path}/4. Post EPC"
)
osmosis.create_dir("5. Trustmark Lodgement", base_path)
trust_path = f"{base_path}/5. Trustmark Lodgement"
osmosis.create_dir("1. Works", trust_path)
osmosis.create_dir("2. Required Documents", trust_path)
osmosis.create_dir("3. Additional Documents", trust_path)
return {
"Name": address["Name"],
"Postcode": address["Postcode"],
"Sharepoint": web_url,
}
parent_folder = "/Osmosis-ACD Projects/NCHA WHSHF Wave 3/Property Folders"
# --------------------------------------------------
# Parallel Execution
# --------------------------------------------------
asset_list = pd.read_excel("osmosis_data/asset_list.xlsx", sheet_name="Sheet 1")
results = []
failed_rows = []
MAX_WORKERS = 5
new_asset_list = []
# Create asset list and location
for index, address in tqdm(asset_list.iterrows()):
if index > 39:
folder_name = address['Name'] + " " + address['Postcode']
webUrl = osmosis.create_dir(folder_name, parent_folder)
time.sleep(1)
print(f"building folders insidea {folder_name}")
with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
futures = [
executor.submit(process_asset, row)
for _, row in asset_list.iterrows()
]
print("building retrofit assessment")
first_folder = "1. Retrofit Assessment"
osmosis.create_dir(first_folder, parent_folder + f"/{folder_name}")
osmosis.create_dir("A. Assessment", parent_folder + f"/{folder_name}/{first_folder}")
osmosis.create_dir("B. Air Tightness Tests", parent_folder + f"/{folder_name}/{first_folder}")
# ✅ Progress bar tied to completed futures
for future in tqdm(as_completed(futures), total=len(futures), desc="Processing Assets"):
try:
result = future.result()
results.append(result)
except Exception as e:
failed_rows.append(e)
print("building RC MID Term plan")
second_folder = "2. RC Mid-Term Plan"
osmosis.create_dir(second_folder, parent_folder + f"/{folder_name}")
osmosis.create_dir("SAP", parent_folder + f"/{folder_name}/{second_folder}")
print("building Retrofit Design")
third_folder = "3. Retrofit Design"
osmosis.create_dir(third_folder, parent_folder + f"/{folder_name}")
print("building post epc")
fourth_folder = "4. Post EPC"
osmosis.create_dir(fourth_folder, parent_folder + f"/{folder_name}")
osmosis.create_dir(f"{address['Name']} - POST EPC Photos", parent_folder + f"/{folder_name}/{fourth_folder}")
print("Building Trust mark Lodgement")
fifth_folder = "5. Trustmark Lodgement"
osmosis.create_dir(fifth_folder, parent_folder + f"/{folder_name}")
osmosis.create_dir("1. Works", parent_folder + f"/{folder_name}/{fifth_folder}")
osmosis.create_dir("2. Required Documents", parent_folder + f"/{folder_name}/{fifth_folder}")
osmosis.create_dir("3. Additional Documents", parent_folder + f"/{folder_name}/{fifth_folder}")
asset_data = {
"Name": address['Name'],
"Postcode": address['Postcode'],
"Sharepoint": webUrl,
}
print(asset_data)
new_asset_list.append(asset_data)
print("\n✅ Finished")
print(f"✔ Successful: {len(results)}")
print(f"❌ Failed: {len(failed_rows)}")
if failed_rows:
pd.DataFrame(failed_rows).to_excel("failed_rows.xlsx", index=False)
print("📄 Saved failed rows to failed_rows.xlsx")
# # Run this is you just want to get url
# def just_url(asset_list):
# new_asset_list = []
@ -82,9 +137,10 @@ for index, address in tqdm(asset_list.iterrows()):
# new_asset_list.append(asset_data)
# return new_asset_list
# new_asset_list = just_url(asset_list=asset_list)
df = pd.DataFrame(new_asset_list)
df.to_csv("output.csv", index=False)
# # new_asset_list = just_url(asset_list=asset_list)
# df = pd.DataFrame(new_asset_list)
# df.to_csv("output.csv", index=False)

View file

@ -30,6 +30,7 @@ class SharePointInstaller(Enum):
# WARMFRONT = os.getenv("WARMFRONT_SHARPOINT_ID", "bea71c30-d366-454c-a484-ae4d6fd95bc4")
# NEW_JJC = os.getenv("NEW JJC", "10d96eba-b4f9-4e30-804f-05a8b60507b0")
OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID", "931c4361-681b-44e4-86f6-1a54aba3ae8a")
PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID", "16812ae4-5898-4fec-a6f6-382d1435586f")
class SharePointScraper():
"""
@ -122,7 +123,6 @@ class SharePointScraper():
return False
def create_dir(self, file_name, at_path="/"):
sharepoint_client = SharePointClient(
tenant_id=self.sharepoint_tenant_id,
client_id=self.sharepoint_client_id,
@ -130,13 +130,20 @@ class SharePointScraper():
site_id=self.sharepoint_drive.value,
)
if self.does_folder_exists_at(file_name, at_path) is False:
return sharepoint_client.create_folder(file_name, at_path)['webUrl']
else:
for folders in self.get_folders_in_path(at_path)['value']:
if file_name.upper() in folders["name"].upper():
return folders["webUrl"]
folders = self.get_folders_in_path(at_path)
# Check if folder already exists (case-insensitive match)
if "value" in folders:
for folder in folders["value"]:
if "name" in folder and folder["name"].lower() == file_name.lower():
self.logger.info(f"Folder already exists: {file_name} at {at_path}")
return folder["webUrl"] # ✅ return existing folder
# Folder does NOT exist → create it
self.logger.info(f"Creating folder: {file_name} at {at_path}")
created = sharepoint_client.create_folder(file_name, at_path)
return created["webUrl"]
def upload_file(self, file_path, sharepoint_path, file_name):
sharepoint_client = SharePointClient(
tenant_id=self.sharepoint_tenant_id,