diff --git a/devcontainer.sh b/devcontainer.sh
new file mode 100644
index 0000000..5c536cb
--- /dev/null
+++ b/devcontainer.sh
@@ -0,0 +1,120 @@
+#!/usr/bin/env bash
+#
+# devcontainer.sh — devcontainer helper for this repo
+#
+# Usage:
+#   ./devcontainer.sh <command>
+#
+# Commands:
+#   up        build + start the devcontainer (idempotent)
+#   shell     attach a shell (bash, else sh); auto-ups if not running
+#   down      stop the devcontainer
+#   rebuild   remove + rebuild from scratch, no cache
+#
+# Examples:
+#   ./devcontainer.sh shell     # one-shot: up if needed, then bash
+#   ./devcontainer.sh rebuild
+
+set -euo pipefail
+
+SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
+REPO_ROOT="${SCRIPT_DIR}"
+CONFIG_PATH="${REPO_ROOT}/.devcontainer/devcontainer.json"
+
+VALID_COMMANDS=(up shell down rebuild)
+
+# --- helpers ---------------------------------------------------------------
+
+# Print the header comment block (from line 3 up to the first non-comment
+# line) with the leading "# " stripped, then exit with status $1 (default 0).
+# The text goes to stderr when the status is non-zero.
+usage() {
+    local status="${1:-0}"
+    local fd=1
+    [[ "${status}" -eq 0 ]] || fd=2
+    # Stop at the first non-comment line rather than a fixed line range, so
+    # the last header line is never cut off when the header changes length.
+    awk 'NR < 3 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' \
+        "${BASH_SOURCE[0]}" >&"${fd}"
+    exit "${status}"
+}
+
+# Print "error: <message>" to stderr and exit with status 1.
+die() {
+    echo "error: $*" >&2
+    exit 1
+}
+
+# in_list NEEDLE ITEM... — succeed if NEEDLE equals one of the ITEMs.
+in_list() {
+    local needle="$1"
+    shift
+    local item
+    for item in "$@"; do
+        [[ "${item}" == "${needle}" ]] && return 0
+    done
+    return 1
+}
+
+# Print the ID of the running container for this repo, found via
+# devcontainer labels; prints nothing when no such container is running.
+container_id() {
+    docker ps -q \
+        --filter "label=devcontainer.local_folder=${REPO_ROOT}" \
+        --filter "label=devcontainer.config_file=${CONFIG_PATH}"
+}
+
+# --- argument parsing ------------------------------------------------------
+
+[[ $# -eq 1 ]] || usage 1
+
+COMMAND="$1"
+
+in_list "${COMMAND}" "${VALID_COMMANDS[@]}" \
+    || die "invalid command '${COMMAND}' (expected: ${VALID_COMMANDS[*]})"
+
+[[ -f "${CONFIG_PATH}" ]] || die "config not found: ${CONFIG_PATH}"
+
+DC_ARGS=(--workspace-folder "${REPO_ROOT}")
+
+# --- dispatch --------------------------------------------------------------
+
+case "${COMMAND}" in
+    up)
+        echo ">> bringing up devcontainer"
+        devcontainer up "${DC_ARGS[@]}"
+        ;;
+
+    shell)
+        # Auto-up if not already running. `devcontainer up` is idempotent —
+        # it reuses an existing container, so this is cheap on warm starts.
+        if [[ -z "$(container_id)" ]]; then
+            echo ">> devcontainer not running, bringing it up first"
+            devcontainer up "${DC_ARGS[@]}"
+        fi
+        # Pick the shell with a separate, silent probe so that the
+        # interactive session keeps its stderr and its own exit status is
+        # never mistaken for "bash is missing".
+        login_shell=sh
+        if devcontainer exec "${DC_ARGS[@]}" sh -c 'command -v bash' &>/dev/null; then
+            login_shell=bash
+        fi
+        echo ">> attaching shell"
+        devcontainer exec "${DC_ARGS[@]}" "${login_shell}"
+        ;;
+
+    down)
+        cid="$(container_id)"
+        if [[ -z "${cid}" ]]; then
+            echo ">> devcontainer not running, nothing to stop"
+            exit 0
+        fi
+        echo ">> stopping devcontainer"
+        docker stop "${cid}"
+        ;;
+
+    rebuild)
+        echo ">> rebuilding devcontainer from scratch"
+        devcontainer up "${DC_ARGS[@]}" --remove-existing-container --build-no-cache
+        ;;
+esac
diff --git a/etl/osmosis_monday_to_sharepoint_automation.py b/etl/osmosis_monday_to_sharepoint_automation.py
index c9b7325..a485646 100644
--- a/etl/osmosis_monday_to_sharepoint_automation.py
+++ b/etl/osmosis_monday_to_sharepoint_automation.py
@@ -13,13 +13,12 @@ from typing import Callable
 from etl.scraper.scraper import SharePointInstaller, SharePointScraper
 
 
-osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_ACD)
+osmosis = SharePointScraper(SharePointInstaller.SOCIAL_HOUSING_WAVE_3)
 
-
-parent_folder = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
-
-excel_path = "/workspaces/survey-extractor/example_data/SH-SURV-26-001-monday.com.xlsx"
-asset_list = pd.read_excel(excel_path, sheet_name="SH-SURV-26-001-monday.com")
+parent_folder = "/Osmosis-ACD Projects/Sero-Clarion Housing/Sero Project Documents/Property Folders"
+osmosis.get_folders_in_path(parent_folder)
+excel_path = "/workspaces/survey-extractor/example_data/Solar Programme Phase 1 DRAFT.xlsx"
+asset_list = pd.read_excel(excel_path, sheet_name="Sheet1")
 # --------------------------------------------------
 # Retry Decorator (3 attempts + exponential backoff)
 # --------------------------------------------------
@@ -51,7 +50,7 @@ def retry(max_attempts: int = 3, base_delay: float = 1.0):
 
 @retry(max_attempts=5)
 def process_asset(address: pd.Series):
-    folder_name = f"{address['Name']} {address['Postcode']}"
+    folder_name = str(address["Real Full Address"]).strip()
     print(f"\nšŸ“ Processing {folder_name}")
 
     web_url = osmosis.create_dir(folder_name, parent_folder)
@@ -74,7 +73,7 @@ def process_asset(address: pd.Series):
     osmosis.create_dir("4. Post EPC", base_path)
 
     osmosis.create_dir(
-        f"{address['Name']} - POST EPC Photos",
+        f"{folder_name} - POST EPC Photos",
         f"{base_path}/4. Post EPC"
     )
 
@@ -85,8 +84,7 @@ def process_asset(address: pd.Series):
     osmosis.create_dir("3. Additional Documents", trust_path)
 
     return {
-        "Name": address["Name"],
-        "Postcode": address["Postcode"],
+        "Name": folder_name,
         "Sharepoint": web_url,
     }
 
@@ -94,7 +92,7 @@ def process_asset(address: pd.Series):
 # --------------------------------------------------
 # Parallel Execution
 # --------------------------------------------------
-
+# asset_list = asset_list.head(1)
 results = []
 failed_rows = []
 
diff --git a/etl/scraper/scraper.py b/etl/scraper/scraper.py
index b599f45..68de810 100644
--- a/etl/scraper/scraper.py
+++ b/etl/scraper/scraper.py
@@ -31,6 +31,7 @@ class SharePointInstaller(Enum):
     # NEW_JJC = os.getenv("NEW JJC", "10d96eba-b4f9-4e30-804f-05a8b60507b0")
     OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID", "931c4361-681b-44e4-86f6-1a54aba3ae8a")
     PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID", "16812ae4-5898-4fec-a6f6-382d1435586f")
+    SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID", "c60c58fe-94c5-4647-9b5f-8202f1309f0f")
 
 class SharePointScraper():
     """
diff --git a/example_data/csr_special_case.pdf b/example_data/csr_special_case.pdf
deleted file mode 100644
index d0a3f2a..0000000
Binary files a/example_data/csr_special_case.pdf and /dev/null differ
diff --git a/example_data/pre_site_note.pdf b/example_data/pre_site_note.pdf
deleted file mode 100644
index 8339e03..0000000
Binary files a/example_data/pre_site_note.pdf and /dev/null differ