added latest changes

This commit is contained in:
Jun-te Kim 2026-04-23 13:16:51 +00:00
parent 50ea324ca5
commit e538ade8bb
5 changed files with 113 additions and 11 deletions

102
devcontainer.sh Normal file
View file

@ -0,0 +1,102 @@
#!/usr/bin/env bash
#
# devcontainer.sh — devcontainer helper for this repo
#
# Usage:
# ./devcontainer.sh <command>
#
# Commands:
# up build + start the devcontainer (idempotent)
# shell attach a bash shell; auto-ups if not running
# down stop the devcontainer
# rebuild remove + rebuild from scratch, no cache
#
# Examples:
# ./devcontainer.sh shell # one-shot: up if needed, then bash
# ./devcontainer.sh rebuild
set -euo pipefail # strict mode: exit on errors, unset variables, and failed pipeline stages
# Directory containing this script, turned into an absolute path via cd + pwd.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# The script's own directory is used as the repo root.
REPO_ROOT="${SCRIPT_DIR}"
# devcontainer config file; its existence is checked before dispatching, and it
# is also used as a docker label filter by container_id.
CONFIG_PATH="${REPO_ROOT}/.devcontainer/devcontainer.json"
# Sub-commands accepted on the command line.
VALID_COMMANDS=(up shell down rebuild)
# --- helpers ---------------------------------------------------------------
usage() {
sed -n '3,15p' "${BASH_SOURCE[0]}" | sed 's/^# \{0,1\}//'
exit "${1:-0}"
}
# Report a fatal error on stderr, prefixed with "error: ", then exit with
# status 1. All arguments form the message, joined via "$*".
die() {
  printf 'error: %s\n' "$*" >&2
  exit 1
}
# Succeed (status 0) when the first argument is exactly equal to one of the
# remaining arguments; otherwise return 1. The comparison is a literal string
# match, not a glob match.
in_list() {
  local wanted="$1"
  local candidate
  shift
  for candidate; do
    if [[ "${candidate}" == "${wanted}" ]]; then
      return 0
    fi
  done
  return 1
}
# Print the ID(s) reported by `docker ps -q` for containers that carry both
# devcontainer labels of this repo: local_folder = REPO_ROOT and
# config_file = CONFIG_PATH.
container_id() {
  local -a label_filters=(
    --filter "label=devcontainer.local_folder=${REPO_ROOT}"
    --filter "label=devcontainer.config_file=${CONFIG_PATH}"
  )
  docker ps -q "${label_filters[@]}"
}
# --- argument parsing ------------------------------------------------------
# Exactly one argument (the command) is required; anything else prints the
# usage text and exits with status 1.
if [[ $# -ne 1 ]]; then
  usage 1
fi
COMMAND="$1"
# Reject anything that is not listed in VALID_COMMANDS.
if ! in_list "${COMMAND}" "${VALID_COMMANDS[@]}"; then
  die "invalid command '${COMMAND}' (expected: ${VALID_COMMANDS[*]})"
fi
# Every command requires the devcontainer config file to exist.
if [[ ! -f "${CONFIG_PATH}" ]]; then
  die "config not found: ${CONFIG_PATH}"
fi
# Arguments shared by every `devcontainer` invocation in the dispatch below.
DC_ARGS=(--workspace-folder "${REPO_ROOT}")
# --- dispatch --------------------------------------------------------------
# Run the requested command. COMMAND has already been validated against
# VALID_COMMANDS, so the catch-all arm at the end is only a safety net.
case "${COMMAND}" in
  up)
    echo ">> bringing up devcontainer"
    devcontainer up "${DC_ARGS[@]}"
    ;;
  shell)
    # Auto-up if not already running. `devcontainer up` is idempotent: it
    # reuses an existing container, so this is cheap on warm starts.
    if [[ -z "$(container_id)" ]]; then
      echo ">> devcontainer not running, bringing it up first"
      devcontainer up "${DC_ARGS[@]}"
    fi
    echo ">> attaching shell"
    # Decide between bash and sh with a silent probe. Chaining the two
    # sessions with `||` would open a second (sh) session whenever the bash
    # session itself ended with a non-zero status, and silencing stderr to
    # hide a "bash not found" error would hide the session's stderr as well.
    if devcontainer exec "${DC_ARGS[@]}" sh -c 'command -v bash' >/dev/null 2>&1; then
      devcontainer exec "${DC_ARGS[@]}" bash
    else
      devcontainer exec "${DC_ARGS[@]}" sh
    fi
    ;;
  down)
    cid="$(container_id)"
    if [[ -z "${cid}" ]]; then
      echo ">> devcontainer not running, nothing to stop"
      exit 0
    fi
    # More than one container may match the labels; `docker ps -q` prints one
    # ID per line, so split them into separate arguments for `docker stop`.
    cids=()
    while IFS= read -r id; do
      if [[ -n "${id}" ]]; then
        cids+=("${id}")
      fi
    done <<<"${cid}"
    echo ">> stopping devcontainer"
    docker stop "${cids[@]}"
    ;;
  rebuild)
    echo ">> rebuilding devcontainer from scratch"
    devcontainer up "${DC_ARGS[@]}" --remove-existing-container --build-no-cache
    ;;
  *)
    # Unreachable while the validation above and this case stay in sync.
    die "invalid command '${COMMAND}'"
    ;;
esac

View file

@ -13,13 +13,13 @@ from typing import Callable
from etl.scraper.scraper import SharePointInstaller, SharePointScraper
osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_ACD)
osmosis = SharePointScraper(SharePointInstaller.SOCIAL_HOUSING_WAVE_3)
osmosis
parent_folder = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
excel_path = "/workspaces/survey-extractor/example_data/SH-SURV-26-001-monday.com.xlsx"
asset_list = pd.read_excel(excel_path, sheet_name="SH-SURV-26-001-monday.com")
parent_folder = "/Osmosis-ACD Projects/Sero-Clarion Housing/Sero Project Documents/Property Folders"
osmosis.get_folders_in_path(parent_folder)
excel_path = "/workspaces/survey-extractor/example_data/Solar Programme Phase 1 DRAFT.xlsx"
asset_list = pd.read_excel(excel_path, sheet_name="Sheet1")
# --------------------------------------------------
# Retry Decorator (3 attempts + exponential backoff)
@ -51,7 +51,7 @@ def retry(max_attempts: int = 3, base_delay: float = 1.0):
@retry(max_attempts=5)
def process_asset(address: pd.Series):
folder_name = f"{address['Name']} {address['Postcode']}"
folder_name = f"{address['Real Full Address']}"
print(f"\n📁 Processing {folder_name}")
web_url = osmosis.create_dir(folder_name, parent_folder)
@ -74,7 +74,7 @@ def process_asset(address: pd.Series):
osmosis.create_dir("4. Post EPC", base_path)
osmosis.create_dir(
f"{address['Name']} - POST EPC Photos",
f"{folder_name} - POST EPC Photos",
f"{base_path}/4. Post EPC"
)
@ -85,8 +85,7 @@ def process_asset(address: pd.Series):
osmosis.create_dir("3. Additional Documents", trust_path)
return {
"Name": address["Name"],
"Postcode": address["Postcode"],
"Name": folder_name,
"Sharepoint": web_url,
}
@ -94,7 +93,7 @@ def process_asset(address: pd.Series):
# --------------------------------------------------
# Parallel Execution
# --------------------------------------------------
# asset_list = asset_list.head(1)
results = []
failed_rows = []

View file

@ -31,6 +31,7 @@ class SharePointInstaller(Enum):
# NEW_JJC = os.getenv("NEW JJC", "10d96eba-b4f9-4e30-804f-05a8b60507b0")
OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID", "931c4361-681b-44e4-86f6-1a54aba3ae8a")
PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID", "16812ae4-5898-4fec-a6f6-382d1435586f")
# Site ID can be overridden through the environment, like the other members;
# the previous hard-coded value is kept as the default.
SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID", "c60c58fe-94c5-4647-9b5f-8202f1309f0f")
class SharePointScraper():
"""

Binary file not shown.

Binary file not shown.