mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
added latest changes
This commit is contained in:
parent
50ea324ca5
commit
e538ade8bb
5 changed files with 113 additions and 11 deletions
102
devcontainer.sh
Normal file
102
devcontainer.sh
Normal file
|
|
@ -0,0 +1,102 @@
|
|||
#!/usr/bin/env bash
|
||||
#
|
||||
# devcontainer.sh — devcontainer helper for this repo
|
||||
#
|
||||
# Usage:
|
||||
# ./devcontainer.sh <command>
|
||||
#
|
||||
# Commands:
|
||||
# up build + start the devcontainer (idempotent)
|
||||
# shell attach a bash shell; auto-ups if not running
|
||||
# down stop the devcontainer
|
||||
# rebuild remove + rebuild from scratch, no cache
|
||||
#
|
||||
# Examples:
|
||||
# ./devcontainer.sh shell # one-shot: up if needed, then bash
|
||||
# ./devcontainer.sh rebuild
|
||||
|
||||
# Strict mode: abort on command failure, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -o errexit -o nounset -o pipefail

# Absolute path of the directory containing this script. The script sits at
# the repository root, so the same directory doubles as the workspace folder.
SCRIPT_DIR="$(cd -- "$(dirname -- "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
REPO_ROOT="${SCRIPT_DIR}"

# Devcontainer definition that must exist before any command is dispatched.
CONFIG_PATH="${REPO_ROOT}/.devcontainer/devcontainer.json"

# Sub-commands accepted on the command line.
VALID_COMMANDS=(up shell down rebuild)
|
||||
|
||||
# --- helpers ---------------------------------------------------------------
|
||||
|
||||
usage() {
  # Print this script's header comment as help text, then exit.
  #
  # Arguments:
  #   $1  exit status to terminate with (default: 0). A non-zero status
  #       sends the help text to stderr so it is not mistaken for output.
  #
  # The text starts at line 3 of the script (after the shebang and the first
  # bare '#') and runs to the first line that is not a comment. The previous
  # fixed range of lines 3-15 dropped the final "rebuild" example on line 16.
  local status="${1:-0}"
  local fd=1
  if [[ "${status}" != 0 ]]; then
    fd=2
  fi

  # Strip the leading '#' and at most one following space from each line.
  awk 'NR < 3 { next } !/^#/ { exit } { sub(/^# ?/, ""); print }' \
    "${BASH_SOURCE[0]}" >&"${fd}"

  exit "${status}"
}
|
||||
|
||||
die() {
  # Report a fatal error on stderr and terminate the script with status 1.
  #
  # Arguments:
  #   $*  message words; joined with spaces and prefixed with "error: ".
  printf '%s\n' "error: $*" >&2
  exit 1
}
|
||||
|
||||
in_list() {
  # Test whether a value appears among a list of candidates.
  #
  # Arguments:
  #   $1    value to look for
  #   $2..  candidates to compare against (exact string match)
  # Returns:
  #   0 if some candidate equals the value, 1 otherwise.
  local wanted="$1" candidate
  shift
  # A bare `for name; do` iterates over the remaining positional parameters.
  for candidate; do
    if [[ "${candidate}" == "${wanted}" ]]; then
      return 0
    fi
  done
  return 1
}
|
||||
|
||||
container_id() {
  # Print the ID of each running container whose devcontainer labels point
  # at this repo's folder and config file. Prints nothing when none is
  # running. Reads the file-level REPO_ROOT and CONFIG_PATH variables.
  local -a label_filters=(
    --filter "label=devcontainer.local_folder=${REPO_ROOT}"
    --filter "label=devcontainer.config_file=${CONFIG_PATH}"
  )
  docker ps -q "${label_filters[@]}"
}
|
||||
|
||||
# --- argument parsing ------------------------------------------------------
|
||||
|
||||
# Exactly one positional argument (the command) is required; anything else
# prints the help text and exits with status 1.
if [[ $# -ne 1 ]]; then
  usage 1
fi

COMMAND="$1"

# Reject unknown commands before touching docker or the devcontainer CLI.
if ! in_list "${COMMAND}" "${VALID_COMMANDS[@]}"; then
  die "invalid command '${COMMAND}' (expected: ${VALID_COMMANDS[*]})"
fi

# Every command needs the devcontainer definition to be present.
if [[ ! -f "${CONFIG_PATH}" ]]; then
  die "config not found: ${CONFIG_PATH}"
fi

# Arguments shared by every `devcontainer` CLI invocation below.
DC_ARGS=(--workspace-folder "${REPO_ROOT}")
|
||||
|
||||
# --- dispatch --------------------------------------------------------------
|
||||
|
||||
# Run the requested command. COMMAND has already been validated against
# VALID_COMMANDS, so no catch-all branch is needed.
case "${COMMAND}" in
  up)
    echo ">> bringing up devcontainer"
    devcontainer up "${DC_ARGS[@]}"
    ;;

  shell)
    # Auto-up if not already running. `devcontainer up` is idempotent —
    # it reuses an existing container, so this is cheap on warm starts.
    if [[ -z "$(container_id)" ]]; then
      echo ">> devcontainer not running, bringing it up first"
      devcontainer up "${DC_ARGS[@]}"
    fi
    echo ">> attaching shell"
    # Pick the shell inside the container in a single exec: bash when it is
    # installed, plain sh otherwise. Chaining `exec bash || exec sh` on the
    # host would open a second sh session whenever the interactive bash
    # session itself ended with a non-zero status, and silencing stderr for
    # the first attempt would hide genuine exec errors.
    devcontainer exec "${DC_ARGS[@]}" \
      sh -c 'if command -v bash >/dev/null 2>&1; then exec bash; else exec sh; fi'
    ;;

  down)
    running_ids="$(container_id)"
    if [[ -z "${running_ids}" ]]; then
      echo ">> devcontainer not running, nothing to stop"
      exit 0
    fi
    echo ">> stopping devcontainer"
    # `docker ps -q` prints one ID per line. IDs contain no whitespace, so
    # word splitting is used deliberately to pass each ID as its own
    # argument if more than one container matches.
    # shellcheck disable=SC2086
    docker stop ${running_ids}
    ;;

  rebuild)
    echo ">> rebuilding devcontainer from scratch"
    devcontainer up "${DC_ARGS[@]}" --remove-existing-container --build-no-cache
    ;;
esac
|
||||
|
|
@ -13,13 +13,13 @@ from typing import Callable
|
|||
|
||||
from etl.scraper.scraper import SharePointInstaller, SharePointScraper
|
||||
|
||||
osmosis = SharePointScraper(SharePointInstaller.OSMOSIS_ACD)
|
||||
osmosis = SharePointScraper(SharePointInstaller.SOCIAL_HOUSING_WAVE_3)
|
||||
osmosis
|
||||
|
||||
|
||||
parent_folder = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
|
||||
|
||||
excel_path = "/workspaces/survey-extractor/example_data/SH-SURV-26-001-monday.com.xlsx"
|
||||
asset_list = pd.read_excel(excel_path, sheet_name="SH-SURV-26-001-monday.com")
|
||||
parent_folder = "/Osmosis-ACD Projects/Sero-Clarion Housing/Sero Project Documents/Property Folders"
|
||||
osmosis.get_folders_in_path(parent_folder)
|
||||
excel_path = "/workspaces/survey-extractor/example_data/Solar Programme Phase 1 DRAFT.xlsx"
|
||||
asset_list = pd.read_excel(excel_path, sheet_name="Sheet1")
|
||||
|
||||
# --------------------------------------------------
|
||||
# Retry Decorator (3 attempts + exponential backoff)
|
||||
|
|
@ -51,7 +51,7 @@ def retry(max_attempts: int = 3, base_delay: float = 1.0):
|
|||
|
||||
@retry(max_attempts=5)
|
||||
def process_asset(address: pd.Series):
|
||||
folder_name = f"{address['Name']} {address['Postcode']}"
|
||||
folder_name = f"{address['Real Full Address']}"
|
||||
print(f"\n📁 Processing {folder_name}")
|
||||
|
||||
web_url = osmosis.create_dir(folder_name, parent_folder)
|
||||
|
|
@ -74,7 +74,7 @@ def process_asset(address: pd.Series):
|
|||
|
||||
osmosis.create_dir("4. Post EPC", base_path)
|
||||
osmosis.create_dir(
|
||||
f"{address['Name']} - POST EPC Photos",
|
||||
f"{folder_name} - POST EPC Photos",
|
||||
f"{base_path}/4. Post EPC"
|
||||
)
|
||||
|
||||
|
|
@ -85,8 +85,7 @@ def process_asset(address: pd.Series):
|
|||
osmosis.create_dir("3. Additional Documents", trust_path)
|
||||
|
||||
return {
|
||||
"Name": address["Name"],
|
||||
"Postcode": address["Postcode"],
|
||||
"Name": folder_name,
|
||||
"Sharepoint": web_url,
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +93,7 @@ def process_asset(address: pd.Series):
|
|||
# --------------------------------------------------
|
||||
# Parallel Execution
|
||||
# --------------------------------------------------
|
||||
|
||||
# asset_list = asset_list.head(1)
|
||||
results = []
|
||||
failed_rows = []
|
||||
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ class SharePointInstaller(Enum):
|
|||
# NEW_JJC = os.getenv("NEW JJC", "10d96eba-b4f9-4e30-804f-05a8b60507b0")
|
||||
OSMOSIS_ACD = os.getenv("OSMOSIS_ACD_SHAREPOINT_ID", "931c4361-681b-44e4-86f6-1a54aba3ae8a")
|
||||
PRIVATE_PAY = os.getenv("PRIVATE_PAY_SHAREPOINT_ID", "16812ae4-5898-4fec-a6f6-382d1435586f")
|
||||
# Site ID can be overridden through the environment, matching the other members; the default is the previously hard-coded ID.
SOCIAL_HOUSING_WAVE_3 = os.getenv("SOCIAL_HOUSING_WAVE_3_SHAREPOINT_ID", "c60c58fe-94c5-4647-9b5f-8202f1309f0f")
|
||||
|
||||
class SharePointScraper():
|
||||
"""
|
||||
|
|
|
|||
Binary file not shown.
Binary file not shown.
Loading…
Add table
Reference in a new issue