mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
Merge 21f5cd40da into 133190f093
This commit is contained in:
commit
89c0b1df44
12 changed files with 358 additions and 12 deletions
|
|
@ -6,6 +6,7 @@ psycopg2-binary==2.9.10
|
|||
python-jose==3.3.0
|
||||
cryptography==43.0.3
|
||||
mangum==0.19.0
|
||||
playwright==1.58.0
|
||||
# AWS
|
||||
boto3==1.35.44
|
||||
# Data
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
version: "3.9"
|
||||
|
||||
services:
|
||||
categorisation-lambda:
|
||||
build:
|
||||
context: ../
|
||||
dockerfile: backend/categorisation/handler/Dockerfile
|
||||
ports:
|
||||
- "9000:8080"
|
||||
env_file:
|
||||
- ../.env
|
||||
13
backend/pashub_fetcher/core_files.py
Normal file
13
backend/pashub_fetcher/core_files.py
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
from enum import Enum
|
||||
|
||||
|
||||
class CoreFiles(Enum):
    """Kinds of "core" evidence document the fetcher downloads.

    Each member's value is the text an evidence file name starts with;
    ``CotalityClient._get_core_file_type`` matches file names against these
    values using ``str.startswith``.
    """

    # Photo packs and site notes.
    PHOTOPACK = "Photopack"
    SITENOTE = "SiteNote"
    RDSAP_SITENOTE = "RdSAP_SiteNote"

    # PAS reports.
    PAS2023_VENTILATION = "PAS 2023 Ventilation Assessment Report"
    PAS2023_CONDITION = "PAS 2023 Condition Report"
    PAS_SIGNIFICANCE = "PAS Significance"
    PAR_PHOTOPACK = "PAR Photo Pack"
    PAS2023_PROPERTY = "PAS 2023 Property Assessment Report"
    PAS2023_OCCUPANCY = "PAS 2023 Occupancy Assessment Report"
|
||||
137
backend/pashub_fetcher/cotality_client.py
Normal file
137
backend/pashub_fetcher/cotality_client.py
Normal file
|
|
@ -0,0 +1,137 @@
|
|||
from collections import defaultdict
|
||||
from typing import Dict, List, Optional
|
||||
from datetime import datetime
|
||||
|
||||
import requests
|
||||
|
||||
from backend.pashub_fetcher.core_files import CoreFiles
|
||||
from backend.pashub_fetcher.evidence_file_data import EvidenceFileData
|
||||
from backend.pashub_fetcher.evidence_metadata import EvidenceMetadata
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
class UnauthorizedError(Exception):
    """Raised when a request made by ``CotalityClient`` gets an HTTP 401 response."""
|
||||
|
||||
|
||||
class CotalityClient:
    """Client for the PAS Hub evidence API.

    Lists the evidence files attached to a job, keeps the newest file of each
    ``CoreFiles`` kind, resolves a download URL for it and writes it to the
    current working directory.
    """

    # Seconds to wait on any single HTTP call. ``requests`` has no default
    # timeout, so without one a stalled connection blocks forever.
    DEFAULT_TIMEOUT_SECONDS = 30.0

    def __init__(self, token: str, timeout: float = DEFAULT_TIMEOUT_SECONDS):
        """Create a session that sends ``token`` as a bearer token.

        Args:
            token: Bearer token for the PAS Hub API.
            timeout: Per-request timeout in seconds.
        """
        self.token = token
        self.timeout = timeout
        self.company_id = "cb5249e2-8f31-4ef4-aefd-08ddaccb1fa2"
        self.base = "https://pashub.net/api"

        self.session = requests.Session()
        self.session.headers.update(
            {
                "Authorization": f"Bearer {self.token}",
                "Accept": "application/json",
            }
        )
        logger.info("Finished initialising CotalityClient")

    def get_core_envidence_files_by_job_id(self, job_id: str) -> List[str]:
        """Download the newest core evidence file of each kind for a job.

        Args:
            job_id: Identifier of the job whose evidence is fetched.

        Returns:
            The names of the files written to disk (empty if the job has no
            evidence or none of it is a core file).

        Raises:
            UnauthorizedError: If any request is answered with HTTP 401.
            requests.HTTPError: For any other non-success HTTP status.
        """
        logger.info(f"Getting Core Evidence Files for job ID {job_id}")
        evidence_list: List[EvidenceFileData] = self._get_evidence_list(job_id)
        logger.info(f"Found {len(evidence_list)} Evidence files to get")
        if not evidence_list:
            return []

        core_files: Dict[CoreFiles, EvidenceFileData] = self._select_latest_core_files(
            evidence_list
        )
        logger.info(f"Number of core files to download is {len(core_files)}")

        saved_files: List[str] = []
        for evidence in core_files.values():
            evidence_id = evidence.file_id
            if not evidence_id:
                # Nothing to look up without an id.
                continue

            logger.info(f"Getting metadata for file {evidence.file_name}")
            metadata: EvidenceMetadata = self._get_evidence_metadata(
                job_id, evidence_id
            )
            download_url: str = self._build_download_url(metadata, evidence_id)

            file_name = evidence.file_name
            self._download_file(download_url, file_name)
            logger.info("Successfully downloaded file")
            saved_files.append(file_name)

        return saved_files

    # Correctly spelled alias; the misspelled name is kept for existing callers.
    get_core_evidence_files_by_job_id = get_core_envidence_files_by_job_id

    def _get_core_file_type(self, file: EvidenceFileData) -> Optional[CoreFiles]:
        """Return the first ``CoreFiles`` member whose value prefixes the file name."""
        for core_file in CoreFiles:
            if file.file_name.startswith(core_file.value):
                return core_file
        return None

    @staticmethod
    def _parse_created_utc(value: str) -> datetime:
        """Parse an ISO-8601 timestamp into a timezone-aware ``datetime``.

        A trailing ``Z`` is rewritten to ``+00:00`` because
        ``datetime.fromisoformat`` only accepts it from Python 3.11 onwards.
        Naive timestamps are treated as UTC (assumption based on the field
        name ``created_utc``) so that naive and aware values can be compared.
        """
        # Function-scope import: the module only imports ``datetime`` itself.
        from datetime import timezone

        text = value.strip()
        if text.endswith(("Z", "z")):
            text = f"{text[:-1]}+00:00"
        parsed = datetime.fromisoformat(text)
        if parsed.tzinfo is None:
            parsed = parsed.replace(tzinfo=timezone.utc)
        return parsed

    def _select_latest_core_files(
        self,
        files: List[EvidenceFileData],
    ) -> Dict[CoreFiles, EvidenceFileData]:
        """Group files by core type and keep the most recently created of each.

        Files that do not match any ``CoreFiles`` prefix are ignored.
        """
        grouped: Dict[CoreFiles, List[EvidenceFileData]] = defaultdict(list)
        for file in files:
            core_type = self._get_core_file_type(file)
            if core_type is None:
                continue
            grouped[core_type].append(file)

        return {
            core_type: max(
                group, key=lambda f: self._parse_created_utc(f.created_utc)
            )
            for core_type, group in grouped.items()
        }

    @staticmethod
    def _raise_for_status(
        response, unauthorized_message: str = "Token expired or invalid"
    ) -> None:
        """Raise ``UnauthorizedError`` on 401, ``requests.HTTPError`` on other errors."""
        if response.status_code == 401:
            raise UnauthorizedError(unauthorized_message)
        response.raise_for_status()

    def _get_evidence_list(self, job_id: str) -> List[EvidenceFileData]:
        """Fetch the evidence entries listed for ``job_id``."""
        url = f"{self.base}/jobs/{job_id}/evidence"

        r = self.session.get(url, timeout=self.timeout)
        self._raise_for_status(r)

        # ``or []`` also covers a JSON ``null`` under "results".
        results = r.json().get("results") or []
        return [EvidenceFileData.from_api(item) for item in results]

    def _get_evidence_metadata(self, job_id: str, evidence_id: str) -> EvidenceMetadata:
        """Fetch the storage metadata for one evidence file."""
        url = f"{self.base}/jobs/{job_id}/evidenceMetadata"
        params = {"evidenceIds": evidence_id}

        r = self.session.get(url, params=params, timeout=self.timeout)
        self._raise_for_status(r)

        return EvidenceMetadata.from_api(r.json())

    def _build_download_url(self, metadata: EvidenceMetadata, file_id: str) -> str:
        """Insert ``<container>/<file_id>`` between the blob URI and its query string.

        A blob URI without a query string yields a URL without one, instead of
        failing to unpack the split result.
        """
        base, _, query = metadata.blob_uri.partition("?")
        url = f"{base}{metadata.container_name}/{file_id}"
        return f"{url}?{query}" if query else url

    def _download_file(self, url: str, file_name: str) -> None:
        """Download ``url`` and write the body to ``file_name``.

        Uses a plain ``requests.get`` rather than ``self.session``, so the
        session's ``Authorization`` header is not sent with this request.
        """
        r = requests.get(url, timeout=self.timeout)
        self._raise_for_status(r, "Download URL rejected the request (HTTP 401)")

        with open(file_name, "wb") as f:
            f.write(r.content)
|
||||
25
backend/pashub_fetcher/evidence_file_data.py
Normal file
25
backend/pashub_fetcher/evidence_file_data.py
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict, Optional
|
||||
|
||||
|
||||
@dataclass
class EvidenceFileData:
    """One evidence-file entry as returned by the evidence listing endpoint."""

    file_id: str
    file_name: str
    # Creation timestamp kept as the raw string from the API; the client
    # parses it with ``datetime.fromisoformat`` when picking the newest file.
    created_utc: str
    file_size: int
    file_extension: str

    # Optional in the payload; ``None`` when the key is missing.
    evidence_category: Optional[str] = None

    @classmethod
    def from_api(cls, data: Dict[str, Any]) -> EvidenceFileData:
        """Build an instance from the camelCase keys of an API payload.

        Raises:
            KeyError: If any key other than ``evidenceCategory`` is missing.
        """
        # attribute name -> payload key, in the order the keys are read.
        required_keys = {
            "file_id": "fileId",
            "file_name": "fileName",
            "created_utc": "createdUtc",
            "file_size": "fileSize",
            "file_extension": "fileExtension",
        }
        values = {attr: data[key] for attr, key in required_keys.items()}
        return cls(evidence_category=data.get("evidenceCategory"), **values)
|
||||
16
backend/pashub_fetcher/evidence_metadata.py
Normal file
16
backend/pashub_fetcher/evidence_metadata.py
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Dict
|
||||
|
||||
|
||||
@dataclass
class EvidenceMetadata:
    """Storage location details returned by the evidence metadata endpoint."""

    # Inserted into the download URL between the blob URI and the file id.
    container_name: str
    # Blob URI; the client splits it at "?" when building the download URL.
    blob_uri: str

    @classmethod
    def from_api(cls, data: Dict[str, Any]) -> EvidenceMetadata:
        """Build an instance from the ``containerName`` and ``blobUri`` keys.

        Raises:
            KeyError: If either key is missing from ``data``.
        """
        container = data["containerName"]
        uri = data["blobUri"]
        return cls(container_name=container, blob_uri=uri)
|
||||
27
backend/pashub_fetcher/handler/Dockerfile
Normal file
27
backend/pashub_fetcher/handler/Dockerfile
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
# Playwright's Python image, pinned to the same Playwright version that is
# pip-installed below.
FROM mcr.microsoft.com/playwright/python:v1.58.0-jammy

# Install AWS Lambda RIE (Runtime Interface Emulator).
# NOTE(review): "releases/latest" is unpinned, so rebuilds may pick up a
# different binary; consider pinning a release tag.
ADD https://github.com/aws/aws-lambda-runtime-interface-emulator/releases/latest/download/aws-lambda-rie /usr/local/bin/aws-lambda-rie
RUN chmod +x /usr/local/bin/aws-lambda-rie

# Install Lambda runtime client.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir awslambdaric playwright==1.58.0 requests

# Set working directory (Lambda task root)
WORKDIR /var/task

# NOTE(review): this copies an env file into the image; make sure
# backend/.env.test never contains real credentials.
COPY backend/.env.test backend/.env

COPY utils/ utils/
COPY backend/pashub_fetcher/ backend/pashub_fetcher/

# Lambda entrypoint: the runtime client is started through the emulator.
ENTRYPOINT ["/usr/local/bin/aws-lambda-rie", "python", "-m", "awslambdaric"]

# -----------------------------
# Lambda handler
# -----------------------------
# CMD ["backend/pashub_fetcher/handler/handler.handler"]
# For local running
CMD ["backend.pashub_fetcher.handler.handler.handler"]
|
||||
47
backend/pashub_fetcher/handler/handler.py
Normal file
47
backend/pashub_fetcher/handler/handler.py
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
import time
|
||||
from typing import Any, List, Mapping
|
||||
|
||||
from backend.pashub_fetcher.cotality_client import CotalityClient, UnauthorizedError
|
||||
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
|
||||
from utils.logger import setup_logger
|
||||
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def handler(event: Mapping[str, Any], context: Any) -> None:
    """Lambda entry point: log in to PAS Hub and download core evidence files.

    ``event`` and ``context`` are currently unused; the job ids are still
    hard-coded (see the TODO below).

    Raises:
        Exception: Whatever the token lookup raises, re-raised after logging.
        UnauthorizedError: If a job still gets HTTP 401 after one token refresh.
    """
    # Function-scope import: ``os`` is not imported at module level.
    import os

    # Credentials come from the environment when set; the previous placeholder
    # values remain the fallback so existing behaviour is unchanged.
    # NOTE(review): the variable names are a proposal - align with deployment config.
    pas_hub_email = os.environ.get("PAS_HUB_EMAIL", "random@test.com")
    pas_hub_password = os.environ.get("PAS_HUB_PASSWORD", "my_fake_password")

    # Seconds to wait between jobs (simulates a manual download).
    pause_between_jobs = 10

    try:
        token: str = get_token_from_local_storage(pas_hub_email, pas_hub_password)
    except Exception:
        # logger.exception records the traceback as well as the message.
        logger.exception("Error getting auth token from Pas Hub")
        raise
    logger.info("Token extracted successfully")

    client = CotalityClient(token=token)

    jobs = [
        "5abf6e27-e4c4-4ba8-b69d-9e34939e0002",
        "047f4455-85e2-4293-97b1-6b460137d33e",
    ]  # TODO: get these from request body

    saved_files: List[str] = []
    for index, job_id in enumerate(jobs):
        if index:
            # Pause only between jobs; sleeping after the last one just
            # lengthens the invocation.
            time.sleep(pause_between_jobs)

        try:
            saved_files.extend(client.get_core_envidence_files_by_job_id(job_id))
        except UnauthorizedError:
            logger.warning("Token expired — refreshing")
            token = get_token_from_local_storage(pas_hub_email, pas_hub_password)
            client = CotalityClient(token=token)

            # retry once; a second 401 propagates to the caller
            saved_files.extend(client.get_core_envidence_files_by_job_id(job_id))

    logger.info(f"saved {len(saved_files)} files")
|
||||
11
backend/pashub_fetcher/local_handler/docker-compose.yml
Normal file
11
backend/pashub_fetcher/local_handler/docker-compose.yml
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
# Local stack for running the PAS Hub fetcher Lambda in a container.
version: "3.9"

services:
  pashub-fetcher-lambda:
    build:
      # Build context is three directories up so the Dockerfile can COPY
      # both utils/ and backend/.
      context: ../../../
      dockerfile: backend/pashub_fetcher/handler/Dockerfile
    # Host port 9000 (the port invoke_local_lambda.py posts to) -> container port 8080.
    ports: ["9000:8080"]
    env_file: ["../../../.env"]
|
||||
26
backend/pashub_fetcher/local_handler/invoke_local_lambda.py
Normal file
26
backend/pashub_fetcher/local_handler/invoke_local_lambda.py
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#!/usr/bin/env python3
|
||||
import json
|
||||
import requests
|
||||
|
||||
# Address of the locally running Lambda container.
HOST = "localhost"
PORT = "9000"

# Invocation endpoint on that host and port.
LAMBDA_URL = "http://{}:{}/2015-03-31/functions/function/invocations".format(HOST, PORT)

# Event with a single record whose body is a JSON-encoded string.
payload = {"Records": [{"body": json.dumps({"uprn": 123456})}]}

response = requests.post(LAMBDA_URL, json=payload)

print(f"Status code: {response.status_code}")
print("Response:", response.text, sep="\n")
|
||||
54
backend/pashub_fetcher/token_getter.py
Normal file
54
backend/pashub_fetcher/token_getter.py
Normal file
|
|
@ -0,0 +1,54 @@
|
|||
from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
|
||||
|
||||
from utils.logger import setup_logger
|
||||
|
||||
logger = setup_logger()
|
||||
|
||||
|
||||
def get_token_from_local_storage(email: str, password: str) -> str:
    """Log in to pashub.net in headless Chromium and return the stored token.

    Fills the login form, waits a fixed three seconds, then reads the
    ``token`` entry from the page's ``localStorage``.

    Args:
        email: Value typed into the ``#email`` field.
        password: Value typed into the ``#password`` field.

    Returns:
        The value stored under ``token`` in ``localStorage``.

    Raises:
        Exception: On a Playwright timeout, when the URL still contains
            "login" after submitting, when no token is found, or on any other
            failure. The original error is attached as ``__cause__``.
    """
    logger.info("Starting Playwright flow")

    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=True,
            args=["--no-sandbox", "--disable-dev-shm-usage"],
        )

        try:
            # Inside the try so the browser is closed even if opening the
            # page fails.
            page = browser.new_page()

            logger.info("Navigating to site...")
            page.goto("https://pashub.net/", timeout=30000)

            logger.info("Filling login form...")
            page.fill("#email", email)
            page.fill("#password", password)

            logger.info("Submitting login...")
            page.click("#btn-login")

            # Fixed pause before inspecting the URL and localStorage.
            page.wait_for_timeout(3000)

            if "login" in page.url.lower():
                raise Exception("Login failed (still on login page)")

            logger.info(f"Login likely successful. URL: {page.url}")

            token = page.evaluate(
                """() => {
                    return localStorage.getItem('token');
                }"""
            )

            if not token:
                raise Exception("Login succeeded but no token found")

            return token

        except PlaywrightTimeoutError as e:
            raise Exception(f"Timeout during login flow: {e}") from e

        except Exception as e:
            raise Exception(f"Unexpected error: {e}") from e

        finally:
            browser.close()
|
||||
|
|
@ -1,7 +1,7 @@
|
|||
import requests
|
||||
import json
|
||||
|
||||
TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzMyMzc4MjQsImV4cCI6MTc3MzI0NTAyNCwic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.mkkxeZiD_ByHY4TJKpLQ-trmeGs15s0ekL6u1n-ek9j-EzNyf6qalEHCyHf8gzdNhU_vay96bIOMRHp4vXFaLqSANwKZayIS3EoA_b9-u2FAZpooxEvReAMNJGoZ6WLD01AQXWv-l7ww1ZqAnQzw0moL_Oma6hVmA5oa-RJKJ3MerS7e0Wei97Db48E140-EAbQf2iPcKYYtCNRA4il6n8DFiqGeoUMGo99jkR1ceZAvMpOAj8RhKX-4qSiDfX6yXUS2G96U5m7S_GWI-DEj5TazkN10Af3TyOY3EVjmZoJcRpiAR4cFmlfcTydjrShU03DWmPZm1QItf2McxfCpNA"
|
||||
TOKEN = "eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCIsImtpZCI6Ik1EUTRNRU5GUTBVNU9FUXpOelk1TVRFME0wUkdOMFpFUkRoR1JVVkJNVGMxT1RFNFJERXlPQSJ9.eyJodHRwOi8vZW1haWwiOiJzZWJhc3RpYW5Ab3Ntb3Npcy1hY2QuY29tIiwiaHR0cDovL2NsdWsudG9rZW4vbGFzdFBhc3N3b3JkQ2hhbmdlIjoiMjAyNS0wOC0yNlQwOTo1NDoyNi4zMjZaIiwiaHR0cDovL2NsdWsudG9rZW4vY29ubmVjdGlvbiI6ImVUZWNoSUQiLCJodHRwOi8vY2x1ay50b2tlbi9zdHJhdGVneSI6ImF1dGgwIiwiaHR0cDovL2NsdWsudG9rZW4vc3RyYXRlZ3lUeXBlIjoiZGF0YWJhc2UiLCJpc3MiOiJodHRwczovL2V0ZWNoaWQuZXUuYXV0aDAuY29tLyIsInN1YiI6ImF1dGgwfDY4YWQ4NDUyZDI2YzI1ZmMyMzkwZmYxYSIsImF1ZCI6WyJodHRwczovL3Bhc2h1Yi5hcGkuZXRlY2gubmV0IiwiaHR0cHM6Ly9ldGVjaGlkLmV1LmF1dGgwLmNvbS91c2VyaW5mbyJdLCJpYXQiOjE3NzQyODczOTMsImV4cCI6MTc3NDI5NDU5Mywic2NvcGUiOiJvcGVuaWQiLCJhenAiOiJEaVp6d3VVaTVkVmozOXR3NG00bWZ6emZvRm5MdmVLZyJ9.NHh21XfnRofsFkRkc-28Dz-vQAdY70lXkEmh-Mzz7Fg6gjDbZeMu7PnBwgbDP_U8r6R0mI_pDIUc1MzJe1Rf5SF2-RV36TcGzmVzb3ek9wPsy3lxST5WL-vn-qUJ7GsZiGOeQ-jDLLFn8b8tjFrD7BGv8uphrfYAbPDm0atznkdbUSQQy-rfRJWhisnDtHf99j96TuJz3dV4bfI6VGrin-jezbg6BCvUYWQtttUs7knQKEWO0sGGDxtS29sbn4MX8Jqz4-hf6N2XSlgv52aIDwTVX-lyMWzfoeuIGhvCKuDiJeVw2c0r2UZFpHqjnfhXcb0_aacukXe8z-srj8-Rdw"
|
||||
|
||||
base = "https://pashub.net/api"
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue