mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
move extraction from spreadsheet to dedicated file
This commit is contained in:
parent
252657a374
commit
272837d6ef
2 changed files with 51 additions and 46 deletions
|
|
@ -1,8 +1,6 @@
|
||||||
from datetime import datetime, timezone
|
from datetime import datetime, timezone
|
||||||
import os
|
import os
|
||||||
import re
|
|
||||||
from typing import Any, Dict, List, Optional, Tuple, cast
|
from typing import Any, Dict, List, Optional, Tuple, cast
|
||||||
from openpyxl import load_workbook
|
|
||||||
|
|
||||||
from backend.app.config import get_settings
|
from backend.app.config import get_settings
|
||||||
from backend.app.db.connection import db_session
|
from backend.app.db.connection import db_session
|
||||||
|
|
@ -14,9 +12,6 @@ from backend.app.db.models.uploaded_file import (
|
||||||
from backend.documents_parser.db_writer import save_epc_property_data
|
from backend.documents_parser.db_writer import save_epc_property_data
|
||||||
from backend.documents_parser.parser import parse_pashub_site_notes
|
from backend.documents_parser.parser import parse_pashub_site_notes
|
||||||
from backend.pashub_fetcher.core_files import infer_file_type
|
from backend.pashub_fetcher.core_files import infer_file_type
|
||||||
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
|
||||||
|
|
||||||
from backend.pashub_fetcher.job import Job
|
|
||||||
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
|
from backend.pashub_fetcher.pashub_client import PashubClient, UnauthorizedError
|
||||||
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
|
from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
|
||||||
PashubToAraTriggerRequest,
|
PashubToAraTriggerRequest,
|
||||||
|
|
@ -24,6 +19,7 @@ from backend.pashub_fetcher.pashub_to_ara_trigger_request import (
|
||||||
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
from backend.pashub_fetcher.sharepoint_subfolders import SharepointSubfolders
|
||||||
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
|
from backend.pashub_fetcher.token_getter import get_token_from_local_storage
|
||||||
from backend.utils.subtasks import task_handler
|
from backend.utils.subtasks import task_handler
|
||||||
|
from datatypes.epc.domain.epc_property_data import EpcPropertyData
|
||||||
from utils.logger import setup_logger
|
from utils.logger import setup_logger
|
||||||
from utils.s3 import upload_file_to_s3
|
from utils.s3 import upload_file_to_s3
|
||||||
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
|
||||||
|
|
@ -33,45 +29,6 @@ from utils.sharepoint.domna_sites import DomnaSites
|
||||||
logger = setup_logger()
|
logger = setup_logger()
|
||||||
|
|
||||||
|
|
||||||
def extract_jobs(
    filepath: str,
    *,
    sheet_name: str = "filtered",
    header_row: int = 3,
    name_header: str = "Name",
    link_header: str = "PasHub Link",
) -> List[Job]:
    """Extract PasHub jobs (id and address) from a spreadsheet.

    The header row is scanned to locate the name and link columns; every row
    below it contributes one job when it has both a name and a link containing
    ``/jobs/<id>/``. Rows missing either value, or whose link does not match,
    are skipped silently.

    Earlier spreadsheets used a sheet called
    ``"watford warm homes (wave 3) mai"`` and a ``"Pashub Link"`` header; pass
    ``sheet_name`` / ``link_header`` to read those instead of editing the code.

    Args:
        filepath: Path of the workbook to read.
        sheet_name: Name of the worksheet holding the job rows.
        header_row: 1-based row index of the header row.
        name_header: Header label of the column used as the job address.
        link_header: Header label of the column holding the PasHub job link.

    Returns:
        One ``{"id": ..., "address": ...}`` entry per usable row, in sheet
        order.

    Raises:
        KeyError: If the worksheet does not exist or a required header is not
            found in ``header_row``.
    """
    # data_only=True yields cached cell values rather than formula text.
    wb = load_workbook(filepath, data_only=True)
    ws = wb[sheet_name]

    # Map each non-empty header label to its 1-based column index.
    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_header = ws.cell(row=header_row, column=col).value
        # Check the raw value first: str(None) is the truthy string "None",
        # which would otherwise register empty cells as a "None" header.
        if raw_header is None:
            continue
        label = str(raw_header).strip()
        if label:
            headers[label] = col

    missing = [h for h in (name_header, link_header) if h not in headers]
    if missing:
        # Still a KeyError (as a plain dict lookup would raise), but with
        # enough context to diagnose a renamed or shifted header row.
        raise KeyError(
            f"Missing header(s) {missing} in row {header_row} of sheet "
            f"{sheet_name!r}; found {sorted(headers)}"
        )

    name_col = headers[name_header]
    link_col = headers[link_header]

    # Compiled once, outside the row loop.
    # NOTE(review): the pattern requires a "/" after the id, so a link that
    # ends directly with the id is skipped - confirm this is intended.
    job_id_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    jobs: List[Job] = []
    for row in range(header_row + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value

        if not name or not link:
            continue

        match = job_id_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs
|
|
||||||
|
|
||||||
|
|
||||||
def get_pashub_client(email: str, password: str) -> PashubClient:
|
def get_pashub_client(email: str, password: str) -> PashubClient:
|
||||||
token = get_token_from_local_storage(email, password)
|
token = get_token_from_local_storage(email, password)
|
||||||
|
|
@ -143,9 +100,14 @@ def upload_job_to_s3_and_update_db(
|
||||||
uploaded_files.append(uploaded_file)
|
uploaded_files.append(uploaded_file)
|
||||||
|
|
||||||
file_type: Optional[str] = cast(Optional[str], uploaded_file.file_type)
|
file_type: Optional[str] = cast(Optional[str], uploaded_file.file_type)
|
||||||
if file_type is not None and FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE:
|
if (
|
||||||
|
file_type is not None
|
||||||
|
and FileTypeEnum(file_type) == FileTypeEnum.RD_SAP_SITE_NOTE
|
||||||
|
):
|
||||||
try:
|
try:
|
||||||
site_notes_pairs.append((uploaded_file, parse_pashub_site_notes(file_path)))
|
site_notes_pairs.append(
|
||||||
|
(uploaded_file, parse_pashub_site_notes(file_path))
|
||||||
|
)
|
||||||
except Exception:
|
except Exception:
|
||||||
logger.warning(f"Failed to parse site notes {file_path}", exc_info=True)
|
logger.warning(f"Failed to parse site notes {file_path}", exc_info=True)
|
||||||
|
|
||||||
|
|
|
||||||
43
backend/pashub_fetcher/spreadsheet.py
Normal file
43
backend/pashub_fetcher/spreadsheet.py
Normal file
|
|
@ -0,0 +1,43 @@
|
||||||
|
import re
|
||||||
|
from typing import Dict, List
|
||||||
|
from openpyxl import load_workbook
|
||||||
|
|
||||||
|
from backend.pashub_fetcher.job import Job
|
||||||
|
|
||||||
|
|
||||||
|
def extract_jobs(
    filepath: str,
    *,
    sheet_name: str = "filtered",
    header_row: int = 3,
    name_header: str = "Name",
    link_header: str = "PasHub Link",
) -> List[Job]:
    """Extract PasHub jobs (id and address) from a spreadsheet.

    The header row is scanned to locate the name and link columns; every row
    below it contributes one job when it has both a name and a link containing
    ``/jobs/<id>/``. Rows missing either value, or whose link does not match,
    are skipped silently.

    Args:
        filepath: Path of the workbook to read.
        sheet_name: Name of the worksheet holding the job rows.
        header_row: 1-based row index of the header row.
        name_header: Header label of the column used as the job address.
        link_header: Header label of the column holding the PasHub job link.

    Returns:
        One ``{"id": ..., "address": ...}`` entry per usable row, in sheet
        order.

    Raises:
        KeyError: If the worksheet does not exist or a required header is not
            found in ``header_row``.
    """
    # data_only=True yields cached cell values rather than formula text.
    wb = load_workbook(filepath, data_only=True)
    ws = wb[sheet_name]

    # Map each non-empty header label to its 1-based column index.
    headers: Dict[str, int] = {}
    for col in range(1, ws.max_column + 1):
        raw_header = ws.cell(row=header_row, column=col).value
        # Check the raw value first: str(None) is the truthy string "None",
        # which would otherwise register empty cells as a "None" header.
        if raw_header is None:
            continue
        label = str(raw_header).strip()
        if label:
            headers[label] = col

    missing = [h for h in (name_header, link_header) if h not in headers]
    if missing:
        # Still a KeyError (as a plain dict lookup would raise), but with
        # enough context to diagnose a renamed or shifted header row.
        raise KeyError(
            f"Missing header(s) {missing} in row {header_row} of sheet "
            f"{sheet_name!r}; found {sorted(headers)}"
        )

    name_col = headers[name_header]
    link_col = headers[link_header]

    # Compiled once, outside the row loop.
    # NOTE(review): the pattern requires a "/" after the id, so a link that
    # ends directly with the id is skipped - confirm this is intended.
    job_id_pattern = re.compile(r"/jobs/([0-9a-fA-F\-]+)/")

    jobs: List[Job] = []
    for row in range(header_row + 1, ws.max_row + 1):
        name = ws.cell(row=row, column=name_col).value
        link = ws.cell(row=row, column=link_col).value

        if not name or not link:
            continue

        match = job_id_pattern.search(str(link))
        if not match:
            continue

        jobs.append(
            {
                "id": match.group(1),
                "address": str(name),
            }
        )

    return jobs
|
||||||
Loading…
Add table
Reference in a new issue