Revert spreadsheet update changes and add better logging.

This commit is contained in:
Daniel Roth 2026-04-07 11:29:10 +00:00
parent cd7b59a62f
commit ba30bccb07
3 changed files with 13 additions and 62 deletions

View file

@ -1,16 +1,14 @@
import re
from dataclasses import dataclass
from typing import Any, Dict, Optional, cast
from typing import Any, Dict, Optional
from openpyxl import Workbook, load_workbook
from openpyxl.worksheet.worksheet import Worksheet
from openpyxl.cell.cell import Cell
@dataclass
class PropertyRow:
    """A single property entry read from the spreadsheet."""

    # Worksheet row number the entry was read from.
    row_index: int
    # Short address derived from the row's "deal name" cell.
    address: str
    # True when the row's "processed" cell reads "true" (case-insensitive).
    processed: bool
def extract_addresses_from_spreadsheet(
@ -22,7 +20,6 @@ def extract_addresses_from_spreadsheet(
header_row: int = 1
id_col: Optional[int] = None
deal_name_col: Optional[int] = None
processed_col: Optional[int] = None
# find columns
for col in range(1, ws.max_column + 1):
@ -33,17 +30,10 @@ def extract_addresses_from_spreadsheet(
id_col = col
elif value == "deal name":
deal_name_col = col
elif value == "processed":
processed_col = col
if id_col is None or deal_name_col is None:
raise Exception("Missing required columns")
# create processed column if missing
if processed_col is None:
processed_col = ws.max_column + 1
cast(Cell, ws.cell(row=header_row, column=processed_col)).value = "processed"
properties: Dict[str, PropertyRow] = {}
for row in range(2, ws.max_row + 1):
@ -53,55 +43,16 @@ def extract_addresses_from_spreadsheet(
if not id_val or not deal_name:
continue
processed_val: Any = ws.cell(row=row, column=processed_col).value
processed: bool = str(processed_val).lower() == "true"
property_id: str = str(id_val).strip()
properties[property_id] = PropertyRow(
row_index=row,
address=extract_succinct_address(str(deal_name)),
processed=processed,
)
return properties
def mark_properties_as_processed(
    filepath: str,
    property_map: Dict[str, PropertyRow],
) -> None:
    """Write the in-memory ``processed`` flags back to the spreadsheet.

    Loads the workbook at ``filepath``, finds the "processed" header in row 1
    of the "Southern RA-Lite Programme 3103" sheet, sets that column to
    ``True`` on the row of every ``property_map`` entry whose ``processed``
    flag is set, and saves the workbook back to the same path.

    Args:
        filepath: Path of the workbook to load and overwrite.
        property_map: Entries whose ``row_index`` and ``processed`` values
            decide which rows are updated; the keys are not used here.

    Raises:
        Exception: If no header cell in row 1 reads "processed"
            (compared case-insensitively, surrounding whitespace ignored).
    """
    workbook: Workbook = load_workbook(filepath)
    sheet: Worksheet = workbook["Southern RA-Lite Programme 3103"]
    header_row: int = 1

    def is_processed_header(column: int) -> bool:
        # Empty header cells never match.
        header = sheet.cell(row=header_row, column=column).value
        return bool(header) and str(header).strip().lower() == "processed"

    # First matching header wins; later duplicates are ignored.
    processed_col: int | None = next(
        (
            column
            for column in range(1, sheet.max_column + 1)
            if is_processed_header(column)
        ),
        None,
    )
    if processed_col is None:
        raise Exception("Processed column not found")

    # Only rows flagged as processed are written; other rows are left as-is.
    for entry in property_map.values():
        if not entry.processed:
            continue
        target = cast(
            Cell,
            sheet.cell(row=entry.row_index, column=processed_col),
        )
        target.value = True

    workbook.save(filepath)
def extract_succinct_address(deal_name: str) -> str:
left_part = deal_name.split("|")[0].strip()

View file

@ -50,6 +50,7 @@ def get_first_row_signature(page: Page) -> str:
def go_to_next_page(page: Page) -> bool:
logger.info("Going to next page")
before = get_first_row_signature(page)
page.locator("#assessmentDatatable_next a").click()

View file

@ -11,7 +11,6 @@ from playwright.sync_api import (
from backend.ecmk_fetcher.address_list import (
PropertyRow,
extract_addresses_from_spreadsheet,
mark_properties_as_processed,
)
from backend.ecmk_fetcher.browser import (
attach_debug_listeners,
@ -23,9 +22,12 @@ from backend.ecmk_fetcher.browser import (
)
from backend.ecmk_fetcher.reports import REPORT_TYPES, build_property_id
from backend.ecmk_fetcher.sharepoint import upload_file_to_sharepoint
from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites
logger = setup_logger()
def run_job() -> None:
username: str = ""
@ -86,24 +88,24 @@ def run_job() -> None:
if not property_row:
continue
if property_row.processed:
continue
logger.info(f"Match found for property {address}")
sharepoint_address: str = property_row.address
go_to_assessment_details(page, row)
all_uploaded: bool = True
for report_type in REPORT_TYPES:
file_path: str | None = download_with_retry(
page, report_type
)
if not file_path:
all_uploaded = False
continue
logger.info(
f"Successfully downloaded file {os.path.basename(file_path)} from ECMK"
)
try:
upload_file_to_sharepoint(
client=sharepoint_client,
@ -111,16 +113,15 @@ def run_job() -> None:
base_path=sharepoint_base_path,
subpath=sharepoint_address,
)
logger.info(
f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}"
)
except Exception:
all_uploaded = False
raise
finally:
if os.path.exists(file_path):
os.remove(file_path)
if all_uploaded:
property_row.processed = True
page.go_back()
page.wait_for_selector(
"#assessmentDatatable tbody tr", timeout=15000
@ -135,5 +136,3 @@ def run_job() -> None:
finally:
context.close()
browser.close()
mark_properties_as_processed(filepath, property_map)