From ba30bccb07b7f14199a1955a1f60a8f95eed0f12 Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Tue, 7 Apr 2026 11:29:10 +0000 Subject: [PATCH] revert spreadsheet update changes. add better logging --- backend/ecmk_fetcher/address_list.py | 51 +--------------------------- backend/ecmk_fetcher/browser.py | 1 + backend/ecmk_fetcher/processor.py | 23 ++++++------- 3 files changed, 13 insertions(+), 62 deletions(-) diff --git a/backend/ecmk_fetcher/address_list.py b/backend/ecmk_fetcher/address_list.py index 54c675d1..a2834366 100644 --- a/backend/ecmk_fetcher/address_list.py +++ b/backend/ecmk_fetcher/address_list.py @@ -1,16 +1,14 @@ import re from dataclasses import dataclass -from typing import Any, Dict, Optional, cast +from typing import Any, Dict, Optional from openpyxl import Workbook, load_workbook from openpyxl.worksheet.worksheet import Worksheet -from openpyxl.cell.cell import Cell @dataclass class PropertyRow: row_index: int address: str - processed: bool def extract_addresses_from_spreadsheet( @@ -22,7 +20,6 @@ def extract_addresses_from_spreadsheet( header_row: int = 1 id_col: Optional[int] = None deal_name_col: Optional[int] = None - processed_col: Optional[int] = None # find columns for col in range(1, ws.max_column + 1): @@ -33,17 +30,10 @@ def extract_addresses_from_spreadsheet( id_col = col elif value == "deal name": deal_name_col = col - elif value == "processed": - processed_col = col if id_col is None or deal_name_col is None: raise Exception("Missing required columns") - # create processed column if missing - if processed_col is None: - processed_col = ws.max_column + 1 - cast(Cell, ws.cell(row=header_row, column=processed_col)).value = "processed" - properties: Dict[str, PropertyRow] = {} for row in range(2, ws.max_row + 1): @@ -53,55 +43,16 @@ def extract_addresses_from_spreadsheet( if not id_val or not deal_name: continue - processed_val: Any = ws.cell(row=row, column=processed_col).value - processed: bool = str(processed_val).lower() == "true" - property_id: str = str(id_val).strip() properties[property_id] = PropertyRow( row_index=row, address=extract_succinct_address(str(deal_name)), - processed=processed, ) return properties -def mark_properties_as_processed( - filepath: str, - property_map: Dict[str, PropertyRow], -) -> None: - wb: Workbook = load_workbook(filepath) - ws: Worksheet = wb["Southern RA-Lite Programme 3103"] - - header_row: int = 1 - - # find processed column - processed_col: int | None = None - - for col in range(1, ws.max_column + 1): - value = ws.cell(row=header_row, column=col).value - if value and str(value).strip().lower() == "processed": - processed_col = col - break - - if processed_col is None: - raise Exception("Processed column not found") - - # update rows - for property_row in property_map.values(): - if property_row.processed: - cast( - Cell, - ws.cell( - row=property_row.row_index, - column=processed_col, - ), - ).value = True - - wb.save(filepath) - - def extract_succinct_address(deal_name: str) -> str: left_part = deal_name.split("|")[0].strip() diff --git a/backend/ecmk_fetcher/browser.py b/backend/ecmk_fetcher/browser.py index 6d018537..de349b92 100644 --- a/backend/ecmk_fetcher/browser.py +++ b/backend/ecmk_fetcher/browser.py @@ -50,6 +50,7 @@ def get_first_row_signature(page: Page) -> str: def go_to_next_page(page: Page) -> bool: + logger.info("Going to next page") before = get_first_row_signature(page) page.locator("#assessmentDatatable_next a").click() diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index dce6c7ef..e774fc9a 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -11,7 +11,6 @@ from playwright.sync_api import ( from backend.ecmk_fetcher.address_list import ( PropertyRow, extract_addresses_from_spreadsheet, - mark_properties_as_processed, ) from backend.ecmk_fetcher.browser import ( attach_debug_listeners, @@ -23,9 +22,12 @@ from backend.ecmk_fetcher.browser import ( ) from backend.ecmk_fetcher.reports import REPORT_TYPES, build_property_id from backend.ecmk_fetcher.sharepoint import upload_file_to_sharepoint +from utils.logger import setup_logger from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient from utils.sharepoint.domna_sites import DomnaSites +logger = setup_logger() + def run_job() -> None: username: str = "" @@ -86,24 +88,24 @@ def run_job() -> None: if not property_row: continue - if property_row.processed: - continue + logger.info(f"Match found for property {address}") sharepoint_address: str = property_row.address go_to_assessment_details(page, row) - all_uploaded: bool = True - for report_type in REPORT_TYPES: file_path: str | None = download_with_retry( page, report_type ) if not file_path: - all_uploaded = False continue + logger.info( + f"Successfully downloaded file {os.path.basename(file_path)} from ECMK" + ) + try: upload_file_to_sharepoint( client=sharepoint_client, @@ -111,16 +113,15 @@ def run_job() -> None: base_path=sharepoint_base_path, subpath=sharepoint_address, ) + logger.info( + f"Successfully loaded {os.path.basename(file_path)} to sharepoint for {address}" + ) except Exception: - all_uploaded = False raise finally: if os.path.exists(file_path): os.remove(file_path) - if all_uploaded: - property_row.processed = True - page.go_back() page.wait_for_selector( "#assessmentDatatable tbody tr", timeout=15000 @@ -135,5 +136,3 @@ def run_job() -> None: finally: context.close() browser.close() - - mark_properties_as_processed(filepath, property_map)