diff --git a/backend/app/db/models/uploaded_file.py b/backend/app/db/models/uploaded_file.py index 71763790..5b34a752 100644 --- a/backend/app/db/models/uploaded_file.py +++ b/backend/app/db/models/uploaded_file.py @@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum): PAS_2023_OCCUPANCY = "pas_2023_occupancy" ECMK_SITE_NOTE = "ecmk_site_note" ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note" + ECMK_SURVEY_XML = "ecmk_survey_xml" class FileSourceEnum(enum.Enum): diff --git a/backend/ecmk_fetcher/excel_writer.py b/backend/ecmk_fetcher/excel_writer.py index 1e65cf33..f290614b 100644 --- a/backend/ecmk_fetcher/excel_writer.py +++ b/backend/ecmk_fetcher/excel_writer.py @@ -21,7 +21,8 @@ def write_row(file_path: str, row_data: dict[str, Any]) -> None: # Build a mutable header list and insert new columns using insert_cols so # that existing row data shifts along with the headers. - headers: list[str] = [cell.value for cell in ws[1]] # type: ignore[misc] + # Filter out None to guard against blank columns in the source file. + headers: list[str] = [cell.value for cell in ws[1] if cell.value is not None] # type: ignore[misc] for key in new_keys: if key in headers: diff --git a/backend/ecmk_fetcher/processor.py b/backend/ecmk_fetcher/processor.py index 0a910b72..4f8c24ea 100644 --- a/backend/ecmk_fetcher/processor.py +++ b/backend/ecmk_fetcher/processor.py @@ -30,10 +30,13 @@ from backend.ecmk_fetcher.reports import ( build_property_id, map_report_type_to_db_file_type, ) +from backend.ecmk_fetcher.excel_writer import write_row from backend.ecmk_fetcher.upload import ( + upload_excel_to_sharepoint, upload_file_to_s3_and_update_db, upload_file_to_sharepoint, ) +from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap from utils.logger import setup_logger from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient from utils.sharepoint.domna_sites import DomnaSites @@ -60,6 +63,15 @@ def run_job() -> None: ) sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments" + sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling" + + DIMENSIONS_FILENAME: str = "Dimensions.xlsx" + local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME) + + sharepoint_client.download_file( + sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}", + local_path=local_dimensions_path, + ) s3_bucket: str = "retrofit-energy-assessments-dev" @@ -143,8 +155,19 @@ def run_job() -> None: try: if report_type == FileDownloadButtonType.RAW_XML.value: - # TODO: extract data from XML and write to file - pass + with open(file_path, "r", encoding="utf-8") as f: + xml_string = f.read() + details = parse_rdsap(xml_string) + row_data = flatten_sap_property(details) + write_row(local_dimensions_path, row_data) + upload_excel_to_sharepoint( + client=sharepoint_client, + file_path=local_dimensions_path, + sharepoint_path=sharepoint_excel_path, + ) + logger.info( + f"Written dimensions row and uploaded Dimensions.xlsx for {address}" + ) else: upload_file_to_sharepoint( client=sharepoint_client, diff --git a/backend/ecmk_fetcher/reports.py b/backend/ecmk_fetcher/reports.py index 7ab4fc91..d2f8ea52 100644 --- a/backend/ecmk_fetcher/reports.py +++ b/backend/ecmk_fetcher/reports.py @@ -24,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum: return FileTypeEnum.ECMK_SITE_NOTE case FileDownloadButtonType.SITENOTE_REPORT.value: return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE + case FileDownloadButtonType.RAW_XML.value: + return FileTypeEnum.ECMK_SURVEY_XML case _: raise ValueError("Unknown report type") diff --git a/backend/ecmk_fetcher/upload.py b/backend/ecmk_fetcher/upload.py index 0a744e53..8cb451b0 100644 --- a/backend/ecmk_fetcher/upload.py +++ b/backend/ecmk_fetcher/upload.py @@ -28,6 +28,18 @@ def upload_file_to_sharepoint( ) +def upload_excel_to_sharepoint( + client: DomnaSharepointClient, + file_path: str, + sharepoint_path: str, +) -> None: + client.upload_file( + file_path=file_path, + sharepoint_path=sharepoint_path, + file_name=os.path.basename(file_path), + ) + + def upload_file_to_s3_and_update_db( bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum ) -> None: diff --git a/utils/sharepoint/domna_sharepoint_client.py b/utils/sharepoint/domna_sharepoint_client.py index 67e079ed..5e0255ac 100644 --- a/utils/sharepoint/domna_sharepoint_client.py +++ b/utils/sharepoint/domna_sharepoint_client.py @@ -90,6 +90,41 @@ class DomnaSharepointClient: file_name, get_file_stream(file_path), sharepoint_path ) + def download_file(self, sharepoint_path: str, local_path: str) -> bool: + """ + Download a file from SharePoint to a local path. + + Returns True if the file was downloaded, False if it does not exist yet. + Raises on any other error. + """ + sharepoint_client = SharePointClient( + tenant_id=self.sharepoint_tenant_id, + client_id=self.sharepoint_client_id, + client_secret=self.sharepoint_client_secret, + site_id=self.sharepoint_drive.value, + ) + + try: + metadata: Dict[str, Any] = sharepoint_client.get_file_metadata(sharepoint_path) + except ValueError: + return False + + download_url: Optional[str] = metadata.get("@microsoft.graph.downloadUrl") + if not download_url: + return False + + content: BytesIO = SharePointClient.download_sharepoint_file(download_url) + + parent_dir = os.path.dirname(local_path) + if parent_dir: + os.makedirs(parent_dir, exist_ok=True) + + with open(local_path, "wb") as f: + f.write(content.getvalue()) + + self.logger.debug(f"Downloaded SharePoint file to: {local_path}") + return True + def create_temp_file(self, content: BytesIO, path: str): # Ensure the path is under /tmp/ new_path = os.path.join("/tmp/sharepoint", path) diff --git a/utils/sharepoint/sharepoint_client.py b/utils/sharepoint/sharepoint_client.py index 71f82b68..5807c3bd 100644 --- a/utils/sharepoint/sharepoint_client.py +++ b/utils/sharepoint/sharepoint_client.py @@ -278,6 +278,17 @@ class SharePointClient: # logger.debug(f"Listing folder contents from URL: {url}") return "GET", url, None + @api_call_decorator + def get_file_metadata(self, file_path: str) -> Dict[str, Any]: + """ + GET /drives/{drive-id}/root:/{file_path} + + Returns file metadata, including '@microsoft.graph.downloadUrl'. + Raises ValueError if the file does not exist (404). + """ + url = f"https://graph.microsoft.com/v1.0/drives/{self.document_drive_id}/root:/{file_path}" + return "GET", url, None + @api_call_decorator def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]: """ @@ -325,7 +336,7 @@ class SharePointClient: return self.upload_file(file_name, sharepoint_parent_id, file_stream) @staticmethod - def download_sharepoint_file(download_url): + def download_sharepoint_file(download_url: str) -> BytesIO: """ Downloads a file from the given URL and returns its content.