update Dimensions.xlsx with XML contents

This commit is contained in:
Daniel Roth 2026-04-15 13:31:40 +00:00
parent 1153d19f0e
commit 155eab86e4
7 changed files with 89 additions and 4 deletions

View file

@ -16,6 +16,7 @@ class FileTypeEnum(enum.Enum):
PAS_2023_OCCUPANCY = "pas_2023_occupancy" PAS_2023_OCCUPANCY = "pas_2023_occupancy"
ECMK_SITE_NOTE = "ecmk_site_note" ECMK_SITE_NOTE = "ecmk_site_note"
ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note" ECMK_RD_SAP_SITE_NOTE = "ecmk_rd_sap_site_note"
ECMK_SURVEY_XML = "ecmk_survey_xml"
class FileSourceEnum(enum.Enum): class FileSourceEnum(enum.Enum):

View file

@ -21,7 +21,8 @@ def write_row(file_path: str, row_data: dict[str, Any]) -> None:
# Build a mutable header list and insert new columns using insert_cols so # Build a mutable header list and insert new columns using insert_cols so
# that existing row data shifts along with the headers. # that existing row data shifts along with the headers.
headers: list[str] = [cell.value for cell in ws[1]] # type: ignore[misc] # Filter out None to guard against blank columns in the source file.
headers: list[str] = [cell.value for cell in ws[1] if cell.value is not None] # type: ignore[misc]
for key in new_keys: for key in new_keys:
if key in headers: if key in headers:

View file

@ -30,10 +30,13 @@ from backend.ecmk_fetcher.reports import (
build_property_id, build_property_id,
map_report_type_to_db_file_type, map_report_type_to_db_file_type,
) )
from backend.ecmk_fetcher.excel_writer import write_row
from backend.ecmk_fetcher.upload import ( from backend.ecmk_fetcher.upload import (
upload_excel_to_sharepoint,
upload_file_to_s3_and_update_db, upload_file_to_s3_and_update_db,
upload_file_to_sharepoint, upload_file_to_sharepoint,
) )
from backend.ecmk_fetcher.xml_processor import flatten_sap_property, parse_rdsap
from utils.logger import setup_logger from utils.logger import setup_logger
from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient from utils.sharepoint.domna_sharepoint_client import DomnaSharepointClient
from utils.sharepoint.domna_sites import DomnaSites from utils.sharepoint.domna_sites import DomnaSites
@ -60,6 +63,15 @@ def run_job() -> None:
) )
sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments" sharepoint_base_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Assessments"
sharepoint_excel_path: str = "/Projects/Southern Housing/SH-SURV-26-001/Modelling"
DIMENSIONS_FILENAME: str = "Dimensions.xlsx"
local_dimensions_path: str = os.path.join(BASE_DIR, DIMENSIONS_FILENAME)
sharepoint_client.download_file(
sharepoint_path=f"{sharepoint_excel_path}/{DIMENSIONS_FILENAME}",
local_path=local_dimensions_path,
)
s3_bucket: str = "retrofit-energy-assessments-dev" s3_bucket: str = "retrofit-energy-assessments-dev"
@ -143,8 +155,19 @@ def run_job() -> None:
try: try:
if report_type == FileDownloadButtonType.RAW_XML.value: if report_type == FileDownloadButtonType.RAW_XML.value:
# TODO: extract data from XML and write to file with open(file_path, "r", encoding="utf-8") as f:
pass xml_string = f.read()
details = parse_rdsap(xml_string)
row_data = flatten_sap_property(details)
write_row(local_dimensions_path, row_data)
upload_excel_to_sharepoint(
client=sharepoint_client,
file_path=local_dimensions_path,
sharepoint_path=sharepoint_excel_path,
)
logger.info(
f"Written dimensions row and uploaded Dimensions.xlsx for {address}"
)
else: else:
upload_file_to_sharepoint( upload_file_to_sharepoint(
client=sharepoint_client, client=sharepoint_client,

View file

@ -24,6 +24,8 @@ def map_report_type_to_db_file_type(report_type: int) -> FileTypeEnum:
return FileTypeEnum.ECMK_SITE_NOTE return FileTypeEnum.ECMK_SITE_NOTE
case FileDownloadButtonType.SITENOTE_REPORT.value: case FileDownloadButtonType.SITENOTE_REPORT.value:
return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE return FileTypeEnum.ECMK_RD_SAP_SITE_NOTE
case FileDownloadButtonType.RAW_XML.value:
return FileTypeEnum.ECMK_SURVEY_XML
case _: case _:
raise ValueError("Unknown report type") raise ValueError("Unknown report type")

View file

@ -28,6 +28,18 @@ def upload_file_to_sharepoint(
) )
def upload_excel_to_sharepoint(
    client: DomnaSharepointClient,
    file_path: str,
    sharepoint_path: str,
) -> None:
    """Upload a local workbook to SharePoint under its own base name.

    Args:
        client: SharePoint client whose ``upload_file`` performs the upload.
        file_path: Local path of the file to upload.
        sharepoint_path: Destination path handed unchanged to
            ``client.upload_file`` (presumably the target folder; confirm
            against the client implementation).
    """
    # The remote file keeps the local file's name, without any directory part.
    workbook_name = os.path.basename(file_path)
    client.upload_file(
        file_name=workbook_name,
        file_path=file_path,
        sharepoint_path=sharepoint_path,
    )
def upload_file_to_s3_and_update_db( def upload_file_to_s3_and_update_db(
bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum bucket: str, file_path: str, hubspot_listing_id: str, file_type: FileTypeEnum
) -> None: ) -> None:

View file

@ -90,6 +90,41 @@ class DomnaSharepointClient:
file_name, get_file_stream(file_path), sharepoint_path file_name, get_file_stream(file_path), sharepoint_path
) )
def download_file(self, sharepoint_path: str, local_path: str) -> bool:
    """
    Download a file from SharePoint to a local path.

    Args:
        sharepoint_path: Path of the file within the SharePoint drive.
        local_path: Local destination; missing parent directories are created.

    Returns:
        True if the file was downloaded and written to ``local_path``.
        False if the metadata lookup raised ``ValueError`` (treated as
        "file does not exist yet") or if the returned metadata carries no
        '@microsoft.graph.downloadUrl'. Both cases are logged so callers
        that ignore the return value still leave a trace.

    Raises:
        Any other exception from the SharePoint client or from writing the
        local file is propagated unchanged.
    """
    sharepoint_client = SharePointClient(
        tenant_id=self.sharepoint_tenant_id,
        client_id=self.sharepoint_client_id,
        client_secret=self.sharepoint_client_secret,
        site_id=self.sharepoint_drive.value,
    )

    try:
        metadata: Dict[str, Any] = sharepoint_client.get_file_metadata(sharepoint_path)
    except ValueError:
        # get_file_metadata documents ValueError as its "not found" signal.
        self.logger.debug(f"SharePoint file not found: {sharepoint_path}")
        return False

    download_url: Optional[str] = metadata.get("@microsoft.graph.downloadUrl")
    if not download_url:
        # The item exists but cannot be downloaded. Keep returning False for
        # backward compatibility, but do not let it pass unnoticed.
        self.logger.warning(
            f"No download URL in SharePoint metadata for: {sharepoint_path}"
        )
        return False

    content: BytesIO = SharePointClient.download_sharepoint_file(download_url)

    # os.path.dirname returns "" for a bare file name; makedirs("") would raise.
    parent_dir = os.path.dirname(local_path)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)

    with open(local_path, "wb") as f:
        f.write(content.getvalue())

    self.logger.debug(f"Downloaded SharePoint file to: {local_path}")
    return True
def create_temp_file(self, content: BytesIO, path: str): def create_temp_file(self, content: BytesIO, path: str):
# Ensure the path is under /tmp/ # Ensure the path is under /tmp/
new_path = os.path.join("/tmp/sharepoint", path) new_path = os.path.join("/tmp/sharepoint", path)

View file

@ -278,6 +278,17 @@ class SharePointClient:
# logger.debug(f"Listing folder contents from URL: {url}") # logger.debug(f"Listing folder contents from URL: {url}")
return "GET", url, None return "GET", url, None
@api_call_decorator
def get_file_metadata(self, file_path: str) -> Dict[str, Any]:
    """
    GET /drives/{drive-id}/root:/{file_path}

    Builds the request for a drive item's metadata, addressed by path.
    This function itself returns a (method, url, body) tuple; the
    ``api_call_decorator`` is expected to execute the request and hand the
    caller the response dictionary, including '@microsoft.graph.downloadUrl',
    and to raise ValueError if the file does not exist (404).

    Args:
        file_path: Path of the file relative to the drive root. Leading
            slashes are ignored, and the path is percent-encoded, so pass
            it unencoded.
    """
    # Local import: the file's top-level import block is not visible here.
    from urllib.parse import quote

    # A leading "/" would otherwise yield "root://..." in the URL, and
    # reserved characters such as "#", "?" or "%" in a name would be parsed
    # as URL syntax instead of as part of the path.
    encoded_path = quote(file_path.lstrip("/"), safe="/")
    url = (
        "https://graph.microsoft.com/v1.0/drives/"
        f"{self.document_drive_id}/root:/{encoded_path}"
    )
    return "GET", url, None
@api_call_decorator @api_call_decorator
def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]: def create_folder(self, file_name: str, folder_path: str) -> Dict[str, Any]:
""" """
@ -325,7 +336,7 @@ class SharePointClient:
return self.upload_file(file_name, sharepoint_parent_id, file_stream) return self.upload_file(file_name, sharepoint_parent_id, file_stream)
@staticmethod @staticmethod
def download_sharepoint_file(download_url): def download_sharepoint_file(download_url: str) -> BytesIO:
""" """
Downloads a file from the given URL and returns its content. Downloads a file from the given URL and returns its content.