From f1f3b84cbdadcecd4010658f0b119295a805e4ee Mon Sep 17 00:00:00 2001 From: Daniel Roth Date: Thu, 9 Apr 2026 15:49:13 +0000 Subject: [PATCH] simplify photo upload logic --- etl/hubspot/hubspotDataTodB.py | 89 +++++------------------------ etl/hubspot/scripts/scraper/main.py | 9 ++- 2 files changed, 19 insertions(+), 79 deletions(-) diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index a50c99da..6763f19c 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -78,53 +78,6 @@ class HubspotDataToDb: .one_or_none() ) - def update_deal_with_checks( - self, - deal_in_db: HubspotDealData, - hubspot_client: HubspotClient, - hs_deal: Dict[str, str], - hs_company_id: Optional[str], - hs_listing: Optional[Dict[str, str]], - ) -> bool: - """ - Updates deal in database and handles major_condition_issue_photos file upload to S3 with integrity check. - """ - self.upsert_deal(hs_deal, hs_company_id, hs_listing, hubspot_client) - - # Handle photo upload if it exists but S3 URL is missing - if self._needs_photo_upload(deal_in_db): - print( - f"🖼️ Found photo for deal_id {deal_in_db.deal_id} — uploading to S3..." - ) - - photo_url = hs_deal.get("major_condition_issue_photos") - - if photo_url: - self._upload_photo_to_s3( - deal_in_db, - photo_url, - hubspot_client, - verify=True, - ) - - # persist change - with db_read_session() as session: - db_record = session.get(HubspotDealData, deal_in_db.id) - db_record.major_condition_issue_evidence_s3_url = ( - deal_in_db.major_condition_issue_evidence_s3_url - ) - session.add(db_record) - session.commit() - - return False - else: - print(f"⚠️ Photo URL missing for deal_id {deal_in_db.deal_id}") - - else: - print(f"✅ No update or upload required for deal_id {deal_in_db.deal_id}.") - - return True - def upsert_deal( self, deal_data: Dict[str, str], @@ -169,14 +122,6 @@ class HubspotDataToDb: session.refresh(new_record) return new_record - def _sha256(self, file_path: str) -> str: - """Compute SHA-256 checksum of a file.""" - sha256 = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - sha256.update(chunk) - return sha256.hexdigest() - def _update_existing_deal( self, existing: HubspotDealData, @@ -315,18 +260,20 @@ class HubspotDataToDb: def _handle_existing_photo_upload( self, - existing: HubspotDealData, + existing_deal: HubspotDealData, hubspot_client: HubspotClient, ): - if self._needs_photo_upload(existing): - fresh_deal = hubspot_client.from_deal_id_get_info(existing.deal_id) - photo_url = fresh_deal.get("major_condition_issue_photos") + # if self._needs_photo_upload(existing): - if not photo_url: - print(f"⚠️ Photo URL missing for deal_id {existing.deal_id}") - return + fresh_deal = hubspot_client.from_deal_id_get_info(existing_deal.deal_id) + fresh_photo_url = fresh_deal.get("major_condition_issue_photos") - self._upload_photo_to_s3(existing, photo_url, hubspot_client) + if not fresh_photo_url: + print(f"⚠️ Photo URL missing for deal_id {existing_deal.deal_id}") + return + + if fresh_photo_url != existing_deal.major_condition_issue_photos: + self._upload_photo_to_s3(existing_deal, fresh_photo_url, hubspot_client) def _handle_new_photo_upload( self, @@ -343,12 +290,11 @@ class HubspotDataToDb: def _upload_photo_to_s3( self, record: HubspotDealData, - photo_url: str, + hubspot_photo_url: str, hubspot_client: HubspotClient, - verify: bool = False, ): try: - local_file = hubspot_client.download_file_from_url(photo_url) + local_file = hubspot_client.download_file_from_url(hubspot_photo_url) s3_url = self.s3.upload_file( local_file, @@ -356,11 +302,6 @@ class HubspotDataToDb: prefix="hubspot/awaabs_law_evidence/", ) - if verify: - downloaded = self.s3.download_from_url(s3_url) - if self._sha256(local_file) != self._sha256(downloaded): - raise ValueError("File integrity check failed after S3 upload.") - record.major_condition_issue_evidence_s3_url = s3_url except Exception as e: @@ -369,8 +310,8 @@ class HubspotDataToDb: if "local_file" in locals() and os.path.exists(local_file): os.remove(local_file) - def _needs_photo_upload(self, deal: HubspotDealData) -> bool: + def _needs_photo_upload(self, old_deal: HubspotDealData) -> bool: return bool( - deal.major_condition_issue_photos - and not deal.major_condition_issue_evidence_s3_url + old_deal.major_condition_issue_photos + and not old_deal.major_condition_issue_evidence_s3_url ) diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index ea79bc18..f41ef154 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -63,12 +63,11 @@ def handler(body: dict[str, Any], context: Any) -> None: logger.info( f"Deal {hubspot_deal_id} has been changed, updating database..." ) - db_client.update_deal_with_checks( - deal_in_db=db_deal, + db_client.upsert_deal( + deal_data=hubspot_deal, + company=company, + listing=listing, hubspot_client=hubspot_client, - hs_deal=hubspot_deal, - hs_company_id=company, - hs_listing=listing, ) deal_changed = True