diff --git a/etl/hubspot/hubspotDataTodB.py b/etl/hubspot/hubspotDataTodB.py index 210c9593..65fad572 100644 --- a/etl/hubspot/hubspotDataTodB.py +++ b/etl/hubspot/hubspotDataTodB.py @@ -78,22 +78,6 @@ class HubspotDataToDb: .one_or_none() ) - def _parse_hs_date(self, value: Optional[str]) -> Optional[datetime]: - if not value: - return None - try: - return datetime.fromisoformat(value.replace("Z", "+00:00")) - except ValueError: - return None - - def _sha256(self, file_path: str) -> str: - """Compute SHA-256 checksum of a file.""" - sha256 = hashlib.sha256() - with open(file_path, "rb") as f: - for chunk in iter(lambda: f.read(8192), b""): - sha256.update(chunk) - return sha256.hexdigest() - def update_deal_with_checks( self, deal_in_db: HubspotDealData, @@ -185,6 +169,14 @@ class HubspotDataToDb: session.refresh(new_record) return new_record + def _sha256(self, file_path: str) -> str: + """Compute SHA-256 checksum of a file.""" + sha256 = hashlib.sha256() + with open(file_path, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + sha256.update(chunk) + return sha256.hexdigest() + def _update_existing_deal( self, existing: HubspotDealData, diff --git a/etl/hubspot/scripts/scraper/main.py b/etl/hubspot/scripts/scraper/main.py index cec03da8..8fa71bf7 100644 --- a/etl/hubspot/scripts/scraper/main.py +++ b/etl/hubspot/scripts/scraper/main.py @@ -38,56 +38,69 @@ def handler(body: dict[str, Any], context: Any) -> None: hubspot_deal_id ) + deal_changed = False if not db_deal: # New hubspot deal, no diffing to do + logger.info(f"New HubSpot deal of ID {hubspot_deal_id}. Loading to database...") if company: company_data: CompanyData = hubspot_client.get_company_information(company) db_client: HubspotDataToDb = HubspotDataToDb() db_client.upsert_company(company_data) db_client.upsert_deal(hubspot_deal, company, listing, hubspot_client) - return - - deal_unchanged = True - - # Deal already in db, check whether anything has changed - if HubspotDealDiffer.check_for_db_update_trigger( - new_deal=hubspot_deal, - new_company=company, - new_listing=listing, - old_deal=db_deal, - ): - db_client.update_deal_with_checks( - deal_in_db=db_deal, - hubspot_client=hubspot_client, - hs_deal=hubspot_deal, - hs_company_id=company, - hs_listing=listing, - ) - deal_unchanged = False - - if deal_unchanged: - return - - # ============================== - # Orchestration of other lambdas - # ============================== - if HubspotDealDiffer.check_for_pashub_trigger( - new_deal=hubspot_deal, old_deal=db_deal - ): - message_body: Dict[str, Optional[str]] = { - "pashub_link": hubspot_deal["pashub_link"], - "address": None, # can we get this? - "sharepoint_link": hubspot_deal["sharepoint_link"], - "uprn": hubspot_deal["national_uprn"], - "landlord_property_id": hubspot_deal["owner_property_id"], - "deal_stage": hubspot_deal["deal_stage"], - } - - response = sqs_client.send_message( - QueueUrl=PASHUB_TRIGGER_QUEUE_URL, MessageBody=json.dumps(message_body) - ) - + else: + # Deal already in db, check whether anything has changed logger.info( - f"Sent message to Pashub To Ara queue. MessageId: {response['MessageId']}" + f"HubSpot deal {hubspot_deal_id} already in database. Checking for changes..." ) + if HubspotDealDiffer.check_for_db_update_trigger( + new_deal=hubspot_deal, + new_company=company, + new_listing=listing, + old_deal=db_deal, + ): + logger.info( + f"Deal {hubspot_deal_id} has been changed, updating database..." + ) + db_client.update_deal_with_checks( + deal_in_db=db_deal, + hubspot_client=hubspot_client, + hs_deal=hubspot_deal, + hs_company_id=company, + hs_listing=listing, + ) + deal_changed = True + + if not deal_changed: + logger.info(f"No changes to deal {hubspot_deal_id}") + return + + # ============================== + # Orchestration of other lambdas + # ============================== + if HubspotDealDiffer.check_for_pashub_trigger( + new_deal=hubspot_deal, old_deal=db_deal + ): + logger.info( + f"Triggering Pas Hub file fetcher for HubSpot deal ID {hubspot_deal_id}" + ) + message_body: Dict[str, Optional[str]] = { + "pashub_link": hubspot_deal["pashub_link"], + "address": None, # potentially available from Listing, leave as None for now + "sharepoint_link": hubspot_deal["sharepoint_link"], + "uprn": hubspot_deal["national_uprn"], + "landlord_property_id": hubspot_deal["owner_property_id"], + "deal_stage": hubspot_deal["deal_stage"], + } + + response = sqs_client.send_message( + QueueUrl=PASHUB_TRIGGER_QUEUE_URL, MessageBody=json.dumps(message_body) + ) + + logger.info( + f"Sent message to Pashub To Ara queue. MessageId: {response['MessageId']}" + ) + else: + logger.info( + f"Not Triggering PasHub file fetcher for HubSpot deal ID {hubspot_deal_id}" + )