# Model/etl/hubspot/hubspotDataTodB.py
#
# 418 lines
# 17 KiB
# Python

import os
from sqlmodel import select, Session
from datetime import datetime, timezone
from typing import Dict, Optional
from backend.app.db.models.hubspot_deal_data import HubspotDealData
from backend.app.db.models.hubspot_user import HubspotUser
from etl.hubspot.company_data import CompanyData
from etl.hubspot.hubspotClient import HubspotClient
from etl.hubspot.s3_uploader import S3Uploader
from backend.app.db.connection import db_read_session
from backend.app.db.models.organisation import Organisation
from etl.hubspot.utils import parse_hs_bool, parse_hs_date
from utils.logger import setup_logger
logger = setup_logger()
class HubspotDataToDb:
    """Sync HubSpot companies, deals and owners into the application database.

    Deal syncs also copy the "major condition issue" photo referenced by the
    deal to S3 and store the resulting S3 URL on the deal row.
    """

    # NOTE(review): every write in this class goes through `db_read_session`.
    # The name suggests a read-only session; confirm it is safe for commits.

    # Destination of the evidence photos copied out of HubSpot.
    EVIDENCE_BUCKET = "retrofit-data-dev"
    EVIDENCE_PREFIX = "hubspot/awaabs_law_evidence/"

    def __init__(self):
        """Build the S3 uploader from the ``AWS_*`` environment variables."""
        self.s3 = S3Uploader(
            aws_access_key=os.getenv("AWS_ACCESS_KEY"),
            aws_secret_key=os.getenv("AWS_SECRET_KEY"),
            region=os.getenv("AWS_REGION"),
        )

    def read_org_table(self, limit: int = 10):
        """Return up to ``limit`` rows of the organisation table."""
        with db_read_session() as session:
            return session.exec(select(Organisation).limit(limit)).all()

    def get_org_names(self, limit: int = 10) -> list[str]:
        """Return the non-empty names of up to ``limit`` organisations."""
        return [org.name for org in self.read_org_table(limit) if org.name]

    def upsert_organisation(self, company_data: CompanyData) -> Organisation:
        """Insert or update the organisation matching a HubSpot company.

        The row is looked up by ``hubspot_company_id``. When found, its name
        and ``updated_at`` are overwritten; otherwise a new row is created.

        Args:
            company_data: HubSpot company properties; ``hs_object_id`` and
                ``name`` are read.

        Returns:
            The persisted organisation, refreshed from the database.

        Raises:
            ValueError: if ``company_data`` carries no ``hs_object_id``.
        """
        hubspot_id = company_data.get("hs_object_id")
        if not hubspot_id:
            # Without an id the lookup below would compare against NULL and
            # could match (and rename) an unrelated organisation.
            raise ValueError(
                "company_data is missing 'hs_object_id'; cannot upsert organisation"
            )
        company_name = company_data.get("name")

        with db_read_session() as session:
            record = session.exec(
                select(Organisation).where(
                    Organisation.hubspot_company_id == hubspot_id
                )
            ).first()
            if record is None:
                record = Organisation(
                    hubspot_company_id=hubspot_id,
                    name=company_name,
                )
            else:
                record.name = company_name
                record.updated_at = datetime.now(timezone.utc)
            session.add(record)
            session.commit()
            # Reload the attributes that commit() expires so the returned
            # object is still readable once the session is closed (same
            # pattern as upsert_deal).
            session.refresh(record)
            return record

    def find_all_deals_with_company_id(self, company_id: str):
        """Return every stored deal whose ``company_id`` equals ``company_id``."""
        with db_read_session() as session:
            statement = select(HubspotDealData).where(
                HubspotDealData.company_id == company_id
            )
            return session.exec(statement).all()

    def find_deal_with_deal_id(self, deal_id: str) -> Optional[HubspotDealData]:
        """Return the stored deal with ``deal_id``, or ``None`` if there is none.

        Propagates the ORM's error when more than one row matches.
        """
        with db_read_session() as session:
            statement = select(HubspotDealData).where(
                HubspotDealData.deal_id == deal_id
            )
            return session.exec(statement).one_or_none()

    def upsert_deal(
        self,
        deal_data: Dict[str, str],
        company: Optional[str],
        listing: Optional[dict[str, str]],
        hubspot_client: HubspotClient,
    ):
        """Insert or update the deal row identified by ``hs_object_id``.

        The deal's coordinator and designer owners are synced first, in the
        same session. The evidence photo (if any) is copied to S3 and its URL
        stored on the row before the row is committed.

        Args:
            deal_data: HubSpot deal properties.
            company: value stored in the row's ``company_id``.
            listing: optional listing properties (``listing_id``,
                ``owner_property_id``, ``national_uprn`` are read).
            hubspot_client: client used for owner lookups and photo download.

        Returns:
            The persisted ``HubspotDealData`` row, refreshed from the database.
        """
        with db_read_session() as session:
            deal_id = deal_data.get("hs_object_id")

            for owner_key in ("coordinator_user", "designer_user"):
                self._sync_owner_to_db(
                    deal_data.get(owner_key), hubspot_client, session
                )

            record = session.exec(
                select(HubspotDealData).where(HubspotDealData.deal_id == deal_id)
            ).first()

            if record is not None:
                # The photo check must run before the row is overwritten: it
                # compares HubSpot's current URL with the one stored so far.
                self._handle_existing_photo_upload(record, hubspot_client)
                logger.info(f"🔄 Updating existing deal (deal_id={deal_id})")
                self._update_existing_deal(record, deal_data, listing, company)
            else:
                logger.info(f"🆕 Inserting new deal (deal_id={deal_id})")
                record = self._build_new_deal(deal_id, deal_data, listing, company)
                # Handle upload at insert time
                self._handle_new_photo_upload(record, hubspot_client)

            session.add(record)
            session.commit()
            session.refresh(record)
            return record

    def _sync_owner_to_db(
        self,
        owner_id: Optional[str],
        hubspot_client: HubspotClient,
        session: Session,
    ) -> None:
        """Stage an insert/update of the HubSpot owner ``owner_id``.

        Does nothing when ``owner_id`` is empty or HubSpot returns no owner
        info. The change is only added to ``session``; the caller commits.
        """
        if not owner_id:
            return
        owner_info = hubspot_client.get_owner_info(owner_id)
        if owner_info is None:
            return

        now = datetime.now(timezone.utc)
        user: Optional[HubspotUser] = session.get(HubspotUser, owner_id)
        if user is None:
            user = HubspotUser(
                hubspot_owner_id=owner_id,
                first_name=owner_info["first_name"],
                last_name=owner_info["last_name"],
                email=owner_info["email"],
                updated_at=now,
            )
        else:
            user.first_name = owner_info["first_name"]
            user.last_name = owner_info["last_name"]
            user.email = owner_info["email"]
            user.updated_at = now
        session.add(user)

    @staticmethod
    def _deal_field_values(
        deal_data: Dict[str, str],
        listing: Optional[dict[str, str]],
        company: Optional[str],
    ) -> dict:
        """Map HubSpot deal/listing properties onto ``HubspotDealData`` fields.

        Single source of truth for both the insert and the update path, so the
        two can no longer drift apart. ``deal_id`` is deliberately excluded.
        """
        listing = listing or {}
        get = deal_data.get
        return {
            "dealname": get("dealname"),
            "dealstage": get("dealstage"),
            "listing_id": listing.get("listing_id"),
            "landlord_property_id": listing.get("owner_property_id"),
            "uprn": listing.get("national_uprn"),
            "outcome": get("outcome"),
            "outcome_notes": get("outcome_notes"),
            "project_code": get("project_code"),
            "company_id": company,
            "major_condition_issue_description": get(
                "major_condition_issue_description"
            ),
            "major_condition_issue_photos": get("major_condition_issue_photos"),
            "coordination_status": get("coordination_status__stage_1_"),
            "design_status": get("retrofit_design_status"),
            "coordination_comments": get("coordination_comments"),
            "pashub_link": get("pashub_link"),
            "sharepoint_link": get("sharepoint_link"),
            "dampmould_growth": get("dampmould_growth"),
            "damp_mould_and_repairs_comments": get(
                "damp_mould_and_repairs_comments"
            ),
            "pre_sap": get("pre_sap_score_dropdown"),
            "batch": get("batch"),
            "block_reference": get("block_reference"),
            "epc_prn": get("epc_prn"),
            "potential_post_sap_score_dropdown": get(
                "potential_post_sap_score_dropdown"
            ),
            "ei_score": get("ei_score"),
            "ei_score__potential_": get("ei_score__potential_"),
            "epc_sap_score": get("epc_sap_score"),
            "epc_sap_score__potential_": get("epc_sap_score__potential_"),
            "coordinator": get("coordinator_user"),
            "mtp_completion_date": parse_hs_date(get("mtp_completion_date")),
            "mtp_re_model_completion_date": parse_hs_date(
                get("mtp_re_model_completion_date")
            ),
            "ioe_v3_completion_date": parse_hs_date(get("ioe_v3_completion_date")),
            "proposed_measures": get("proposed_measures_dropdown"),
            "approved_package": get("approved_package"),
            "designer": get("designer_user"),
            "design_completion_date": parse_hs_date(get("design_completion_date")),
            "actual_measures_installed": get("actual_measures_installed"),
            "installer": get("installer"),
            "installer_handover": get("installer_handover"),
            "lodgement_status": get("lodgement_status"),
            "measures_lodgement_date": parse_hs_date(get("measures_lodgement_date")),
            "lodgement_date": parse_hs_date(get("lodgement_date")),
            "expected_commencement_date": parse_hs_date(
                get("expected_commencement_date")
            ),
            "surveyor": get("surveyor"),
            "confirmed_survey_date": parse_hs_date(get("confirmed_survey_date")),
            "confirmed_survey_time": get("confirmed_survey_time"),
            "surveyed_date": parse_hs_date(get("surveyed_date")),
            "design_type": get("design_type"),
            "survey_type": get("survey_type"),
            "measures_for_pibi_ordered": get("measures_for_pibi_ordered"),
            "pibi_order_date": parse_hs_date(get("pibi_order_date")),
            "pibi_completed_date": parse_hs_date(get("pibi_completed_date")),
            "property_halted_date": parse_hs_date(get("property_halted_date")),
            "property_halted_reason": get("property_halted_reason"),
            "technical_approved_measures_for_install": get(
                "technical_approved_measures_for_install"
            ),
            # The HubSpot property names below differ from the column names.
            "sent_to_installer_for_pricing": parse_hs_date(
                get("sent_to_iw_for_pricing")
            ),
            "domna_survey_required": parse_hs_bool(get("osmosis_survey_required")),
            "domna_survey_date": parse_hs_date(get("osmosis_survey_date")),
        }

    def _update_existing_deal(
        self,
        existing: HubspotDealData,
        deal_data: Dict[str, str],
        listing: Optional[dict[str, str]],
        company: Optional[str],
    ):
        """Overwrite every mapped field of ``existing`` with the HubSpot values.

        Fields absent from ``deal_data`` are set to ``None``. ``deal_id`` and
        the S3 evidence URL are left untouched.
        """
        values = self._deal_field_values(deal_data, listing, company)
        for attr, value in values.items():
            setattr(existing, attr, value)

    def _build_new_deal(
        self,
        deal_id: str,
        deal_data: Dict[str, str],
        listing: Optional[dict[str, str]],
        company: Optional[str],
    ) -> HubspotDealData:
        """Create an unsaved ``HubspotDealData`` row from HubSpot values."""
        return HubspotDealData(
            deal_id=deal_id,
            **self._deal_field_values(deal_data, listing, company),
        )

    def _handle_existing_photo_upload(
        self,
        existing_deal: HubspotDealData,
        hubspot_client: HubspotClient,
    ):
        """Re-upload the evidence photo of a stored deal when needed.

        The deal is re-fetched from HubSpot. The photo is copied to S3 when
        its URL differs from the stored one, or when a photo URL is stored but
        no S3 copy exists yet (i.e. an earlier upload failed).
        """
        fresh_deal = hubspot_client.from_deal_id_get_info(existing_deal.deal_id)
        fresh_photo_url = (
            fresh_deal.get("major_condition_issue_photos") if fresh_deal else None
        )
        if not fresh_photo_url:
            logger.warning(
                f"⚠️ Photo URL missing for deal_id {existing_deal.deal_id}"
            )
            return

        if fresh_photo_url != existing_deal.major_condition_issue_photos:
            logger.info(
                f"Hubspot image URL changed from {existing_deal.major_condition_issue_photos} to {fresh_photo_url}"
            )
        elif self._needs_photo_upload(existing_deal):
            # Same URL, but the previous upload never produced an S3 copy.
            logger.info(
                f"Hubspot image URL unchanged but S3 copy missing, retrying: {fresh_photo_url}"
            )
        else:
            logger.info(f"Hubspot image URL unchanged: {fresh_photo_url}")
            return
        self._upload_photo_to_s3(existing_deal, fresh_photo_url, hubspot_client)

    def _handle_new_photo_upload(
        self,
        record: HubspotDealData,
        hubspot_client: HubspotClient,
    ):
        """Copy the photo of a not-yet-saved deal to S3, if it has one."""
        photo_url = record.major_condition_issue_photos
        if photo_url:
            self._upload_photo_to_s3(record, photo_url, hubspot_client)

    def _upload_photo_to_s3(
        self,
        record: HubspotDealData,
        hubspot_photo_url: str,
        hubspot_client: HubspotClient,
    ):
        """Download a HubSpot photo, upload it to S3 and store the S3 URL.

        Best effort: any failure is logged and swallowed so that a broken
        photo never blocks the deal sync. The downloaded local file is always
        removed afterwards.
        """
        # Pre-bound so the cleanup below is safe even if the download fails.
        local_file = None
        try:
            local_file = hubspot_client.download_file_from_url(hubspot_photo_url)
            record.major_condition_issue_evidence_s3_url = self.s3.upload_file(
                local_file,
                self.EVIDENCE_BUCKET,
                prefix=self.EVIDENCE_PREFIX,
            )
        except Exception as e:  # deliberate best-effort boundary
            logger.warning(
                f"⚠️ Failed to upload photo for deal_id {record.deal_id}: {e}"
            )
        finally:
            # `local_file` may be None (failed or empty download);
            # os.path.exists(None) would raise TypeError from inside finally.
            if local_file and os.path.exists(local_file):
                os.remove(local_file)

    def _needs_photo_upload(self, old_deal: HubspotDealData) -> bool:
        """Return True when a photo URL is stored but no S3 copy exists yet."""
        return bool(
            old_deal.major_condition_issue_photos
            and not old_deal.major_condition_issue_evidence_s3_url
        )