mirror of
https://github.com/Hestia-Homes/Model.git
synced 2026-06-08 11:17:27 +00:00
added bulk
This commit is contained in:
parent
1ac5cb253a
commit
ea84cf9fd4
6 changed files with 236 additions and 16 deletions
|
|
@ -40,6 +40,30 @@ class HubspotDealData(SQLModel, table=True):
|
|||
coordination_status: Optional[str] = Field(default=None)
|
||||
design_status: Optional[str] = Field(default=None)
|
||||
|
||||
listing_id: Optional[str] = Field(default=None)
|
||||
pashub_link: Optional[str] = Field(default=None)
|
||||
sharepoint_link: Optional[str] = Field(default=None)
|
||||
dampmould_growth: Optional[str] = Field(default=None)
|
||||
pre_sap: Optional[str] = Field(default=None)
|
||||
coordinator: Optional[str] = Field(default=None)
|
||||
mtp_completion_date: Optional[datetime] = Field(default=None)
|
||||
mtp_re_model_completion_date: Optional[datetime] = Field(default=None)
|
||||
ioe_v3_completion_date: Optional[datetime] = Field(default=None)
|
||||
proposed_measures: Optional[str] = Field(default=None)
|
||||
approved_package: Optional[str] = Field(default=None)
|
||||
designer: Optional[str] = Field(default=None)
|
||||
design_completion_date: Optional[datetime] = Field(default=None)
|
||||
actual_measures_installed: Optional[str] = Field(default=None)
|
||||
installer: Optional[str] = Field(default=None)
|
||||
installer_handover: Optional[str] = Field(default=None)
|
||||
lodgement_status: Optional[str] = Field(default=None)
|
||||
measures_lodgement_date: Optional[datetime] = Field(default=None)
|
||||
lodgement_date: Optional[datetime] = Field(default=None)
|
||||
expected_commencement_date: Optional[datetime] = Field(default=None)
|
||||
surveyor: Optional[str] = Field(default=None)
|
||||
confirmed_survey_date: Optional[datetime] = Field(default=None)
|
||||
confirmed_survey_time: Optional[str] = Field(default=None)
|
||||
|
||||
created_at: datetime = Field(
|
||||
sa_column=Column(
|
||||
DateTime(timezone=True),
|
||||
|
|
|
|||
|
|
@ -189,6 +189,7 @@ class HubspotClient:
|
|||
)
|
||||
|
||||
listing_info: dict[str, str] = cast(dict[str, str], listing.properties) # type: ignore[reportUnknownMemberType]
|
||||
listing_info["listing_id"] = listing_id
|
||||
self.logger.info(f"Listing info for deal {deal_id}: {listing_info}")
|
||||
return listing_info
|
||||
|
||||
|
|
@ -201,13 +202,35 @@ class HubspotClient:
|
|||
"dealname",
|
||||
"dealstage",
|
||||
"pipeline",
|
||||
"outcome", # outcome,
|
||||
"outcome_notes", # outcome notes
|
||||
"outcome",
|
||||
"outcome_notes",
|
||||
"project_code",
|
||||
"major_condition_issue_description",
|
||||
"major_condition_issue_photos",
|
||||
"coordination_status__stage_1_", # Coordiantion Status (Stage 1),
|
||||
"retrofit_design_status", # Retrofit Design Status
|
||||
"coordination_status__stage_1_",
|
||||
"retrofit_design_status",
|
||||
"pashub_link",
|
||||
"sharepoint_link",
|
||||
"dampmould_growth",
|
||||
"pre_sap",
|
||||
"coordinator",
|
||||
"mtp_completion_date",
|
||||
"mtp_re_model_completion_date",
|
||||
"ioe_v3_completion_date",
|
||||
"proposed_measures",
|
||||
"approved_package",
|
||||
"designer",
|
||||
"design_completion_date",
|
||||
"actual_measures_installed",
|
||||
"installer",
|
||||
"installer_handover",
|
||||
"lodgement_status",
|
||||
"measures_lodgement_date",
|
||||
"lodgement_date",
|
||||
"expected_commencement_date",
|
||||
"surveyor",
|
||||
"confirmed_survey_date",
|
||||
"confirmed_survey_time",
|
||||
],
|
||||
)
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ from backend.app.db.connection import db_read_session
|
|||
from backend.app.db.models.organisation import Organisation, HubspotDealData
|
||||
from sqlmodel import select
|
||||
from datetime import datetime, timezone
|
||||
from typing import TypedDict
|
||||
from typing import TypedDict, Optional
|
||||
from etl.hubspot.s3_uploader import S3Uploader
|
||||
import hashlib
|
||||
import os
|
||||
|
|
@ -82,6 +82,14 @@ class HubspotDataToDb:
|
|||
.one_or_none()
|
||||
)
|
||||
|
||||
def _parse_hs_date(self, value: Optional[str]) -> Optional[datetime]:
|
||||
if not value:
|
||||
return None
|
||||
try:
|
||||
return datetime.fromisoformat(value.replace("Z", "+00:00"))
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
def _sha256(self, file_path: str) -> str:
|
||||
"""Compute SHA-256 checksum of a file."""
|
||||
sha256 = hashlib.sha256()
|
||||
|
|
@ -114,6 +122,10 @@ class HubspotDataToDb:
|
|||
deal_in_db.deal_id == hs_deal.get("hs_object_id"), "deal_id mismatch"
|
||||
),
|
||||
soft_assert(deal_in_db.company_id == hs_company_id, "company_id mismatch"),
|
||||
soft_assert(
|
||||
deal_in_db.listing_id == hs_listing.get("listing_id"),
|
||||
"listing_id mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.landlord_property_id == hs_listing.get("owner_property_id"),
|
||||
"landlord_property_id mismatch",
|
||||
|
|
@ -157,6 +169,94 @@ class HubspotDataToDb:
|
|||
deal_in_db.design_status == hs_deal.get("retrofit_design_status"),
|
||||
"retrofit design mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.pashub_link == hs_deal.get("pashub_link"),
|
||||
"pashub_link mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.sharepoint_link == hs_deal.get("sharepoint_link"),
|
||||
"sharepoint_link mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.dampmould_growth == hs_deal.get("dampmould_growth"),
|
||||
"dampmould_growth mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.pre_sap == hs_deal.get("pre_sap"),
|
||||
"pre_sap mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.coordinator == hs_deal.get("coordinator"),
|
||||
"coordinator mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.mtp_completion_date == self._parse_hs_date(hs_deal.get("mtp_completion_date")),
|
||||
"mtp_completion_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.mtp_re_model_completion_date == self._parse_hs_date(hs_deal.get("mtp_re_model_completion_date")),
|
||||
"mtp_re_model_completion_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.ioe_v3_completion_date == self._parse_hs_date(hs_deal.get("ioe_v3_completion_date")),
|
||||
"ioe_v3_completion_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.proposed_measures == hs_deal.get("proposed_measures"),
|
||||
"proposed_measures mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.approved_package == hs_deal.get("approved_package"),
|
||||
"approved_package mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.designer == hs_deal.get("designer"),
|
||||
"designer mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.design_completion_date == self._parse_hs_date(hs_deal.get("design_completion_date")),
|
||||
"design_completion_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.actual_measures_installed == hs_deal.get("actual_measures_installed"),
|
||||
"actual_measures_installed mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.installer == hs_deal.get("installer"),
|
||||
"installer mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.installer_handover == hs_deal.get("installer_handover"),
|
||||
"installer_handover mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.lodgement_status == hs_deal.get("lodgement_status"),
|
||||
"lodgement_status mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.measures_lodgement_date == self._parse_hs_date(hs_deal.get("measures_lodgement_date")),
|
||||
"measures_lodgement_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.lodgement_date == self._parse_hs_date(hs_deal.get("lodgement_date")),
|
||||
"lodgement_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.expected_commencement_date == self._parse_hs_date(hs_deal.get("expected_commencement_date")),
|
||||
"expected_commencement_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.surveyor == hs_deal.get("surveyor"),
|
||||
"surveyor mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.confirmed_survey_date == self._parse_hs_date(hs_deal.get("confirmed_survey_date")),
|
||||
"confirmed_survey_date mismatch",
|
||||
),
|
||||
soft_assert(
|
||||
deal_in_db.confirmed_survey_time == hs_deal.get("confirmed_survey_time"),
|
||||
"confirmed_survey_time mismatch",
|
||||
),
|
||||
]
|
||||
|
||||
# If discrepancies found, update from HubSpot
|
||||
|
|
@ -238,6 +338,7 @@ class HubspotDataToDb:
|
|||
for attr, value in {
|
||||
"dealname": deal_data.get("dealname"),
|
||||
"dealstage": deal_data.get("dealstage"),
|
||||
"listing_id": listing.get("listing_id"),
|
||||
"landlord_property_id": listing.get("owner_property_id"),
|
||||
"uprn": listing.get("national_uprn"),
|
||||
"outcome": deal_data.get("outcome"),
|
||||
|
|
@ -250,16 +351,32 @@ class HubspotDataToDb:
|
|||
"major_condition_issue_photos": deal_data.get(
|
||||
"major_condition_issue_photos"
|
||||
),
|
||||
"major_condition_issue_description": deal_data.get(
|
||||
"major_condition_issue_description"
|
||||
),
|
||||
"major_condition_issue_photos": deal_data.get(
|
||||
"major_condition_issue_photos"
|
||||
),
|
||||
"coordination_status": deal_data.get(
|
||||
"coordination_status__stage_1_"
|
||||
),
|
||||
"design_status": deal_data.get("retrofit_design_status"),
|
||||
"pashub_link": deal_data.get("pashub_link"),
|
||||
"sharepoint_link": deal_data.get("sharepoint_link"),
|
||||
"dampmould_growth": deal_data.get("dampmould_growth"),
|
||||
"pre_sap": deal_data.get("pre_sap"),
|
||||
"coordinator": deal_data.get("coordinator"),
|
||||
"mtp_completion_date": self._parse_hs_date(deal_data.get("mtp_completion_date")),
|
||||
"mtp_re_model_completion_date": self._parse_hs_date(deal_data.get("mtp_re_model_completion_date")),
|
||||
"ioe_v3_completion_date": self._parse_hs_date(deal_data.get("ioe_v3_completion_date")),
|
||||
"proposed_measures": deal_data.get("proposed_measures"),
|
||||
"approved_package": deal_data.get("approved_package"),
|
||||
"designer": deal_data.get("designer"),
|
||||
"design_completion_date": self._parse_hs_date(deal_data.get("design_completion_date")),
|
||||
"actual_measures_installed": deal_data.get("actual_measures_installed"),
|
||||
"installer": deal_data.get("installer"),
|
||||
"installer_handover": deal_data.get("installer_handover"),
|
||||
"lodgement_status": deal_data.get("lodgement_status"),
|
||||
"measures_lodgement_date": self._parse_hs_date(deal_data.get("measures_lodgement_date")),
|
||||
"lodgement_date": self._parse_hs_date(deal_data.get("lodgement_date")),
|
||||
"expected_commencement_date": self._parse_hs_date(deal_data.get("expected_commencement_date")),
|
||||
"surveyor": deal_data.get("surveyor"),
|
||||
"confirmed_survey_date": self._parse_hs_date(deal_data.get("confirmed_survey_date")),
|
||||
"confirmed_survey_time": deal_data.get("confirmed_survey_time"),
|
||||
}.items():
|
||||
setattr(existing, attr, value or getattr(existing, attr))
|
||||
|
||||
|
|
@ -302,6 +419,7 @@ class HubspotDataToDb:
|
|||
deal_id=deal_id,
|
||||
dealname=deal_data.get("dealname"),
|
||||
dealstage=deal_data.get("dealstage"),
|
||||
listing_id=listing.get("listing_id"),
|
||||
landlord_property_id=listing.get("owner_property_id"),
|
||||
uprn=listing.get("national_uprn"),
|
||||
outcome=deal_data.get("outcome"),
|
||||
|
|
@ -316,6 +434,28 @@ class HubspotDataToDb:
|
|||
),
|
||||
coordination_status=deal_data.get("coordination_status__stage_1_"),
|
||||
design_status=deal_data.get("retrofit_design_status"),
|
||||
pashub_link=deal_data.get("pashub_link"),
|
||||
sharepoint_link=deal_data.get("sharepoint_link"),
|
||||
dampmould_growth=deal_data.get("dampmould_growth"),
|
||||
pre_sap=deal_data.get("pre_sap"),
|
||||
coordinator=deal_data.get("coordinator"),
|
||||
mtp_completion_date=self._parse_hs_date(deal_data.get("mtp_completion_date")),
|
||||
mtp_re_model_completion_date=self._parse_hs_date(deal_data.get("mtp_re_model_completion_date")),
|
||||
ioe_v3_completion_date=self._parse_hs_date(deal_data.get("ioe_v3_completion_date")),
|
||||
proposed_measures=deal_data.get("proposed_measures"),
|
||||
approved_package=deal_data.get("approved_package"),
|
||||
designer=deal_data.get("designer"),
|
||||
design_completion_date=self._parse_hs_date(deal_data.get("design_completion_date")),
|
||||
actual_measures_installed=deal_data.get("actual_measures_installed"),
|
||||
installer=deal_data.get("installer"),
|
||||
installer_handover=deal_data.get("installer_handover"),
|
||||
lodgement_status=deal_data.get("lodgement_status"),
|
||||
measures_lodgement_date=self._parse_hs_date(deal_data.get("measures_lodgement_date")),
|
||||
lodgement_date=self._parse_hs_date(deal_data.get("lodgement_date")),
|
||||
expected_commencement_date=self._parse_hs_date(deal_data.get("expected_commencement_date")),
|
||||
surveyor=deal_data.get("surveyor"),
|
||||
confirmed_survey_date=self._parse_hs_date(deal_data.get("confirmed_survey_date")),
|
||||
confirmed_survey_time=deal_data.get("confirmed_survey_time"),
|
||||
)
|
||||
|
||||
# Handle upload at insert time
|
||||
|
|
|
|||
33
etl/hubspot/scripts/scraper/bulk_load.py
Normal file
33
etl/hubspot/scripts/scraper/bulk_load.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
from etl.hubspot.hubspotClient import HubspotClient, Companies, Pipeline
|
||||
from etl.hubspot.scripts.scraper.main import handler
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
# Pipeline whose deals are loaded; bulk_load skips deals in any other pipeline.
PIPELINE_ID = Pipeline.OPERATIONS_SOCIAL_HOUSING.value

# Companies processed when this module is run as a script.
companies = [Companies.THE_GUINESS_PARTNERSHIP, Companies.SOUTHERN_HOUSING_GROUP]
|
||||
|
||||
|
||||
def bulk_load(companies: list[Companies] | None = None) -> None:
    """
    Load all deals from the given companies into the database, filtered to
    the Operations/Social Housing pipeline.

    Every deal id of each company is fetched from HubSpot; deals whose
    ``pipeline`` property differs from ``PIPELINE_ID`` are skipped, and the
    remaining ones are passed to ``handler`` one at a time.

    Args:
        companies: Companies whose deals should be loaded. ``None`` or an
            empty list falls back to every member of the ``Companies`` enum.
    """
    hubspot = HubspotClient()
    targets = companies or list(Companies)

    for company in tqdm(targets, desc="Companies"):
        company_id = company.value
        deal_ids = hubspot.get_deal_ids_from_company(company_id)

        for deal_id in tqdm(deal_ids, desc=company.name, leave=False):
            deal_data = hubspot.from_deal_id_get_info(deal_id)
            if deal_data.get("pipeline") != PIPELINE_ID:
                continue

            handler({"hubspot_deal_id": deal_id}, context=None)
            # tqdm.write prints above the active progress bars; a bare print()
            # would interleave with them and garble the terminal output.
            tqdm.write(f"Processed deal {deal_id} (company: {company.name})")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
bulk_load(companies)
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
3) [completed] Load the db and check if upsert it into the table
|
||||
4) [completed] Get it working on an AWS Lambda
|
||||
5) [completed] subtask and tasks history
|
||||
6) [TODO]The new sexy deal properties, move it over
|
||||
6) [completed] Move the new deal properties over
|
||||
"""
|
||||
|
||||
from etl.hubspot.hubspotClient import HubspotClient
|
||||
|
|
|
|||
|
|
@ -26,13 +26,13 @@ from backend.app.db.functions.materials_functions import get_materials
|
|||
from collections import defaultdict
|
||||
from sqlalchemy import func
|
||||
|
||||
PORTFOLIO_ID = 639
|
||||
SCENARIOS = [1157]
|
||||
PORTFOLIO_ID = 640
|
||||
SCENARIOS = [1154]
|
||||
scenario_names = {
|
||||
1157: "EPC C - no EWI solid floor",
|
||||
1154: "EPC - 10k Budget",
|
||||
}
|
||||
|
||||
project_name = "Instagroup Sample"
|
||||
project_name = "First Charterhouse Investments"
|
||||
|
||||
|
||||
def get_data(portfolio_id, scenario_ids):
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue