mirror of
https://github.com/Hestia-Homes/survey-extraction.git
synced 2026-06-08 11:17:29 +00:00
added code for s3 url
This commit is contained in:
parent
8daddf6cb7
commit
2fe2e5053f
5 changed files with 89 additions and 3 deletions
33
alembic/versions/c8af22cece92_s3_add.py
Normal file
33
alembic/versions/c8af22cece92_s3_add.py
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
"""s3 add
|
||||
|
||||
Revision ID: c8af22cece92
|
||||
Revises: ed6aaa298de4
|
||||
Create Date: 2025-11-07 15:00:32.917157
|
||||
|
||||
"""
|
||||
from typing import Sequence, Union
|
||||
|
||||
from alembic import op
|
||||
import sqlalchemy as sa
|
||||
import sqlmodel
|
||||
|
||||
|
||||
# revision identifiers, used by Alembic.
|
||||
revision: str = 'c8af22cece92'
|
||||
down_revision: Union[str, None] = 'ed6aaa298de4'
|
||||
branch_labels: Union[str, Sequence[str], None] = None
|
||||
depends_on: Union[str, Sequence[str], None] = None
|
||||
|
||||
|
||||
def upgrade() -> None:
    """Apply this revision by adding the nullable S3 evidence URL column."""
    # ### commands auto generated by Alembic - please adjust! ###
    # Build the column definition first so the add_column call stays short.
    evidence_url_column = sa.Column(
        "major_condition_issue_evidence_s3_url",
        sqlmodel.sql.sqltypes.AutoString(),
        nullable=True,
    )
    op.add_column("hubspot_deal_data", evidence_url_column)
    # ### end Alembic commands ###
|
||||
|
||||
|
||||
def downgrade() -> None:
    """Revert this revision by dropping the S3 evidence URL column."""
    # ### commands auto generated by Alembic - please adjust! ###
    table_name = "hubspot_deal_data"
    column_name = "major_condition_issue_evidence_s3_url"
    op.drop_column(table_name, column_name)
    # ### end Alembic commands ###
|
||||
|
|
@ -6,7 +6,7 @@ terraform {
|
|||
}
|
||||
}
|
||||
backend "s3" {
|
||||
bucket = "survey-extractor-tf-state"
|
||||
= "survey-extractor-tf-state"
|
||||
region = "eu-west-2"
|
||||
key = "env:/dev/terraform.tfstate"
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,6 +3,8 @@ from enum import Enum
|
|||
from etl.utils.logger import Logger
|
||||
import logging
|
||||
from hubspot.crm.associations import ApiException
|
||||
import os
|
||||
import requests
|
||||
|
||||
class Companies(Enum):
|
||||
ABRI = "237615001799"
|
||||
|
|
@ -208,4 +210,54 @@ class HubSpotClient():
|
|||
|
||||
except Exception as e:
|
||||
self.logger.error(f"Error retrieving deal stages: {e}")
|
||||
return []
|
||||
return []
|
||||
|
||||
def download_file_from_url(self, download_url: str, save_path: str = None, timeout: float = 30.0) -> str:
    """
    Download a file from a HubSpot file URL (public or private), keeping its original file type.

    The file name is taken from the ``Content-Disposition`` response header when
    one is advertised; otherwise it falls back to the last path segment of the
    URL, with an extension guessed from ``Content-Type`` if the name has none.

    Args:
        download_url: URL of the file to fetch.
        save_path: Where to store the file. ``None`` saves under the current
            working directory using the resolved file name; an existing
            directory gets the resolved file name appended; any other value is
            used as the target file path.
        timeout: Seconds to wait for the connection and for each read before
            giving up. Passed straight to ``requests.get``.

    Returns:
        The path the file was written to.

    Raises:
        requests.exceptions.RequestException: On connection errors, timeouts or
            a non-success HTTP status (logged, then re-raised).
    """
    import mimetypes
    import os
    from email.message import Message

    import requests

    try:
        headers = {}
        # NOTE(review): the bearer token is attached for every URL that does not
        # contain "hubspotusercontent", including non-HubSpot hosts — confirm the
        # callers only ever pass HubSpot URLs.
        if "hubspotusercontent" not in download_url:
            headers["Authorization"] = f"Bearer {self.access_token}"

        self.logger.info(f"Downloading HubSpot file: {download_url}")

        # A streamed response holds its connection until it is closed, so use it
        # as a context manager; the timeout prevents hanging on a stalled server.
        with requests.get(
            download_url,
            headers=headers,
            stream=True,
            allow_redirects=True,
            timeout=timeout,
        ) as response:
            response.raise_for_status()

            # Try to infer filename from Content-Disposition header. The stdlib
            # header parser copes with quoting, extra parameters and RFC 2231
            # ``filename*=`` forms that a plain string split mangles.
            filename = None
            content_disposition = response.headers.get("content-disposition")
            if content_disposition:
                header = Message()
                header["content-disposition"] = content_disposition
                advertised_name = header.get_filename()
                if advertised_name:
                    # The name is server-controlled: keep only its final path
                    # component so it cannot escape the target directory.
                    filename = os.path.basename(advertised_name.replace("\\", "/"))

            if not filename:
                # fallback: extract from URL or content-type
                filename = os.path.basename(download_url.split("?")[0]) or "hubspot_download"
                if "." not in filename:
                    content_type = response.headers.get("content-type")
                    ext = (
                        mimetypes.guess_extension(content_type.split(";")[0].strip())
                        if content_type
                        else None
                    )
                    if ext:
                        filename += ext

            # Make sure save_path is valid
            if save_path is None:
                save_path = os.path.abspath(filename)
            elif os.path.isdir(save_path):
                save_path = os.path.join(save_path, filename)
            else:
                # if user passes a file path directly, leave it
                save_path = os.path.abspath(save_path)

            with open(save_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:  # skip empty keep-alive chunks
                        f.write(chunk)

        self.logger.info(f"File downloaded successfully → {save_path}")
        return save_path

    except requests.exceptions.RequestException as e:
        self.logger.error(f"Failed to download file from HubSpot: {e}")
        raise
|
||||
|
|
|
|||
|
|
@ -83,6 +83,7 @@ class HubspotDealData(SQLModel, table=True):
|
|||
|
||||
major_condition_issue_description: Optional[str] = Field(default=None)
|
||||
major_condition_issue_photos: Optional[str] = Field(default=None)
|
||||
major_condition_issue_evidence_s3_url: Optional[str] = Field(default=None)
|
||||
|
||||
created_at: datetime = Field(
|
||||
sa_column=Column(
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
#poetry run alembic revision --autogenerate -m "added major condition issue things"
|
||||
#poetry run alembic revision --autogenerate -m "s3 add "
|
||||
|
||||
poetry run alembic upgrade head
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Reference in a new issue